summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-03-15 14:46:54 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-03-15 14:46:54 -0700
commitc8e769961668ef56acabc67f040c58ed769c57e4 (patch)
treeaf8da6c41ede9859f698bd9da542cb52f10cb4ec /drivers
parent4138f02288333cb596885e9af03dd3ea2de845cb (diff)
parent6a8dbd71a70620c42d4fa82509204ba18231f28d (diff)
Merge tag 'v6.9-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu: "API: - Avoid unnecessary copying in scomp for trivial SG lists Algorithms: - Optimise NEON CCM implementation on ARM64 Drivers: - Add queue stop/query debugfs support in hisilicon/qm - Intel qat updates and cleanups" * tag 'v6.9-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (79 commits) Revert "crypto: remove CONFIG_CRYPTO_STATS" crypto: scomp - remove memcpy if sg_nents is 1 and pages are lowmem crypto: tcrypt - add ffdhe2048(dh) test crypto: iaa - fix the missing CRYPTO_ALG_ASYNC in cra_flags crypto: hisilicon/zip - fix the missing CRYPTO_ALG_ASYNC in cra_flags hwrng: hisi - use dev_err_probe MAINTAINERS: Remove T Ambarus from few mchp entries crypto: iaa - Fix comp/decomp delay statistics crypto: iaa - Fix async_disable descriptor leak dt-bindings: rng: atmel,at91-trng: add sam9x7 TRNG dt-bindings: crypto: add sam9x7 in Atmel TDES dt-bindings: crypto: add sam9x7 in Atmel SHA dt-bindings: crypto: add sam9x7 in Atmel AES crypto: remove CONFIG_CRYPTO_STATS crypto: dh - Make public key test FIPS-only crypto: rockchip - fix to check return value crypto: jitter - fix CRYPTO_JITTERENTROPY help text crypto: qat - make ring to service map common for QAT GEN4 crypto: qat - fix ring to service map for dcc in 420xx crypto: qat - fix ring to service map for dcc in 4xxx ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/char/hw_random/hisi-rng.c6
-rw-r--r--drivers/crypto/Kconfig14
-rw-r--r--drivers/crypto/Makefile2
-rw-r--r--drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c2
-rw-r--r--drivers/crypto/ccp/platform-access.c11
-rw-r--r--drivers/crypto/ccp/psp-dev.c11
-rw-r--r--drivers/crypto/hisilicon/debugfs.c58
-rw-r--r--drivers/crypto/hisilicon/hpre/hpre_main.c2
-rw-r--r--drivers/crypto/hisilicon/qm.c184
-rw-r--r--drivers/crypto/hisilicon/sec2/sec_crypto.c33
-rw-r--r--drivers/crypto/hisilicon/sec2/sec_main.c7
-rw-r--r--drivers/crypto/hisilicon/zip/zip_crypto.c1
-rw-r--r--drivers/crypto/hisilicon/zip/zip_main.c2
-rw-r--r--drivers/crypto/intel/iaa/iaa_crypto.h25
-rw-r--r--drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c1
-rw-r--r--drivers/crypto/intel/iaa/iaa_crypto_main.c122
-rw-r--r--drivers/crypto/intel/iaa/iaa_crypto_stats.c30
-rw-r--r--drivers/crypto/intel/iaa/iaa_crypto_stats.h8
-rw-r--r--drivers/crypto/intel/qat/Kconfig14
-rw-r--r--drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c64
-rw-r--r--drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c64
-rw-r--r--drivers/crypto/intel/qat/qat_common/Makefile2
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_accel_devices.h3
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_aer.c138
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h1
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_clock.c3
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_cnv_dbgfs.c1
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_common_drv.h10
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_dev_mgr.c4
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c59
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h1
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen4_ras.c6
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_heartbeat.c20
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_heartbeat.h21
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.c53
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_heartbeat_inject.c76
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_hw_arbiter.c25
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_init.c12
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_isr.c11
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_pfvf_msg.h7
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.c64
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.h21
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c8
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_pfvf_vf_proto.c6
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_rl.c20
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_sriov.c38
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_sysfs.c37
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_vf_isr.c2
-rw-r--r--drivers/crypto/intel/qat/qat_common/qat_comp_algs.c9
-rw-r--r--drivers/crypto/intel/qat/qat_common/qat_crypto.c4
-rw-r--r--drivers/crypto/rockchip/rk3288_crypto.c5
-rw-r--r--drivers/crypto/virtio/virtio_crypto_akcipher_algs.c12
-rw-r--r--drivers/crypto/virtio/virtio_crypto_core.c2
-rw-r--r--drivers/crypto/vmx/.gitignore3
-rw-r--r--drivers/crypto/vmx/Kconfig14
-rw-r--r--drivers/crypto/vmx/Makefile23
-rw-r--r--drivers/crypto/vmx/aes.c134
-rw-r--r--drivers/crypto/vmx/aes_cbc.c133
-rw-r--r--drivers/crypto/vmx/aes_ctr.c149
-rw-r--r--drivers/crypto/vmx/aes_xts.c162
-rw-r--r--drivers/crypto/vmx/aesp8-ppc.h30
-rw-r--r--drivers/crypto/vmx/aesp8-ppc.pl3889
-rw-r--r--drivers/crypto/vmx/ghash.c185
-rw-r--r--drivers/crypto/vmx/ghashp8-ppc.pl243
-rw-r--r--drivers/crypto/vmx/ppc-xlate.pl231
-rw-r--r--drivers/crypto/vmx/vmx.c77
-rw-r--r--drivers/crypto/xilinx/zynqmp-aes-gcm.c3
67 files changed, 927 insertions, 5691 deletions
diff --git a/drivers/char/hw_random/hisi-rng.c b/drivers/char/hw_random/hisi-rng.c
index b6f27566e0ba..4e501d5c121f 100644
--- a/drivers/char/hw_random/hisi-rng.c
+++ b/drivers/char/hw_random/hisi-rng.c
@@ -89,10 +89,8 @@ static int hisi_rng_probe(struct platform_device *pdev)
rng->rng.read = hisi_rng_read;
ret = devm_hwrng_register(&pdev->dev, &rng->rng);
- if (ret) {
- dev_err(&pdev->dev, "failed to register hwrng\n");
- return ret;
- }
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret, "failed to register hwrng\n");
return 0;
}
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 0991f026cb07..3d02702456a5 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -611,13 +611,13 @@ config CRYPTO_DEV_QCOM_RNG
To compile this driver as a module, choose M here. The
module will be called qcom-rng. If unsure, say N.
-config CRYPTO_DEV_VMX
- bool "Support for VMX cryptographic acceleration instructions"
- depends on PPC64 && VSX
- help
- Support for VMX cryptographic acceleration instructions.
-
-source "drivers/crypto/vmx/Kconfig"
+#config CRYPTO_DEV_VMX
+# bool "Support for VMX cryptographic acceleration instructions"
+# depends on PPC64 && VSX
+# help
+# Support for VMX cryptographic acceleration instructions.
+#
+#source "drivers/crypto/vmx/Kconfig"
config CRYPTO_DEV_IMGTEC_HASH
tristate "Imagination Technologies hardware hash accelerator"
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index d859d6a5f3a4..95331bc6456b 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -42,7 +42,7 @@ obj-$(CONFIG_CRYPTO_DEV_SL3516) += gemini/
obj-y += stm32/
obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/
-obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
+#obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/
obj-$(CONFIG_CRYPTO_DEV_SAFEXCEL) += inside-secure/
obj-$(CONFIG_CRYPTO_DEV_ARTPEC6) += axis/
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
index d358334e5981..ee2a28c906ed 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
@@ -362,7 +362,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
digestsize = SHA512_DIGEST_SIZE;
/* the padding could be up to two block. */
- buf = kzalloc(bs * 2, GFP_KERNEL | GFP_DMA);
+ buf = kcalloc(2, bs, GFP_KERNEL | GFP_DMA);
if (!buf) {
err = -ENOMEM;
goto theend;
diff --git a/drivers/crypto/ccp/platform-access.c b/drivers/crypto/ccp/platform-access.c
index 94367bc49e35..1b8ed3389733 100644
--- a/drivers/crypto/ccp/platform-access.c
+++ b/drivers/crypto/ccp/platform-access.c
@@ -118,9 +118,16 @@ int psp_send_platform_access_msg(enum psp_platform_access_msg msg,
goto unlock;
}
- /* Store the status in request header for caller to investigate */
+ /*
+ * Read status from PSP. If status is non-zero, it indicates an error
+ * occurred during "processing" of the command.
+ * If status is zero, it indicates the command was "processed"
+ * successfully, but the result of the command is in the payload.
+ * Return both cases to the caller as -EIO to investigate.
+ */
cmd_reg = ioread32(cmd);
- req->header.status = FIELD_GET(PSP_CMDRESP_STS, cmd_reg);
+ if (FIELD_GET(PSP_CMDRESP_STS, cmd_reg))
+ req->header.status = FIELD_GET(PSP_CMDRESP_STS, cmd_reg);
if (req->header.status) {
ret = -EIO;
goto unlock;
diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
index 124a2e0c8999..56bf832c2947 100644
--- a/drivers/crypto/ccp/psp-dev.c
+++ b/drivers/crypto/ccp/psp-dev.c
@@ -156,11 +156,14 @@ static unsigned int psp_get_capability(struct psp_device *psp)
}
psp->capability = val;
- /* Detect if TSME and SME are both enabled */
+ /* Detect TSME and/or SME status */
if (PSP_CAPABILITY(psp, PSP_SECURITY_REPORTING) &&
- psp->capability & (PSP_SECURITY_TSME_STATUS << PSP_CAPABILITY_PSP_SECURITY_OFFSET) &&
- cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
- dev_notice(psp->dev, "psp: Both TSME and SME are active, SME is unnecessary when TSME is active.\n");
+ psp->capability & (PSP_SECURITY_TSME_STATUS << PSP_CAPABILITY_PSP_SECURITY_OFFSET)) {
+ if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
+ dev_notice(psp->dev, "psp: Both TSME and SME are active, SME is unnecessary when TSME is active.\n");
+ else
+ dev_notice(psp->dev, "psp: TSME enabled\n");
+ }
return 0;
}
diff --git a/drivers/crypto/hisilicon/debugfs.c b/drivers/crypto/hisilicon/debugfs.c
index 80ed4b2d209c..cd67fa348ca7 100644
--- a/drivers/crypto/hisilicon/debugfs.c
+++ b/drivers/crypto/hisilicon/debugfs.c
@@ -24,6 +24,8 @@
#define QM_DFX_QN_SHIFT 16
#define QM_DFX_CNT_CLR_CE 0x100118
#define QM_DBG_WRITE_LEN 1024
+#define QM_IN_IDLE_ST_REG 0x1040e4
+#define QM_IN_IDLE_STATE 0x1
static const char * const qm_debug_file_name[] = {
[CURRENT_QM] = "current_qm",
@@ -81,6 +83,30 @@ static const struct debugfs_reg32 qm_dfx_regs[] = {
{"QM_DFX_FF_ST5 ", 0x1040dc},
{"QM_DFX_FF_ST6 ", 0x1040e0},
{"QM_IN_IDLE_ST ", 0x1040e4},
+ {"QM_CACHE_CTL ", 0x100050},
+ {"QM_TIMEOUT_CFG ", 0x100070},
+ {"QM_DB_TIMEOUT_CFG ", 0x100074},
+ {"QM_FLR_PENDING_TIME_CFG ", 0x100078},
+ {"QM_ARUSR_MCFG1 ", 0x100088},
+ {"QM_AWUSR_MCFG1 ", 0x100098},
+ {"QM_AXI_M_CFG_ENABLE ", 0x1000B0},
+ {"QM_RAS_CE_THRESHOLD ", 0x1000F8},
+ {"QM_AXI_TIMEOUT_CTRL ", 0x100120},
+ {"QM_AXI_TIMEOUT_STATUS ", 0x100124},
+ {"QM_CQE_AGGR_TIMEOUT_CTRL ", 0x100144},
+ {"ACC_RAS_MSI_INT_SEL ", 0x1040fc},
+ {"QM_CQE_OUT ", 0x104100},
+ {"QM_EQE_OUT ", 0x104104},
+ {"QM_AEQE_OUT ", 0x104108},
+ {"QM_DB_INFO0 ", 0x104180},
+ {"QM_DB_INFO1 ", 0x104184},
+ {"QM_AM_CTRL_GLOBAL ", 0x300000},
+ {"QM_AM_CURR_PORT_STS ", 0x300100},
+ {"QM_AM_CURR_TRANS_RETURN ", 0x300150},
+ {"QM_AM_CURR_RD_MAX_TXID ", 0x300154},
+ {"QM_AM_CURR_WR_MAX_TXID ", 0x300158},
+ {"QM_AM_ALARM_RRESP ", 0x300180},
+ {"QM_AM_ALARM_BRESP ", 0x300184},
};
static const struct debugfs_reg32 qm_vf_dfx_regs[] = {
@@ -1001,6 +1027,30 @@ static int qm_diff_regs_show(struct seq_file *s, void *unused)
}
DEFINE_SHOW_ATTRIBUTE(qm_diff_regs);
+static int qm_state_show(struct seq_file *s, void *unused)
+{
+ struct hisi_qm *qm = s->private;
+ u32 val;
+ int ret;
+
+ /* If device is in suspended, directly return the idle state. */
+ ret = hisi_qm_get_dfx_access(qm);
+ if (!ret) {
+ val = readl(qm->io_base + QM_IN_IDLE_ST_REG);
+ hisi_qm_put_dfx_access(qm);
+ } else if (ret == -EAGAIN) {
+ val = QM_IN_IDLE_STATE;
+ } else {
+ return ret;
+ }
+
+ seq_printf(s, "%u\n", val);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(qm_state);
+
static ssize_t qm_status_read(struct file *filp, char __user *buffer,
size_t count, loff_t *pos)
{
@@ -1062,6 +1112,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(qm_atomic64_ops, qm_debugfs_atomic64_get,
void hisi_qm_debug_init(struct hisi_qm *qm)
{
struct dfx_diff_registers *qm_regs = qm->debug.qm_diff_regs;
+ struct qm_dev_dfx *dev_dfx = &qm->debug.dev_dfx;
struct qm_dfx *dfx = &qm->debug.dfx;
struct dentry *qm_d;
void *data;
@@ -1072,6 +1123,9 @@ void hisi_qm_debug_init(struct hisi_qm *qm)
/* only show this in PF */
if (qm->fun_type == QM_HW_PF) {
+ debugfs_create_file("qm_state", 0444, qm->debug.qm_d,
+ qm, &qm_state_fops);
+
qm_create_debugfs_file(qm, qm->debug.debug_root, CURRENT_QM);
for (i = CURRENT_Q; i < DEBUG_FILE_NUM; i++)
qm_create_debugfs_file(qm, qm->debug.qm_d, i);
@@ -1087,6 +1141,10 @@ void hisi_qm_debug_init(struct hisi_qm *qm)
debugfs_create_file("status", 0444, qm->debug.qm_d, qm,
&qm_status_fops);
+
+ debugfs_create_u32("dev_state", 0444, qm->debug.qm_d, &dev_dfx->dev_state);
+ debugfs_create_u32("dev_timeout", 0644, qm->debug.qm_d, &dev_dfx->dev_timeout);
+
for (i = 0; i < ARRAY_SIZE(qm_dfx_files); i++) {
data = (atomic64_t *)((uintptr_t)dfx + qm_dfx_files[i].offset);
debugfs_create_file(qm_dfx_files[i].name,
diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index 3255b2a070c7..d93aa6630a57 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -440,7 +440,7 @@ MODULE_PARM_DESC(vfs_num, "Number of VFs to enable(1-63), 0(default)");
struct hisi_qp *hpre_create_qp(u8 type)
{
- int node = cpu_to_node(smp_processor_id());
+ int node = cpu_to_node(raw_smp_processor_id());
struct hisi_qp *qp = NULL;
int ret;
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 4b20b94e6371..92f0a1d9b4a6 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -236,6 +236,12 @@
#define QM_DEV_ALG_MAX_LEN 256
+ /* abnormal status value for stopping queue */
+#define QM_STOP_QUEUE_FAIL 1
+#define QM_DUMP_SQC_FAIL 3
+#define QM_DUMP_CQC_FAIL 4
+#define QM_FINISH_WAIT 5
+
#define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \
(((hop_num) << QM_CQ_HOP_NUM_SHIFT) | \
((pg_sz) << QM_CQ_PAGE_SIZE_SHIFT) | \
@@ -312,6 +318,7 @@ static const struct hisi_qm_cap_info qm_cap_info_comm[] = {
{QM_SUPPORT_DB_ISOLATION, 0x30, 0, BIT(0), 0x0, 0x0, 0x0},
{QM_SUPPORT_FUNC_QOS, 0x3100, 0, BIT(8), 0x0, 0x0, 0x1},
{QM_SUPPORT_STOP_QP, 0x3100, 0, BIT(9), 0x0, 0x0, 0x1},
+ {QM_SUPPORT_STOP_FUNC, 0x3100, 0, BIT(10), 0x0, 0x0, 0x1},
{QM_SUPPORT_MB_COMMAND, 0x3100, 0, BIT(11), 0x0, 0x0, 0x1},
{QM_SUPPORT_SVA_PREFETCH, 0x3100, 0, BIT(14), 0x0, 0x0, 0x1},
};
@@ -1674,6 +1681,11 @@ unlock:
return ret;
}
+static int qm_drain_qm(struct hisi_qm *qm)
+{
+ return hisi_qm_mb(qm, QM_MB_CMD_FLUSH_QM, 0, 0, 0);
+}
+
static int qm_stop_qp(struct hisi_qp *qp)
{
return hisi_qm_mb(qp->qm, QM_MB_CMD_STOP_QP, 0, qp->qp_id, 0);
@@ -2031,43 +2043,25 @@ static void qp_stop_fail_cb(struct hisi_qp *qp)
}
}
-/**
- * qm_drain_qp() - Drain a qp.
- * @qp: The qp we want to drain.
- *
- * Determine whether the queue is cleared by judging the tail pointers of
- * sq and cq.
- */
-static int qm_drain_qp(struct hisi_qp *qp)
+static int qm_wait_qp_empty(struct hisi_qm *qm, u32 *state, u32 qp_id)
{
- struct hisi_qm *qm = qp->qm;
struct device *dev = &qm->pdev->dev;
struct qm_sqc sqc;
struct qm_cqc cqc;
int ret, i = 0;
- /* No need to judge if master OOO is blocked. */
- if (qm_check_dev_error(qm))
- return 0;
-
- /* Kunpeng930 supports drain qp by device */
- if (test_bit(QM_SUPPORT_STOP_QP, &qm->caps)) {
- ret = qm_stop_qp(qp);
- if (ret)
- dev_err(dev, "Failed to stop qp(%u)!\n", qp->qp_id);
- return ret;
- }
-
while (++i) {
- ret = qm_set_and_get_xqc(qm, QM_MB_CMD_SQC, &sqc, qp->qp_id, 1);
+ ret = qm_set_and_get_xqc(qm, QM_MB_CMD_SQC, &sqc, qp_id, 1);
if (ret) {
dev_err_ratelimited(dev, "Failed to dump sqc!\n");
+ *state = QM_DUMP_SQC_FAIL;
return ret;
}
- ret = qm_set_and_get_xqc(qm, QM_MB_CMD_CQC, &cqc, qp->qp_id, 1);
+ ret = qm_set_and_get_xqc(qm, QM_MB_CMD_CQC, &cqc, qp_id, 1);
if (ret) {
dev_err_ratelimited(dev, "Failed to dump cqc!\n");
+ *state = QM_DUMP_CQC_FAIL;
return ret;
}
@@ -2076,8 +2070,9 @@ static int qm_drain_qp(struct hisi_qp *qp)
break;
if (i == MAX_WAIT_COUNTS) {
- dev_err(dev, "Fail to empty queue %u!\n", qp->qp_id);
- return -EBUSY;
+ dev_err(dev, "Fail to empty queue %u!\n", qp_id);
+ *state = QM_STOP_QUEUE_FAIL;
+ return -ETIMEDOUT;
}
usleep_range(WAIT_PERIOD_US_MIN, WAIT_PERIOD_US_MAX);
@@ -2086,9 +2081,53 @@ static int qm_drain_qp(struct hisi_qp *qp)
return 0;
}
-static int qm_stop_qp_nolock(struct hisi_qp *qp)
+/**
+ * qm_drain_qp() - Drain a qp.
+ * @qp: The qp we want to drain.
+ *
+ * If the device does not support stopping queue by sending mailbox,
+ * determine whether the queue is cleared by judging the tail pointers of
+ * sq and cq.
+ */
+static int qm_drain_qp(struct hisi_qp *qp)
+{
+ struct hisi_qm *qm = qp->qm;
+ struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(qm->pdev));
+ u32 state = 0;
+ int ret;
+
+ /* No need to judge if master OOO is blocked. */
+ if (qm_check_dev_error(pf_qm))
+ return 0;
+
+ /* HW V3 supports drain qp by device */
+ if (test_bit(QM_SUPPORT_STOP_QP, &qm->caps)) {
+ ret = qm_stop_qp(qp);
+ if (ret) {
+ dev_err(&qm->pdev->dev, "Failed to stop qp!\n");
+ state = QM_STOP_QUEUE_FAIL;
+ goto set_dev_state;
+ }
+ return ret;
+ }
+
+ ret = qm_wait_qp_empty(qm, &state, qp->qp_id);
+ if (ret)
+ goto set_dev_state;
+
+ return 0;
+
+set_dev_state:
+ if (qm->debug.dev_dfx.dev_timeout)
+ qm->debug.dev_dfx.dev_state = state;
+
+ return ret;
+}
+
+static void qm_stop_qp_nolock(struct hisi_qp *qp)
{
- struct device *dev = &qp->qm->pdev->dev;
+ struct hisi_qm *qm = qp->qm;
+ struct device *dev = &qm->pdev->dev;
int ret;
/*
@@ -2099,39 +2138,36 @@ static int qm_stop_qp_nolock(struct hisi_qp *qp)
*/
if (atomic_read(&qp->qp_status.flags) != QP_START) {
qp->is_resetting = false;
- return 0;
+ return;
}
atomic_set(&qp->qp_status.flags, QP_STOP);
- ret = qm_drain_qp(qp);
- if (ret)
- dev_err(dev, "Failed to drain out data for stopping!\n");
+ /* V3 supports direct stop function when FLR prepare */
+ if (qm->ver < QM_HW_V3 || qm->status.stop_reason == QM_NORMAL) {
+ ret = qm_drain_qp(qp);
+ if (ret)
+ dev_err(dev, "Failed to drain out data for stopping qp(%u)!\n", qp->qp_id);
+ }
- flush_workqueue(qp->qm->wq);
+ flush_workqueue(qm->wq);
if (unlikely(qp->is_resetting && atomic_read(&qp->qp_status.used)))
qp_stop_fail_cb(qp);
dev_dbg(dev, "stop queue %u!", qp->qp_id);
-
- return 0;
}
/**
* hisi_qm_stop_qp() - Stop a qp in qm.
* @qp: The qp we want to stop.
*
- * This function is reverse of hisi_qm_start_qp. Return 0 if successful.
+ * This function is reverse of hisi_qm_start_qp.
*/
-int hisi_qm_stop_qp(struct hisi_qp *qp)
+void hisi_qm_stop_qp(struct hisi_qp *qp)
{
- int ret;
-
down_write(&qp->qm->qps_lock);
- ret = qm_stop_qp_nolock(qp);
+ qm_stop_qp_nolock(qp);
up_write(&qp->qm->qps_lock);
-
- return ret;
}
EXPORT_SYMBOL_GPL(hisi_qm_stop_qp);
@@ -2309,7 +2345,31 @@ static int hisi_qm_uacce_start_queue(struct uacce_queue *q)
static void hisi_qm_uacce_stop_queue(struct uacce_queue *q)
{
- hisi_qm_stop_qp(q->priv);
+ struct hisi_qp *qp = q->priv;
+ struct hisi_qm *qm = qp->qm;
+ struct qm_dev_dfx *dev_dfx = &qm->debug.dev_dfx;
+ u32 i = 0;
+
+ hisi_qm_stop_qp(qp);
+
+ if (!dev_dfx->dev_timeout || !dev_dfx->dev_state)
+ return;
+
+ /*
+ * After the queue fails to be stopped,
+ * wait for a period of time before releasing the queue.
+ */
+ while (++i) {
+ msleep(WAIT_PERIOD);
+
+ /* Since dev_timeout maybe modified, check i >= dev_timeout */
+ if (i >= dev_dfx->dev_timeout) {
+ dev_err(&qm->pdev->dev, "Stop q %u timeout, state %u\n",
+ qp->qp_id, dev_dfx->dev_state);
+ dev_dfx->dev_state = QM_FINISH_WAIT;
+ break;
+ }
+ }
}
static int hisi_qm_is_q_updated(struct uacce_queue *q)
@@ -3054,25 +3114,18 @@ static int qm_restart(struct hisi_qm *qm)
}
/* Stop started qps in reset flow */
-static int qm_stop_started_qp(struct hisi_qm *qm)
+static void qm_stop_started_qp(struct hisi_qm *qm)
{
- struct device *dev = &qm->pdev->dev;
struct hisi_qp *qp;
- int i, ret;
+ int i;
for (i = 0; i < qm->qp_num; i++) {
qp = &qm->qp_array[i];
- if (qp && atomic_read(&qp->qp_status.flags) == QP_START) {
+ if (atomic_read(&qp->qp_status.flags) == QP_START) {
qp->is_resetting = true;
- ret = qm_stop_qp_nolock(qp);
- if (ret < 0) {
- dev_err(dev, "Failed to stop qp%d!\n", i);
- return ret;
- }
+ qm_stop_qp_nolock(qp);
}
}
-
- return 0;
}
/**
@@ -3112,21 +3165,31 @@ int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r)
down_write(&qm->qps_lock);
- qm->status.stop_reason = r;
if (atomic_read(&qm->status.flags) == QM_STOP)
goto err_unlock;
/* Stop all the request sending at first. */
atomic_set(&qm->status.flags, QM_STOP);
+ qm->status.stop_reason = r;
- if (qm->status.stop_reason == QM_SOFT_RESET ||
- qm->status.stop_reason == QM_DOWN) {
+ if (qm->status.stop_reason != QM_NORMAL) {
hisi_qm_set_hw_reset(qm, QM_RESET_STOP_TX_OFFSET);
- ret = qm_stop_started_qp(qm);
- if (ret < 0) {
- dev_err(dev, "Failed to stop started qp!\n");
- goto err_unlock;
+ /*
+ * When performing soft reset, the hardware will no longer
+ * do tasks, and the tasks in the device will be flushed
+ * out directly since the master ooo is closed.
+ */
+ if (test_bit(QM_SUPPORT_STOP_FUNC, &qm->caps) &&
+ r != QM_SOFT_RESET) {
+ ret = qm_drain_qm(qm);
+ if (ret) {
+ dev_err(dev, "failed to drain qm!\n");
+ goto err_unlock;
+ }
}
+
+ qm_stop_started_qp(qm);
+
hisi_qm_set_hw_reset(qm, QM_RESET_STOP_RX_OFFSET);
}
@@ -3141,6 +3204,7 @@ int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r)
}
qm_clear_queues(qm);
+ qm->status.stop_reason = QM_NORMAL;
err_unlock:
up_write(&qm->qps_lock);
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index f028dcfd0ead..93a972fcbf63 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -118,7 +118,7 @@ struct sec_aead {
};
/* Get an en/de-cipher queue cyclically to balance load over queues of TFM */
-static inline int sec_alloc_queue_id(struct sec_ctx *ctx, struct sec_req *req)
+static inline u32 sec_alloc_queue_id(struct sec_ctx *ctx, struct sec_req *req)
{
if (req->c_req.encrypt)
return (u32)atomic_inc_return(&ctx->enc_qcyclic) %
@@ -485,8 +485,7 @@ static void sec_alg_resource_free(struct sec_ctx *ctx,
sec_free_mac_resource(dev, qp_ctx->res);
}
-static int sec_alloc_qp_ctx_resource(struct hisi_qm *qm, struct sec_ctx *ctx,
- struct sec_qp_ctx *qp_ctx)
+static int sec_alloc_qp_ctx_resource(struct sec_ctx *ctx, struct sec_qp_ctx *qp_ctx)
{
u16 q_depth = qp_ctx->qp->sq_depth;
struct device *dev = ctx->dev;
@@ -541,8 +540,7 @@ static void sec_free_qp_ctx_resource(struct sec_ctx *ctx, struct sec_qp_ctx *qp_
kfree(qp_ctx->req_list);
}
-static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx,
- int qp_ctx_id, int alg_type)
+static int sec_create_qp_ctx(struct sec_ctx *ctx, int qp_ctx_id)
{
struct sec_qp_ctx *qp_ctx;
struct hisi_qp *qp;
@@ -561,7 +559,7 @@ static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx,
idr_init(&qp_ctx->req_idr);
INIT_LIST_HEAD(&qp_ctx->backlog);
- ret = sec_alloc_qp_ctx_resource(qm, ctx, qp_ctx);
+ ret = sec_alloc_qp_ctx_resource(ctx, qp_ctx);
if (ret)
goto err_destroy_idr;
@@ -614,7 +612,7 @@ static int sec_ctx_base_init(struct sec_ctx *ctx)
}
for (i = 0; i < sec->ctx_q_num; i++) {
- ret = sec_create_qp_ctx(&sec->qm, ctx, i, 0);
+ ret = sec_create_qp_ctx(ctx, i);
if (ret)
goto err_sec_release_qp_ctx;
}
@@ -750,9 +748,7 @@ static void sec_skcipher_uninit(struct crypto_skcipher *tfm)
sec_ctx_base_uninit(ctx);
}
-static int sec_skcipher_3des_setkey(struct crypto_skcipher *tfm, const u8 *key,
- const u32 keylen,
- const enum sec_cmode c_mode)
+static int sec_skcipher_3des_setkey(struct crypto_skcipher *tfm, const u8 *key, const u32 keylen)
{
struct sec_ctx *ctx = crypto_skcipher_ctx(tfm);
struct sec_cipher_ctx *c_ctx = &ctx->c_ctx;
@@ -843,7 +839,7 @@ static int sec_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
switch (c_alg) {
case SEC_CALG_3DES:
- ret = sec_skcipher_3des_setkey(tfm, key, keylen, c_mode);
+ ret = sec_skcipher_3des_setkey(tfm, key, keylen);
break;
case SEC_CALG_AES:
case SEC_CALG_SM4:
@@ -1371,7 +1367,7 @@ static int sec_skcipher_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req)
sec_sqe3->bd_param = cpu_to_le32(bd_param);
sec_sqe3->c_len_ivin |= cpu_to_le32(c_req->c_len);
- sec_sqe3->tag = cpu_to_le64(req);
+ sec_sqe3->tag = cpu_to_le64((unsigned long)req);
return 0;
}
@@ -2145,8 +2141,8 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req)
return sec_skcipher_crypto(sk_req, false);
}
-#define SEC_SKCIPHER_GEN_ALG(sec_cra_name, sec_set_key, sec_min_key_size, \
- sec_max_key_size, ctx_init, ctx_exit, blk_size, iv_size)\
+#define SEC_SKCIPHER_ALG(sec_cra_name, sec_set_key, \
+ sec_min_key_size, sec_max_key_size, blk_size, iv_size)\
{\
.base = {\
.cra_name = sec_cra_name,\
@@ -2158,8 +2154,8 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req)
.cra_ctxsize = sizeof(struct sec_ctx),\
.cra_module = THIS_MODULE,\
},\
- .init = ctx_init,\
- .exit = ctx_exit,\
+ .init = sec_skcipher_ctx_init,\
+ .exit = sec_skcipher_ctx_exit,\
.setkey = sec_set_key,\
.decrypt = sec_skcipher_decrypt,\
.encrypt = sec_skcipher_encrypt,\
@@ -2168,11 +2164,6 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req)
.ivsize = iv_size,\
}
-#define SEC_SKCIPHER_ALG(name, key_func, min_key_size, \
- max_key_size, blk_size, iv_size) \
- SEC_SKCIPHER_GEN_ALG(name, key_func, min_key_size, max_key_size, \
- sec_skcipher_ctx_init, sec_skcipher_ctx_exit, blk_size, iv_size)
-
static struct sec_skcipher sec_skciphers[] = {
{
.alg_msk = BIT(0),
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 7bb99381bbdf..c290d8937b19 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -282,6 +282,11 @@ static const struct debugfs_reg32 sec_dfx_regs[] = {
{"SEC_BD_SAA6 ", 0x301C38},
{"SEC_BD_SAA7 ", 0x301C3C},
{"SEC_BD_SAA8 ", 0x301C40},
+ {"SEC_RAS_CE_ENABLE ", 0x301050},
+ {"SEC_RAS_FE_ENABLE ", 0x301054},
+ {"SEC_RAS_NFE_ENABLE ", 0x301058},
+ {"SEC_REQ_TRNG_TIME_TH ", 0x30112C},
+ {"SEC_CHANNEL_RNG_REQ_THLD ", 0x302110},
};
/* define the SEC's dfx regs region and region length */
@@ -374,7 +379,7 @@ void sec_destroy_qps(struct hisi_qp **qps, int qp_num)
struct hisi_qp **sec_create_qps(void)
{
- int node = cpu_to_node(smp_processor_id());
+ int node = cpu_to_node(raw_smp_processor_id());
u32 ctx_num = ctx_q_num;
struct hisi_qp **qps;
int ret;
diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c
index c650c741a18d..94e2d66b04b6 100644
--- a/drivers/crypto/hisilicon/zip/zip_crypto.c
+++ b/drivers/crypto/hisilicon/zip/zip_crypto.c
@@ -591,6 +591,7 @@ static struct acomp_alg hisi_zip_acomp_deflate = {
.base = {
.cra_name = "deflate",
.cra_driver_name = "hisi-deflate-acomp",
+ .cra_flags = CRYPTO_ALG_ASYNC,
.cra_module = THIS_MODULE,
.cra_priority = HZIP_ALG_PRIORITY,
.cra_ctxsize = sizeof(struct hisi_zip_ctx),
diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
index 479ba8a1d6b5..c065fd867161 100644
--- a/drivers/crypto/hisilicon/zip/zip_main.c
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -454,7 +454,7 @@ MODULE_DEVICE_TABLE(pci, hisi_zip_dev_ids);
int zip_create_qps(struct hisi_qp **qps, int qp_num, int node)
{
if (node == NUMA_NO_NODE)
- node = cpu_to_node(smp_processor_id());
+ node = cpu_to_node(raw_smp_processor_id());
return hisi_qm_alloc_qps_node(&zip_devices, qp_num, 0, node, qps);
}
diff --git a/drivers/crypto/intel/iaa/iaa_crypto.h b/drivers/crypto/intel/iaa/iaa_crypto.h
index 014420f7beb0..2524091a5f70 100644
--- a/drivers/crypto/intel/iaa/iaa_crypto.h
+++ b/drivers/crypto/intel/iaa/iaa_crypto.h
@@ -59,10 +59,8 @@ struct iaa_device_compression_mode {
const char *name;
struct aecs_comp_table_record *aecs_comp_table;
- struct aecs_decomp_table_record *aecs_decomp_table;
dma_addr_t aecs_comp_table_dma_addr;
- dma_addr_t aecs_decomp_table_dma_addr;
};
/* Representation of IAA device with wqs, populated by probe */
@@ -107,23 +105,6 @@ struct aecs_comp_table_record {
u32 reserved_padding[2];
} __packed;
-/* AECS for decompress */
-struct aecs_decomp_table_record {
- u32 crc;
- u32 xor_checksum;
- u32 low_filter_param;
- u32 high_filter_param;
- u32 output_mod_idx;
- u32 drop_init_decomp_out_bytes;
- u32 reserved[36];
- u32 output_accum_data[2];
- u32 out_bits_valid;
- u32 bit_off_indexing;
- u32 input_accum_data[64];
- u8 size_qw[32];
- u32 decomp_state[1220];
-} __packed;
-
int iaa_aecs_init_fixed(void);
void iaa_aecs_cleanup_fixed(void);
@@ -136,9 +117,6 @@ struct iaa_compression_mode {
int ll_table_size;
u32 *d_table;
int d_table_size;
- u32 *header_table;
- int header_table_size;
- u16 gen_decomp_table_flags;
iaa_dev_comp_init_fn_t init;
iaa_dev_comp_free_fn_t free;
};
@@ -148,9 +126,6 @@ int add_iaa_compression_mode(const char *name,
int ll_table_size,
const u32 *d_table,
int d_table_size,
- const u8 *header_table,
- int header_table_size,
- u16 gen_decomp_table_flags,
iaa_dev_comp_init_fn_t init,
iaa_dev_comp_free_fn_t free);
diff --git a/drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c b/drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c
index 45cf5d74f0fb..19d9a333ac49 100644
--- a/drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c
+++ b/drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c
@@ -78,7 +78,6 @@ int iaa_aecs_init_fixed(void)
sizeof(fixed_ll_sym),
fixed_d_sym,
sizeof(fixed_d_sym),
- NULL, 0, 0,
init_fixed_mode, NULL);
if (!ret)
pr_debug("IAA fixed compression mode initialized\n");
diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c
index dfd3baf0a8d8..1cd304de5388 100644
--- a/drivers/crypto/intel/iaa/iaa_crypto_main.c
+++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c
@@ -258,16 +258,14 @@ static void free_iaa_compression_mode(struct iaa_compression_mode *mode)
kfree(mode->name);
kfree(mode->ll_table);
kfree(mode->d_table);
- kfree(mode->header_table);
kfree(mode);
}
/*
- * IAA Compression modes are defined by an ll_table, a d_table, and an
- * optional header_table. These tables are typically generated and
- * captured using statistics collected from running actual
- * compress/decompress workloads.
+ * IAA Compression modes are defined by an ll_table and a d_table.
+ * These tables are typically generated and captured using statistics
+ * collected from running actual compress/decompress workloads.
*
* A module or other kernel code can add and remove compression modes
* with a given name using the exported @add_iaa_compression_mode()
@@ -315,9 +313,6 @@ EXPORT_SYMBOL_GPL(remove_iaa_compression_mode);
* @ll_table_size: The ll table size in bytes
* @d_table: The d table
* @d_table_size: The d table size in bytes
- * @header_table: Optional header table
- * @header_table_size: Optional header table size in bytes
- * @gen_decomp_table_flags: Otional flags used to generate the decomp table
* @init: Optional callback function to init the compression mode data
* @free: Optional callback function to free the compression mode data
*
@@ -330,9 +325,6 @@ int add_iaa_compression_mode(const char *name,
int ll_table_size,
const u32 *d_table,
int d_table_size,
- const u8 *header_table,
- int header_table_size,
- u16 gen_decomp_table_flags,
iaa_dev_comp_init_fn_t init,
iaa_dev_comp_free_fn_t free)
{
@@ -370,16 +362,6 @@ int add_iaa_compression_mode(const char *name,
mode->d_table_size = d_table_size;
}
- if (header_table) {
- mode->header_table = kzalloc(header_table_size, GFP_KERNEL);
- if (!mode->header_table)
- goto free;
- memcpy(mode->header_table, header_table, header_table_size);
- mode->header_table_size = header_table_size;
- }
-
- mode->gen_decomp_table_flags = gen_decomp_table_flags;
-
mode->init = init;
mode->free = free;
@@ -420,10 +402,6 @@ static void free_device_compression_mode(struct iaa_device *iaa_device,
if (device_mode->aecs_comp_table)
dma_free_coherent(dev, size, device_mode->aecs_comp_table,
device_mode->aecs_comp_table_dma_addr);
- if (device_mode->aecs_decomp_table)
- dma_free_coherent(dev, size, device_mode->aecs_decomp_table,
- device_mode->aecs_decomp_table_dma_addr);
-
kfree(device_mode);
}
@@ -440,73 +418,6 @@ static int check_completion(struct device *dev,
bool compress,
bool only_once);
-static int decompress_header(struct iaa_device_compression_mode *device_mode,
- struct iaa_compression_mode *mode,
- struct idxd_wq *wq)
-{
- dma_addr_t src_addr, src2_addr;
- struct idxd_desc *idxd_desc;
- struct iax_hw_desc *desc;
- struct device *dev;
- int ret = 0;
-
- idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
- if (IS_ERR(idxd_desc))
- return PTR_ERR(idxd_desc);
-
- desc = idxd_desc->iax_hw;
-
- dev = &wq->idxd->pdev->dev;
-
- src_addr = dma_map_single(dev, (void *)mode->header_table,
- mode->header_table_size, DMA_TO_DEVICE);
- dev_dbg(dev, "%s: mode->name %s, src_addr %llx, dev %p, src %p, slen %d\n",
- __func__, mode->name, src_addr, dev,
- mode->header_table, mode->header_table_size);
- if (unlikely(dma_mapping_error(dev, src_addr))) {
- dev_dbg(dev, "dma_map_single err, exiting\n");
- ret = -ENOMEM;
- return ret;
- }
-
- desc->flags = IAX_AECS_GEN_FLAG;
- desc->opcode = IAX_OPCODE_DECOMPRESS;
-
- desc->src1_addr = (u64)src_addr;
- desc->src1_size = mode->header_table_size;
-
- src2_addr = device_mode->aecs_decomp_table_dma_addr;
- desc->src2_addr = (u64)src2_addr;
- desc->src2_size = 1088;
- dev_dbg(dev, "%s: mode->name %s, src2_addr %llx, dev %p, src2_size %d\n",
- __func__, mode->name, desc->src2_addr, dev, desc->src2_size);
- desc->max_dst_size = 0; // suppressed output
-
- desc->decompr_flags = mode->gen_decomp_table_flags;
-
- desc->priv = 0;
-
- desc->completion_addr = idxd_desc->compl_dma;
-
- ret = idxd_submit_desc(wq, idxd_desc);
- if (ret) {
- pr_err("%s: submit_desc failed ret=0x%x\n", __func__, ret);
- goto out;
- }
-
- ret = check_completion(dev, idxd_desc->iax_completion, false, false);
- if (ret)
- dev_dbg(dev, "%s: mode->name %s check_completion failed ret=%d\n",
- __func__, mode->name, ret);
- else
- dev_dbg(dev, "%s: mode->name %s succeeded\n", __func__,
- mode->name);
-out:
- dma_unmap_single(dev, src_addr, 1088, DMA_TO_DEVICE);
-
- return ret;
-}
-
static int init_device_compression_mode(struct iaa_device *iaa_device,
struct iaa_compression_mode *mode,
int idx, struct idxd_wq *wq)
@@ -529,24 +440,11 @@ static int init_device_compression_mode(struct iaa_device *iaa_device,
if (!device_mode->aecs_comp_table)
goto free;
- device_mode->aecs_decomp_table = dma_alloc_coherent(dev, size,
- &device_mode->aecs_decomp_table_dma_addr, GFP_KERNEL);
- if (!device_mode->aecs_decomp_table)
- goto free;
-
/* Add Huffman table to aecs */
memset(device_mode->aecs_comp_table, 0, sizeof(*device_mode->aecs_comp_table));
memcpy(device_mode->aecs_comp_table->ll_sym, mode->ll_table, mode->ll_table_size);
memcpy(device_mode->aecs_comp_table->d_sym, mode->d_table, mode->d_table_size);
- if (mode->header_table) {
- ret = decompress_header(device_mode, mode, wq);
- if (ret) {
- pr_debug("iaa header decompression failed: ret=%d\n", ret);
- goto free;
- }
- }
-
if (mode->init) {
ret = mode->init(device_mode);
if (ret)
@@ -1324,7 +1222,7 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req,
*compression_crc = idxd_desc->iax_completion->crc;
- if (!ctx->async_mode)
+ if (!ctx->async_mode || disable_async)
idxd_free_desc(wq, idxd_desc);
out:
return ret;
@@ -1570,7 +1468,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
*dlen = req->dlen;
- if (!ctx->async_mode)
+ if (!ctx->async_mode || disable_async)
idxd_free_desc(wq, idxd_desc);
/* Update stats */
@@ -1596,6 +1494,7 @@ static int iaa_comp_acompress(struct acomp_req *req)
u32 compression_crc;
struct idxd_wq *wq;
struct device *dev;
+ u64 start_time_ns;
int order = -1;
compression_ctx = crypto_tfm_ctx(tfm);
@@ -1669,8 +1568,10 @@ static int iaa_comp_acompress(struct acomp_req *req)
" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
req->dst, req->dlen, sg_dma_len(req->dst));
+ start_time_ns = iaa_get_ts();
ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr,
&req->dlen, &compression_crc, disable_async);
+ update_max_comp_delay_ns(start_time_ns);
if (ret == -EINPROGRESS)
return ret;
@@ -1717,6 +1618,7 @@ static int iaa_comp_adecompress_alloc_dest(struct acomp_req *req)
struct iaa_wq *iaa_wq;
struct device *dev;
struct idxd_wq *wq;
+ u64 start_time_ns;
int order = -1;
cpu = get_cpu();
@@ -1773,8 +1675,10 @@ alloc_dest:
dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
req->dst, req->dlen, sg_dma_len(req->dst));
+ start_time_ns = iaa_get_ts();
ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
dst_addr, &req->dlen, true);
+ update_max_decomp_delay_ns(start_time_ns);
if (ret == -EOVERFLOW) {
dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
req->dlen *= 2;
@@ -1805,6 +1709,7 @@ static int iaa_comp_adecompress(struct acomp_req *req)
int nr_sgs, cpu, ret = 0;
struct iaa_wq *iaa_wq;
struct device *dev;
+ u64 start_time_ns;
struct idxd_wq *wq;
if (!iaa_crypto_enabled) {
@@ -1864,8 +1769,10 @@ static int iaa_comp_adecompress(struct acomp_req *req)
" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
req->dst, req->dlen, sg_dma_len(req->dst));
+ start_time_ns = iaa_get_ts();
ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
dst_addr, &req->dlen, false);
+ update_max_decomp_delay_ns(start_time_ns);
if (ret == -EINPROGRESS)
return ret;
@@ -1916,6 +1823,7 @@ static struct acomp_alg iaa_acomp_fixed_deflate = {
.base = {
.cra_name = "deflate",
.cra_driver_name = "deflate-iaa",
+ .cra_flags = CRYPTO_ALG_ASYNC,
.cra_ctxsize = sizeof(struct iaa_compression_ctx),
.cra_module = THIS_MODULE,
.cra_priority = IAA_ALG_PRIORITY,
diff --git a/drivers/crypto/intel/iaa/iaa_crypto_stats.c b/drivers/crypto/intel/iaa/iaa_crypto_stats.c
index 2e3b7b73af20..c9f83af4b307 100644
--- a/drivers/crypto/intel/iaa/iaa_crypto_stats.c
+++ b/drivers/crypto/intel/iaa/iaa_crypto_stats.c
@@ -22,8 +22,6 @@ static u64 total_decomp_calls;
static u64 total_sw_decomp_calls;
static u64 max_comp_delay_ns;
static u64 max_decomp_delay_ns;
-static u64 max_acomp_delay_ns;
-static u64 max_adecomp_delay_ns;
static u64 total_comp_bytes_out;
static u64 total_decomp_bytes_in;
static u64 total_completion_einval_errors;
@@ -92,26 +90,6 @@ void update_max_decomp_delay_ns(u64 start_time_ns)
max_decomp_delay_ns = time_diff;
}
-void update_max_acomp_delay_ns(u64 start_time_ns)
-{
- u64 time_diff;
-
- time_diff = ktime_get_ns() - start_time_ns;
-
- if (time_diff > max_acomp_delay_ns)
- max_acomp_delay_ns = time_diff;
-}
-
-void update_max_adecomp_delay_ns(u64 start_time_ns)
-{
- u64 time_diff;
-
- time_diff = ktime_get_ns() - start_time_ns;
-
- if (time_diff > max_adecomp_delay_ns)
- max_adecomp_delay_ns = time_diff;
-}
-
void update_wq_comp_calls(struct idxd_wq *idxd_wq)
{
struct iaa_wq *wq = idxd_wq_get_private(idxd_wq);
@@ -151,8 +129,6 @@ static void reset_iaa_crypto_stats(void)
total_sw_decomp_calls = 0;
max_comp_delay_ns = 0;
max_decomp_delay_ns = 0;
- max_acomp_delay_ns = 0;
- max_adecomp_delay_ns = 0;
total_comp_bytes_out = 0;
total_decomp_bytes_in = 0;
total_completion_einval_errors = 0;
@@ -275,17 +251,11 @@ int __init iaa_crypto_debugfs_init(void)
return -ENODEV;
iaa_crypto_debugfs_root = debugfs_create_dir("iaa_crypto", NULL);
- if (!iaa_crypto_debugfs_root)
- return -ENOMEM;
debugfs_create_u64("max_comp_delay_ns", 0644,
iaa_crypto_debugfs_root, &max_comp_delay_ns);
debugfs_create_u64("max_decomp_delay_ns", 0644,
iaa_crypto_debugfs_root, &max_decomp_delay_ns);
- debugfs_create_u64("max_acomp_delay_ns", 0644,
- iaa_crypto_debugfs_root, &max_comp_delay_ns);
- debugfs_create_u64("max_adecomp_delay_ns", 0644,
- iaa_crypto_debugfs_root, &max_decomp_delay_ns);
debugfs_create_u64("total_comp_calls", 0644,
iaa_crypto_debugfs_root, &total_comp_calls);
debugfs_create_u64("total_decomp_calls", 0644,
diff --git a/drivers/crypto/intel/iaa/iaa_crypto_stats.h b/drivers/crypto/intel/iaa/iaa_crypto_stats.h
index c10b87b86fa4..c916ca83f070 100644
--- a/drivers/crypto/intel/iaa/iaa_crypto_stats.h
+++ b/drivers/crypto/intel/iaa/iaa_crypto_stats.h
@@ -15,8 +15,6 @@ void update_total_sw_decomp_calls(void);
void update_total_decomp_bytes_in(int n);
void update_max_comp_delay_ns(u64 start_time_ns);
void update_max_decomp_delay_ns(u64 start_time_ns);
-void update_max_acomp_delay_ns(u64 start_time_ns);
-void update_max_adecomp_delay_ns(u64 start_time_ns);
void update_completion_einval_errs(void);
void update_completion_timeout_errs(void);
void update_completion_comp_buf_overflow_errs(void);
@@ -26,6 +24,8 @@ void update_wq_comp_bytes(struct idxd_wq *idxd_wq, int n);
void update_wq_decomp_calls(struct idxd_wq *idxd_wq);
void update_wq_decomp_bytes(struct idxd_wq *idxd_wq, int n);
+static inline u64 iaa_get_ts(void) { return ktime_get_ns(); }
+
#else
static inline int iaa_crypto_debugfs_init(void) { return 0; }
static inline void iaa_crypto_debugfs_cleanup(void) {}
@@ -37,8 +37,6 @@ static inline void update_total_sw_decomp_calls(void) {}
static inline void update_total_decomp_bytes_in(int n) {}
static inline void update_max_comp_delay_ns(u64 start_time_ns) {}
static inline void update_max_decomp_delay_ns(u64 start_time_ns) {}
-static inline void update_max_acomp_delay_ns(u64 start_time_ns) {}
-static inline void update_max_adecomp_delay_ns(u64 start_time_ns) {}
static inline void update_completion_einval_errs(void) {}
static inline void update_completion_timeout_errs(void) {}
static inline void update_completion_comp_buf_overflow_errs(void) {}
@@ -48,6 +46,8 @@ static inline void update_wq_comp_bytes(struct idxd_wq *idxd_wq, int n) {}
static inline void update_wq_decomp_calls(struct idxd_wq *idxd_wq) {}
static inline void update_wq_decomp_bytes(struct idxd_wq *idxd_wq, int n) {}
+static inline u64 iaa_get_ts(void) { return 0; }
+
#endif // CONFIG_CRYPTO_DEV_IAA_CRYPTO_STATS
#endif
diff --git a/drivers/crypto/intel/qat/Kconfig b/drivers/crypto/intel/qat/Kconfig
index c120f6715a09..02fb8abe4e6e 100644
--- a/drivers/crypto/intel/qat/Kconfig
+++ b/drivers/crypto/intel/qat/Kconfig
@@ -106,3 +106,17 @@ config CRYPTO_DEV_QAT_C62XVF
To compile this as a module, choose M here: the module
will be called qat_c62xvf.
+
+config CRYPTO_DEV_QAT_ERROR_INJECTION
+ bool "Support for Intel(R) QAT Devices Heartbeat Error Injection"
+ depends on CRYPTO_DEV_QAT
+ depends on DEBUG_FS
+ help
+ Enables a mechanism that allows to inject a heartbeat error on
+ Intel(R) QuickAssist devices for testing purposes.
+
+ This is intended for developer use only.
+ If unsure, say N.
+
+ This functionality is available via debugfs entry of the Intel(R)
+ QuickAssist device
diff --git a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c
index a87d29ae724f..1102c47f8293 100644
--- a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c
@@ -361,53 +361,6 @@ static u32 get_ena_thd_mask(struct adf_accel_dev *accel_dev, u32 obj_num)
}
}
-static u16 get_ring_to_svc_map(struct adf_accel_dev *accel_dev)
-{
- enum adf_cfg_service_type rps[RP_GROUP_COUNT] = { };
- const struct adf_fw_config *fw_config;
- u16 ring_to_svc_map;
- int i, j;
-
- fw_config = get_fw_config(accel_dev);
- if (!fw_config)
- return 0;
-
- for (i = 0; i < RP_GROUP_COUNT; i++) {
- switch (fw_config[i].ae_mask) {
- case ADF_AE_GROUP_0:
- j = RP_GROUP_0;
- break;
- case ADF_AE_GROUP_1:
- j = RP_GROUP_1;
- break;
- default:
- return 0;
- }
-
- switch (fw_config[i].obj) {
- case ADF_FW_SYM_OBJ:
- rps[j] = SYM;
- break;
- case ADF_FW_ASYM_OBJ:
- rps[j] = ASYM;
- break;
- case ADF_FW_DC_OBJ:
- rps[j] = COMP;
- break;
- default:
- rps[j] = 0;
- break;
- }
- }
-
- ring_to_svc_map = rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_0_SHIFT |
- rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_1_SHIFT |
- rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_2_SHIFT |
- rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_3_SHIFT;
-
- return ring_to_svc_map;
-}
-
static const char *uof_get_name(struct adf_accel_dev *accel_dev, u32 obj_num,
const char * const fw_objs[], int num_objs)
{
@@ -433,6 +386,20 @@ static const char *uof_get_name_420xx(struct adf_accel_dev *accel_dev, u32 obj_n
return uof_get_name(accel_dev, obj_num, adf_420xx_fw_objs, num_fw_objs);
}
+static int uof_get_obj_type(struct adf_accel_dev *accel_dev, u32 obj_num)
+{
+ const struct adf_fw_config *fw_config;
+
+ if (obj_num >= uof_get_num_objs(accel_dev))
+ return -EINVAL;
+
+ fw_config = get_fw_config(accel_dev);
+ if (!fw_config)
+ return -EINVAL;
+
+ return fw_config[obj_num].obj;
+}
+
static u32 uof_get_ae_mask(struct adf_accel_dev *accel_dev, u32 obj_num)
{
const struct adf_fw_config *fw_config;
@@ -496,12 +463,13 @@ void adf_init_hw_data_420xx(struct adf_hw_device_data *hw_data, u32 dev_id)
hw_data->fw_mmp_name = ADF_420XX_MMP;
hw_data->uof_get_name = uof_get_name_420xx;
hw_data->uof_get_num_objs = uof_get_num_objs;
+ hw_data->uof_get_obj_type = uof_get_obj_type;
hw_data->uof_get_ae_mask = uof_get_ae_mask;
hw_data->get_rp_group = get_rp_group;
hw_data->get_ena_thd_mask = get_ena_thd_mask;
hw_data->set_msix_rttable = adf_gen4_set_msix_default_rttable;
hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer;
- hw_data->get_ring_to_svc_map = get_ring_to_svc_map;
+ hw_data->get_ring_to_svc_map = adf_gen4_get_ring_to_svc_map;
hw_data->disable_iov = adf_disable_sriov;
hw_data->ring_pair_reset = adf_gen4_ring_pair_reset;
hw_data->enable_pm = adf_gen4_enable_pm;
diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
index 94a0ebb03d8c..927506cf271d 100644
--- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
@@ -320,53 +320,6 @@ static u32 get_ena_thd_mask_401xx(struct adf_accel_dev *accel_dev, u32 obj_num)
}
}
-static u16 get_ring_to_svc_map(struct adf_accel_dev *accel_dev)
-{
- enum adf_cfg_service_type rps[RP_GROUP_COUNT];
- const struct adf_fw_config *fw_config;
- u16 ring_to_svc_map;
- int i, j;
-
- fw_config = get_fw_config(accel_dev);
- if (!fw_config)
- return 0;
-
- for (i = 0; i < RP_GROUP_COUNT; i++) {
- switch (fw_config[i].ae_mask) {
- case ADF_AE_GROUP_0:
- j = RP_GROUP_0;
- break;
- case ADF_AE_GROUP_1:
- j = RP_GROUP_1;
- break;
- default:
- return 0;
- }
-
- switch (fw_config[i].obj) {
- case ADF_FW_SYM_OBJ:
- rps[j] = SYM;
- break;
- case ADF_FW_ASYM_OBJ:
- rps[j] = ASYM;
- break;
- case ADF_FW_DC_OBJ:
- rps[j] = COMP;
- break;
- default:
- rps[j] = 0;
- break;
- }
- }
-
- ring_to_svc_map = rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_0_SHIFT |
- rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_1_SHIFT |
- rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_2_SHIFT |
- rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_3_SHIFT;
-
- return ring_to_svc_map;
-}
-
static const char *uof_get_name(struct adf_accel_dev *accel_dev, u32 obj_num,
const char * const fw_objs[], int num_objs)
{
@@ -399,6 +352,20 @@ static const char *uof_get_name_402xx(struct adf_accel_dev *accel_dev, u32 obj_n
return uof_get_name(accel_dev, obj_num, adf_402xx_fw_objs, num_fw_objs);
}
+static int uof_get_obj_type(struct adf_accel_dev *accel_dev, u32 obj_num)
+{
+ const struct adf_fw_config *fw_config;
+
+ if (obj_num >= uof_get_num_objs(accel_dev))
+ return -EINVAL;
+
+ fw_config = get_fw_config(accel_dev);
+ if (!fw_config)
+ return -EINVAL;
+
+ return fw_config[obj_num].obj;
+}
+
static u32 uof_get_ae_mask(struct adf_accel_dev *accel_dev, u32 obj_num)
{
const struct adf_fw_config *fw_config;
@@ -479,11 +446,12 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id)
break;
}
hw_data->uof_get_num_objs = uof_get_num_objs;
+ hw_data->uof_get_obj_type = uof_get_obj_type;
hw_data->uof_get_ae_mask = uof_get_ae_mask;
hw_data->get_rp_group = get_rp_group;
hw_data->set_msix_rttable = adf_gen4_set_msix_default_rttable;
hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer;
- hw_data->get_ring_to_svc_map = get_ring_to_svc_map;
+ hw_data->get_ring_to_svc_map = adf_gen4_get_ring_to_svc_map;
hw_data->disable_iov = adf_disable_sriov;
hw_data->ring_pair_reset = adf_gen4_ring_pair_reset;
hw_data->enable_pm = adf_gen4_enable_pm;
diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile
index 6908727bff3b..5915cde8a7aa 100644
--- a/drivers/crypto/intel/qat/qat_common/Makefile
+++ b/drivers/crypto/intel/qat/qat_common/Makefile
@@ -53,3 +53,5 @@ intel_qat-$(CONFIG_PCI_IOV) += adf_sriov.o adf_vf_isr.o adf_pfvf_utils.o \
adf_pfvf_pf_msg.o adf_pfvf_pf_proto.o \
adf_pfvf_vf_msg.o adf_pfvf_vf_proto.o \
adf_gen2_pfvf.o adf_gen4_pfvf.o
+
+intel_qat-$(CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION) += adf_heartbeat_inject.o
diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
index a16c7e6edc65..08658c3a01e9 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
@@ -248,6 +248,7 @@ struct adf_hw_device_data {
void (*set_msix_rttable)(struct adf_accel_dev *accel_dev);
const char *(*uof_get_name)(struct adf_accel_dev *accel_dev, u32 obj_num);
u32 (*uof_get_num_objs)(struct adf_accel_dev *accel_dev);
+ int (*uof_get_obj_type)(struct adf_accel_dev *accel_dev, u32 obj_num);
u32 (*uof_get_ae_mask)(struct adf_accel_dev *accel_dev, u32 obj_num);
int (*get_rp_group)(struct adf_accel_dev *accel_dev, u32 ae_mask);
u32 (*get_ena_thd_mask)(struct adf_accel_dev *accel_dev, u32 obj_num);
@@ -332,6 +333,7 @@ struct adf_accel_vf_info {
struct ratelimit_state vf2pf_ratelimit;
u32 vf_nr;
bool init;
+ bool restarting;
u8 vf_compat_ver;
};
@@ -401,6 +403,7 @@ struct adf_accel_dev {
struct adf_error_counters ras_errors;
struct mutex state_lock; /* protect state of the device */
bool is_vf;
+ bool autoreset_on_error;
u32 accel_id;
};
#endif
diff --git a/drivers/crypto/intel/qat/qat_common/adf_aer.c b/drivers/crypto/intel/qat/qat_common/adf_aer.c
index a39e70bd4b21..9da2278bd5b7 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_aer.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_aer.c
@@ -7,8 +7,15 @@
#include <linux/delay.h>
#include "adf_accel_devices.h"
#include "adf_common_drv.h"
+#include "adf_pfvf_pf_msg.h"
+
+struct adf_fatal_error_data {
+ struct adf_accel_dev *accel_dev;
+ struct work_struct work;
+};
static struct workqueue_struct *device_reset_wq;
+static struct workqueue_struct *device_sriov_wq;
static pci_ers_result_t adf_error_detected(struct pci_dev *pdev,
pci_channel_state_t state)
@@ -26,6 +33,19 @@ static pci_ers_result_t adf_error_detected(struct pci_dev *pdev,
return PCI_ERS_RESULT_DISCONNECT;
}
+ set_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
+ if (accel_dev->hw_device->exit_arb) {
+ dev_dbg(&pdev->dev, "Disabling arbitration\n");
+ accel_dev->hw_device->exit_arb(accel_dev);
+ }
+ adf_error_notifier(accel_dev);
+ adf_pf2vf_notify_fatal_error(accel_dev);
+ adf_dev_restarting_notify(accel_dev);
+ adf_pf2vf_notify_restarting(accel_dev);
+ adf_pf2vf_wait_for_restarting_complete(accel_dev);
+ pci_clear_master(pdev);
+ adf_dev_down(accel_dev, false);
+
return PCI_ERS_RESULT_NEED_RESET;
}
@@ -37,6 +57,13 @@ struct adf_reset_dev_data {
struct work_struct reset_work;
};
+/* sriov dev data */
+struct adf_sriov_dev_data {
+ struct adf_accel_dev *accel_dev;
+ struct completion compl;
+ struct work_struct sriov_work;
+};
+
void adf_reset_sbr(struct adf_accel_dev *accel_dev)
{
struct pci_dev *pdev = accel_to_pci_dev(accel_dev);
@@ -82,29 +109,57 @@ void adf_dev_restore(struct adf_accel_dev *accel_dev)
}
}
+static void adf_device_sriov_worker(struct work_struct *work)
+{
+ struct adf_sriov_dev_data *sriov_data =
+ container_of(work, struct adf_sriov_dev_data, sriov_work);
+
+ adf_reenable_sriov(sriov_data->accel_dev);
+ complete(&sriov_data->compl);
+}
+
static void adf_device_reset_worker(struct work_struct *work)
{
struct adf_reset_dev_data *reset_data =
container_of(work, struct adf_reset_dev_data, reset_work);
struct adf_accel_dev *accel_dev = reset_data->accel_dev;
+ unsigned long wait_jiffies = msecs_to_jiffies(10000);
+ struct adf_sriov_dev_data sriov_data;
adf_dev_restarting_notify(accel_dev);
if (adf_dev_restart(accel_dev)) {
/* The device hanged and we can't restart it so stop here */
dev_err(&GET_DEV(accel_dev), "Restart device failed\n");
- if (reset_data->mode == ADF_DEV_RESET_ASYNC)
+ if (reset_data->mode == ADF_DEV_RESET_ASYNC ||
+ completion_done(&reset_data->compl))
kfree(reset_data);
WARN(1, "QAT: device restart failed. Device is unusable\n");
return;
}
+
+ sriov_data.accel_dev = accel_dev;
+ init_completion(&sriov_data.compl);
+ INIT_WORK(&sriov_data.sriov_work, adf_device_sriov_worker);
+ queue_work(device_sriov_wq, &sriov_data.sriov_work);
+ if (wait_for_completion_timeout(&sriov_data.compl, wait_jiffies))
+ adf_pf2vf_notify_restarted(accel_dev);
+
adf_dev_restarted_notify(accel_dev);
clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
- /* The dev is back alive. Notify the caller if in sync mode */
- if (reset_data->mode == ADF_DEV_RESET_SYNC)
- complete(&reset_data->compl);
- else
+ /*
+ * The dev is back alive. Notify the caller if in sync mode
+ *
+ * If device restart will take a more time than expected,
+ * the schedule_reset() function can timeout and exit. This can be
+ * detected by calling the completion_done() function. In this case
+ * the reset_data structure needs to be freed here.
+ */
+ if (reset_data->mode == ADF_DEV_RESET_ASYNC ||
+ completion_done(&reset_data->compl))
kfree(reset_data);
+ else
+ complete(&reset_data->compl);
}
static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev,
@@ -137,8 +192,9 @@ static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev,
dev_err(&GET_DEV(accel_dev),
"Reset device timeout expired\n");
ret = -EFAULT;
+ } else {
+ kfree(reset_data);
}
- kfree(reset_data);
return ret;
}
return 0;
@@ -147,14 +203,25 @@ static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev,
static pci_ers_result_t adf_slot_reset(struct pci_dev *pdev)
{
struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);
+ int res = 0;
if (!accel_dev) {
pr_err("QAT: Can't find acceleration device\n");
return PCI_ERS_RESULT_DISCONNECT;
}
- if (adf_dev_aer_schedule_reset(accel_dev, ADF_DEV_RESET_SYNC))
+
+ if (!pdev->is_busmaster)
+ pci_set_master(pdev);
+ pci_restore_state(pdev);
+ pci_save_state(pdev);
+ res = adf_dev_up(accel_dev, false);
+ if (res && res != -EALREADY)
return PCI_ERS_RESULT_DISCONNECT;
+ adf_reenable_sriov(accel_dev);
+ adf_pf2vf_notify_restarted(accel_dev);
+ adf_dev_restarted_notify(accel_dev);
+ clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
return PCI_ERS_RESULT_RECOVERED;
}
@@ -171,11 +238,62 @@ const struct pci_error_handlers adf_err_handler = {
};
EXPORT_SYMBOL_GPL(adf_err_handler);
+int adf_dev_autoreset(struct adf_accel_dev *accel_dev)
+{
+ if (accel_dev->autoreset_on_error)
+ return adf_dev_aer_schedule_reset(accel_dev, ADF_DEV_RESET_ASYNC);
+
+ return 0;
+}
+
+static void adf_notify_fatal_error_worker(struct work_struct *work)
+{
+ struct adf_fatal_error_data *wq_data =
+ container_of(work, struct adf_fatal_error_data, work);
+ struct adf_accel_dev *accel_dev = wq_data->accel_dev;
+ struct adf_hw_device_data *hw_device = accel_dev->hw_device;
+
+ adf_error_notifier(accel_dev);
+
+ if (!accel_dev->is_vf) {
+ /* Disable arbitration to stop processing of new requests */
+ if (accel_dev->autoreset_on_error && hw_device->exit_arb)
+ hw_device->exit_arb(accel_dev);
+ if (accel_dev->pf.vf_info)
+ adf_pf2vf_notify_fatal_error(accel_dev);
+ adf_dev_autoreset(accel_dev);
+ }
+
+ kfree(wq_data);
+}
+
+int adf_notify_fatal_error(struct adf_accel_dev *accel_dev)
+{
+ struct adf_fatal_error_data *wq_data;
+
+ wq_data = kzalloc(sizeof(*wq_data), GFP_ATOMIC);
+ if (!wq_data)
+ return -ENOMEM;
+
+ wq_data->accel_dev = accel_dev;
+ INIT_WORK(&wq_data->work, adf_notify_fatal_error_worker);
+ adf_misc_wq_queue_work(&wq_data->work);
+
+ return 0;
+}
+
int adf_init_aer(void)
{
device_reset_wq = alloc_workqueue("qat_device_reset_wq",
WQ_MEM_RECLAIM, 0);
- return !device_reset_wq ? -EFAULT : 0;
+ if (!device_reset_wq)
+ return -EFAULT;
+
+ device_sriov_wq = alloc_workqueue("qat_device_sriov_wq", 0, 0);
+ if (!device_sriov_wq)
+ return -EFAULT;
+
+ return 0;
}
void adf_exit_aer(void)
@@ -183,4 +301,8 @@ void adf_exit_aer(void)
if (device_reset_wq)
destroy_workqueue(device_reset_wq);
device_reset_wq = NULL;
+
+ if (device_sriov_wq)
+ destroy_workqueue(device_sriov_wq);
+ device_sriov_wq = NULL;
}
diff --git a/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h b/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h
index 322b76903a73..e015ad6cace2 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h
@@ -49,5 +49,6 @@
ADF_ETRMGR_BANK "%d" ADF_ETRMGR_CORE_AFFINITY
#define ADF_ACCEL_STR "Accelerator%d"
#define ADF_HEARTBEAT_TIMER "HeartbeatTimer"
+#define ADF_SRIOV_ENABLED "SriovEnabled"
#endif
diff --git a/drivers/crypto/intel/qat/qat_common/adf_clock.c b/drivers/crypto/intel/qat/qat_common/adf_clock.c
index 01e0a389e462..cf89f57de2a7 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_clock.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_clock.c
@@ -83,6 +83,9 @@ static int measure_clock(struct adf_accel_dev *accel_dev, u32 *frequency)
}
delta_us = timespec_to_us(&ts3) - timespec_to_us(&ts1);
+ if (!delta_us)
+ return -EINVAL;
+
temp = (timestamp2 - timestamp1) * ME_CLK_DIVIDER * 10;
temp = DIV_ROUND_CLOSEST_ULL(temp, delta_us);
/*
diff --git a/drivers/crypto/intel/qat/qat_common/adf_cnv_dbgfs.c b/drivers/crypto/intel/qat/qat_common/adf_cnv_dbgfs.c
index 07119c487da0..627953a72d47 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_cnv_dbgfs.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_cnv_dbgfs.c
@@ -16,7 +16,6 @@
#define CNV_ERR_INFO_MASK GENMASK(11, 0)
#define CNV_ERR_TYPE_MASK GENMASK(15, 12)
-#define CNV_SLICE_ERR_MASK GENMASK(7, 0)
#define CNV_SLICE_ERR_SIGN_BIT_INDEX 7
#define CNV_DELTA_ERR_SIGN_BIT_INDEX 11
diff --git a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
index f06188033a93..57328249c89e 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
@@ -40,6 +40,7 @@ enum adf_event {
ADF_EVENT_SHUTDOWN,
ADF_EVENT_RESTARTING,
ADF_EVENT_RESTARTED,
+ ADF_EVENT_FATAL_ERROR,
};
struct service_hndl {
@@ -60,6 +61,8 @@ int adf_dev_restart(struct adf_accel_dev *accel_dev);
void adf_devmgr_update_class_index(struct adf_hw_device_data *hw_data);
void adf_clean_vf_map(bool);
+int adf_notify_fatal_error(struct adf_accel_dev *accel_dev);
+void adf_error_notifier(struct adf_accel_dev *accel_dev);
int adf_devmgr_add_dev(struct adf_accel_dev *accel_dev,
struct adf_accel_dev *pf);
void adf_devmgr_rm_dev(struct adf_accel_dev *accel_dev,
@@ -84,12 +87,14 @@ int adf_ae_stop(struct adf_accel_dev *accel_dev);
extern const struct pci_error_handlers adf_err_handler;
void adf_reset_sbr(struct adf_accel_dev *accel_dev);
void adf_reset_flr(struct adf_accel_dev *accel_dev);
+int adf_dev_autoreset(struct adf_accel_dev *accel_dev);
void adf_dev_restore(struct adf_accel_dev *accel_dev);
int adf_init_aer(void);
void adf_exit_aer(void);
int adf_init_arb(struct adf_accel_dev *accel_dev);
void adf_exit_arb(struct adf_accel_dev *accel_dev);
void adf_update_ring_arb(struct adf_etr_ring_data *ring);
+int adf_disable_arb_thd(struct adf_accel_dev *accel_dev, u32 ae, u32 thr);
int adf_dev_get(struct adf_accel_dev *accel_dev);
void adf_dev_put(struct adf_accel_dev *accel_dev);
@@ -188,6 +193,7 @@ bool adf_misc_wq_queue_delayed_work(struct delayed_work *work,
#if defined(CONFIG_PCI_IOV)
int adf_sriov_configure(struct pci_dev *pdev, int numvfs);
void adf_disable_sriov(struct adf_accel_dev *accel_dev);
+void adf_reenable_sriov(struct adf_accel_dev *accel_dev);
void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask);
void adf_disable_all_vf2pf_interrupts(struct adf_accel_dev *accel_dev);
bool adf_recv_and_handle_pf2vf_msg(struct adf_accel_dev *accel_dev);
@@ -208,6 +214,10 @@ static inline void adf_disable_sriov(struct adf_accel_dev *accel_dev)
{
}
+static inline void adf_reenable_sriov(struct adf_accel_dev *accel_dev)
+{
+}
+
static inline int adf_init_pf_wq(void)
{
return 0;
diff --git a/drivers/crypto/intel/qat/qat_common/adf_dev_mgr.c b/drivers/crypto/intel/qat/qat_common/adf_dev_mgr.c
index 86ee36feefad..f07b748795f7 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_dev_mgr.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_dev_mgr.c
@@ -60,10 +60,10 @@ static int adf_get_vf_real_id(u32 fake)
/**
* adf_clean_vf_map() - Cleans VF id mapings
- *
- * Function cleans internal ids for virtual functions.
* @vf: flag indicating whether mappings is cleaned
* for vfs only or for vfs and pfs
+ *
+ * Function cleans internal ids for virtual functions.
*/
void adf_clean_vf_map(bool vf)
{
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c
index 9985683056d5..d28e1921940a 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c
@@ -4,6 +4,7 @@
#include "adf_accel_devices.h"
#include "adf_cfg_services.h"
#include "adf_common_drv.h"
+#include "adf_fw_config.h"
#include "adf_gen4_hw_data.h"
#include "adf_gen4_pm.h"
@@ -398,6 +399,9 @@ int adf_gen4_init_thd2arb_map(struct adf_accel_dev *accel_dev)
ADF_GEN4_ADMIN_ACCELENGINES;
if (srv_id == SVC_DCC) {
+ if (ae_cnt > ICP_QAT_HW_AE_DELIMITER)
+ return -EINVAL;
+
memcpy(thd2arb_map, thrd_to_arb_map_dcc,
array_size(sizeof(*thd2arb_map), ae_cnt));
return 0;
@@ -430,3 +434,58 @@ int adf_gen4_init_thd2arb_map(struct adf_accel_dev *accel_dev)
return 0;
}
EXPORT_SYMBOL_GPL(adf_gen4_init_thd2arb_map);
+
+u16 adf_gen4_get_ring_to_svc_map(struct adf_accel_dev *accel_dev)
+{
+ struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev);
+ enum adf_cfg_service_type rps[RP_GROUP_COUNT] = { };
+ unsigned int ae_mask, start_id, worker_obj_cnt, i;
+ u16 ring_to_svc_map;
+ int rp_group;
+
+ if (!hw_data->get_rp_group || !hw_data->uof_get_ae_mask ||
+ !hw_data->uof_get_obj_type || !hw_data->uof_get_num_objs)
+ return 0;
+
+ /* If dcc, all rings handle compression requests */
+ if (adf_get_service_enabled(accel_dev) == SVC_DCC) {
+ for (i = 0; i < RP_GROUP_COUNT; i++)
+ rps[i] = COMP;
+ goto set_mask;
+ }
+
+ worker_obj_cnt = hw_data->uof_get_num_objs(accel_dev) -
+ ADF_GEN4_ADMIN_ACCELENGINES;
+ start_id = worker_obj_cnt - RP_GROUP_COUNT;
+
+ for (i = start_id; i < worker_obj_cnt; i++) {
+ ae_mask = hw_data->uof_get_ae_mask(accel_dev, i);
+ rp_group = hw_data->get_rp_group(accel_dev, ae_mask);
+ if (rp_group >= RP_GROUP_COUNT || rp_group < RP_GROUP_0)
+ return 0;
+
+ switch (hw_data->uof_get_obj_type(accel_dev, i)) {
+ case ADF_FW_SYM_OBJ:
+ rps[rp_group] = SYM;
+ break;
+ case ADF_FW_ASYM_OBJ:
+ rps[rp_group] = ASYM;
+ break;
+ case ADF_FW_DC_OBJ:
+ rps[rp_group] = COMP;
+ break;
+ default:
+ rps[rp_group] = 0;
+ break;
+ }
+ }
+
+set_mask:
+ ring_to_svc_map = rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_0_SHIFT |
+ rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_1_SHIFT |
+ rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_2_SHIFT |
+ rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_3_SHIFT;
+
+ return ring_to_svc_map;
+}
+EXPORT_SYMBOL_GPL(adf_gen4_get_ring_to_svc_map);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h
index 7d8a774cadc8..c6e80df5a85a 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h
@@ -235,5 +235,6 @@ int adf_gen4_ring_pair_reset(struct adf_accel_dev *accel_dev, u32 bank_number);
void adf_gen4_set_msix_default_rttable(struct adf_accel_dev *accel_dev);
void adf_gen4_set_ssm_wdtimer(struct adf_accel_dev *accel_dev);
int adf_gen4_init_thd2arb_map(struct adf_accel_dev *accel_dev);
+u16 adf_gen4_get_ring_to_svc_map(struct adf_accel_dev *accel_dev);
#endif
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_ras.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_ras.c
index 048c24607939..2dd3772bf58a 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen4_ras.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_ras.c
@@ -1007,8 +1007,7 @@ static bool adf_handle_spppar_err(struct adf_accel_dev *accel_dev,
static bool adf_handle_ssmcpppar_err(struct adf_accel_dev *accel_dev,
void __iomem *csr, u32 iastatssm)
{
- u32 reg = ADF_CSR_RD(csr, ADF_GEN4_SSMCPPERR);
- u32 bits_num = BITS_PER_REG(reg);
+ u32 reg, bits_num = BITS_PER_REG(reg);
bool reset_required = false;
unsigned long errs_bits;
u32 bit_iterator;
@@ -1106,8 +1105,7 @@ static bool adf_handle_rf_parr_err(struct adf_accel_dev *accel_dev,
static bool adf_handle_ser_err_ssmsh(struct adf_accel_dev *accel_dev,
void __iomem *csr, u32 iastatssm)
{
- u32 reg = ADF_CSR_RD(csr, ADF_GEN4_SER_ERR_SSMSH);
- u32 bits_num = BITS_PER_REG(reg);
+ u32 reg, bits_num = BITS_PER_REG(reg);
bool reset_required = false;
unsigned long errs_bits;
u32 bit_iterator;
diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c
index 13f48d2f6da8..b19aa1ef8eee 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c
@@ -23,12 +23,6 @@
#define ADF_HB_EMPTY_SIG 0xA5A5A5A5
-/* Heartbeat counter pair */
-struct hb_cnt_pair {
- __u16 resp_heartbeat_cnt;
- __u16 req_heartbeat_cnt;
-};
-
static int adf_hb_check_polling_freq(struct adf_accel_dev *accel_dev)
{
u64 curr_time = adf_clock_get_current_time();
@@ -211,6 +205,19 @@ static int adf_hb_get_status(struct adf_accel_dev *accel_dev)
return ret;
}
+static void adf_heartbeat_reset(struct adf_accel_dev *accel_dev)
+{
+ u64 curr_time = adf_clock_get_current_time();
+ u64 time_since_reset = curr_time - accel_dev->heartbeat->last_hb_reset_time;
+
+ if (time_since_reset < ADF_CFG_HB_RESET_MS)
+ return;
+
+ accel_dev->heartbeat->last_hb_reset_time = curr_time;
+ if (adf_notify_fatal_error(accel_dev))
+ dev_err(&GET_DEV(accel_dev), "Failed to notify fatal error\n");
+}
+
void adf_heartbeat_status(struct adf_accel_dev *accel_dev,
enum adf_device_heartbeat_status *hb_status)
{
@@ -235,6 +242,7 @@ void adf_heartbeat_status(struct adf_accel_dev *accel_dev,
"Heartbeat ERROR: QAT is not responding.\n");
*hb_status = HB_DEV_UNRESPONSIVE;
hb->hb_failed_counter++;
+ adf_heartbeat_reset(accel_dev);
return;
}
diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h
index b22e3cb29798..16fdfb48b196 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h
@@ -13,17 +13,26 @@ struct dentry;
#define ADF_CFG_HB_TIMER_DEFAULT_MS 500
#define ADF_CFG_HB_COUNT_THRESHOLD 3
+#define ADF_CFG_HB_RESET_MS 5000
+
enum adf_device_heartbeat_status {
HB_DEV_UNRESPONSIVE = 0,
HB_DEV_ALIVE,
HB_DEV_UNSUPPORTED,
};
+/* Heartbeat counter pair */
+struct hb_cnt_pair {
+ __u16 resp_heartbeat_cnt;
+ __u16 req_heartbeat_cnt;
+};
+
struct adf_heartbeat {
unsigned int hb_sent_counter;
unsigned int hb_failed_counter;
unsigned int hb_timer;
u64 last_hb_check_time;
+ u64 last_hb_reset_time;
bool ctrs_cnt_checked;
struct hb_dma_addr {
dma_addr_t phy_addr;
@@ -35,6 +44,9 @@ struct adf_heartbeat {
struct dentry *cfg;
struct dentry *sent;
struct dentry *failed;
+#ifdef CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION
+ struct dentry *inject_error;
+#endif
} dbgfs;
};
@@ -51,6 +63,15 @@ void adf_heartbeat_status(struct adf_accel_dev *accel_dev,
enum adf_device_heartbeat_status *hb_status);
void adf_heartbeat_check_ctrs(struct adf_accel_dev *accel_dev);
+#ifdef CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION
+int adf_heartbeat_inject_error(struct adf_accel_dev *accel_dev);
+#else
+static inline int adf_heartbeat_inject_error(struct adf_accel_dev *accel_dev)
+{
+ return -EPERM;
+}
+#endif
+
#else
static inline int adf_heartbeat_init(struct adf_accel_dev *accel_dev)
{
diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.c b/drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.c
index 2661af6a2ef6..cccdff24b48d 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.c
@@ -155,6 +155,44 @@ static const struct file_operations adf_hb_cfg_fops = {
.write = adf_hb_cfg_write,
};
+static ssize_t adf_hb_error_inject_write(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct adf_accel_dev *accel_dev = file->private_data;
+ char buf[3];
+ int ret;
+
+ /* last byte left as string termination */
+ if (*ppos != 0 || count != 2)
+ return -EINVAL;
+
+ if (copy_from_user(buf, user_buf, count))
+ return -EFAULT;
+ buf[count] = '\0';
+
+ if (buf[0] != '1')
+ return -EINVAL;
+
+ ret = adf_heartbeat_inject_error(accel_dev);
+ if (ret) {
+ dev_err(&GET_DEV(accel_dev),
+ "Heartbeat error injection failed with status %d\n",
+ ret);
+ return ret;
+ }
+
+ dev_info(&GET_DEV(accel_dev), "Heartbeat error injection enabled\n");
+
+ return count;
+}
+
+static const struct file_operations adf_hb_error_inject_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = adf_hb_error_inject_write,
+};
+
void adf_heartbeat_dbgfs_add(struct adf_accel_dev *accel_dev)
{
struct adf_heartbeat *hb = accel_dev->heartbeat;
@@ -171,6 +209,17 @@ void adf_heartbeat_dbgfs_add(struct adf_accel_dev *accel_dev)
&hb->hb_failed_counter, &adf_hb_stats_fops);
hb->dbgfs.cfg = debugfs_create_file("config", 0600, hb->dbgfs.base_dir,
accel_dev, &adf_hb_cfg_fops);
+
+ if (IS_ENABLED(CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION)) {
+ struct dentry *inject_error __maybe_unused;
+
+ inject_error = debugfs_create_file("inject_error", 0200,
+ hb->dbgfs.base_dir, accel_dev,
+ &adf_hb_error_inject_fops);
+#ifdef CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION
+ hb->dbgfs.inject_error = inject_error;
+#endif
+ }
}
EXPORT_SYMBOL_GPL(adf_heartbeat_dbgfs_add);
@@ -189,6 +238,10 @@ void adf_heartbeat_dbgfs_rm(struct adf_accel_dev *accel_dev)
hb->dbgfs.failed = NULL;
debugfs_remove(hb->dbgfs.cfg);
hb->dbgfs.cfg = NULL;
+#ifdef CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION
+ debugfs_remove(hb->dbgfs.inject_error);
+ hb->dbgfs.inject_error = NULL;
+#endif
debugfs_remove(hb->dbgfs.base_dir);
hb->dbgfs.base_dir = NULL;
}
diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat_inject.c b/drivers/crypto/intel/qat/qat_common/adf_heartbeat_inject.c
new file mode 100644
index 000000000000..a3b474bdef6c
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat_inject.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2023 Intel Corporation */
+#include <linux/random.h>
+
+#include "adf_admin.h"
+#include "adf_common_drv.h"
+#include "adf_heartbeat.h"
+
+#define MAX_HB_TICKS 0xFFFFFFFF
+
+static int adf_hb_set_timer_to_max(struct adf_accel_dev *accel_dev)
+{
+ struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+
+ accel_dev->heartbeat->hb_timer = 0;
+
+ if (hw_data->stop_timer)
+ hw_data->stop_timer(accel_dev);
+
+ return adf_send_admin_hb_timer(accel_dev, MAX_HB_TICKS);
+}
+
+static void adf_set_hb_counters_fail(struct adf_accel_dev *accel_dev, u32 ae,
+ u32 thr)
+{
+ struct hb_cnt_pair *stats = accel_dev->heartbeat->dma.virt_addr;
+ struct adf_hw_device_data *hw_device = accel_dev->hw_device;
+ const size_t max_aes = hw_device->get_num_aes(hw_device);
+ const size_t hb_ctrs = hw_device->num_hb_ctrs;
+ size_t thr_id = ae * hb_ctrs + thr;
+ u16 num_rsp = stats[thr_id].resp_heartbeat_cnt;
+
+ /*
+ * Inject live.req != live.rsp and live.rsp == last.rsp
+ * to trigger the heartbeat error detection
+ */
+ stats[thr_id].req_heartbeat_cnt++;
+ stats += (max_aes * hb_ctrs);
+ stats[thr_id].resp_heartbeat_cnt = num_rsp;
+}
+
+int adf_heartbeat_inject_error(struct adf_accel_dev *accel_dev)
+{
+ struct adf_hw_device_data *hw_device = accel_dev->hw_device;
+ const size_t max_aes = hw_device->get_num_aes(hw_device);
+ const size_t hb_ctrs = hw_device->num_hb_ctrs;
+ u32 rand, rand_ae, rand_thr;
+ unsigned long ae_mask;
+ int ret;
+
+ ae_mask = hw_device->ae_mask;
+
+ do {
+ /* Ensure we have a valid ae */
+ get_random_bytes(&rand, sizeof(rand));
+ rand_ae = rand % max_aes;
+ } while (!test_bit(rand_ae, &ae_mask));
+
+ get_random_bytes(&rand, sizeof(rand));
+ rand_thr = rand % hb_ctrs;
+
+ /* Increase the heartbeat timer to prevent FW updating HB counters */
+ ret = adf_hb_set_timer_to_max(accel_dev);
+ if (ret)
+ return ret;
+
+ /* Configure worker threads to stop processing any packet */
+ ret = adf_disable_arb_thd(accel_dev, rand_ae, rand_thr);
+ if (ret)
+ return ret;
+
+ /* Change HB counters memory to simulate a hang */
+ adf_set_hb_counters_fail(accel_dev, rand_ae, rand_thr);
+
+ return 0;
+}
diff --git a/drivers/crypto/intel/qat/qat_common/adf_hw_arbiter.c b/drivers/crypto/intel/qat/qat_common/adf_hw_arbiter.c
index da6956699246..65bd26b25abc 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_hw_arbiter.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_hw_arbiter.c
@@ -103,3 +103,28 @@ void adf_exit_arb(struct adf_accel_dev *accel_dev)
csr_ops->write_csr_ring_srv_arb_en(csr, i, 0);
}
EXPORT_SYMBOL_GPL(adf_exit_arb);
+
+int adf_disable_arb_thd(struct adf_accel_dev *accel_dev, u32 ae, u32 thr)
+{
+ void __iomem *csr = accel_dev->transport->banks[0].csr_addr;
+ struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+ const u32 *thd_2_arb_cfg;
+ struct arb_info info;
+ u32 ae_thr_map;
+
+ if (ADF_AE_STRAND0_THREAD == thr || ADF_AE_STRAND1_THREAD == thr)
+ thr = ADF_AE_ADMIN_THREAD;
+
+ hw_data->get_arb_info(&info);
+ thd_2_arb_cfg = hw_data->get_arb_mapping(accel_dev);
+ if (!thd_2_arb_cfg)
+ return -EFAULT;
+
+ /* Disable scheduling for this particular AE and thread */
+ ae_thr_map = *(thd_2_arb_cfg + ae);
+ ae_thr_map &= ~(GENMASK(3, 0) << (thr * BIT(2)));
+
+ WRITE_CSR_ARB_WT2SAM(csr, info.arb_offset, info.wt2sam_offset, ae,
+ ae_thr_map);
+ return 0;
+}
diff --git a/drivers/crypto/intel/qat/qat_common/adf_init.c b/drivers/crypto/intel/qat/qat_common/adf_init.c
index f43ae9111553..74f0818c0703 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_init.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_init.c
@@ -433,6 +433,18 @@ int adf_dev_restarted_notify(struct adf_accel_dev *accel_dev)
return 0;
}
+void adf_error_notifier(struct adf_accel_dev *accel_dev)
+{
+ struct service_hndl *service;
+
+ list_for_each_entry(service, &service_table, list) {
+ if (service->event_hld(accel_dev, ADF_EVENT_FATAL_ERROR))
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to send error event to %s.\n",
+ service->name);
+ }
+}
+
static int adf_dev_shutdown_cache_cfg(struct adf_accel_dev *accel_dev)
{
char services[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0};
diff --git a/drivers/crypto/intel/qat/qat_common/adf_isr.c b/drivers/crypto/intel/qat/qat_common/adf_isr.c
index 3557a0d6dea2..cae1aee5479a 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_isr.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_isr.c
@@ -139,8 +139,13 @@ static bool adf_handle_ras_int(struct adf_accel_dev *accel_dev)
if (ras_ops->handle_interrupt &&
ras_ops->handle_interrupt(accel_dev, &reset_required)) {
- if (reset_required)
+ if (reset_required) {
dev_err(&GET_DEV(accel_dev), "Fatal error, reset required\n");
+ if (adf_notify_fatal_error(accel_dev))
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to notify fatal error\n");
+ }
+
return true;
}
@@ -272,7 +277,7 @@ static int adf_isr_alloc_msix_vectors_data(struct adf_accel_dev *accel_dev)
if (!accel_dev->pf.vf_info)
msix_num_entries += hw_data->num_banks;
- irqs = kzalloc_node(msix_num_entries * sizeof(*irqs),
+ irqs = kcalloc_node(msix_num_entries, sizeof(*irqs),
GFP_KERNEL, dev_to_node(&GET_DEV(accel_dev)));
if (!irqs)
return -ENOMEM;
@@ -375,8 +380,6 @@ EXPORT_SYMBOL_GPL(adf_isr_resource_alloc);
/**
* adf_init_misc_wq() - Init misc workqueue
*
- * Function init workqueue 'qat_misc_wq' for general purpose.
- *
* Return: 0 on success, error code otherwise.
*/
int __init adf_init_misc_wq(void)
diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_msg.h b/drivers/crypto/intel/qat/qat_common/adf_pfvf_msg.h
index 204a42438992..d1b3ef9cadac 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_msg.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_msg.h
@@ -99,6 +99,8 @@ enum pf2vf_msgtype {
ADF_PF2VF_MSGTYPE_RESTARTING = 0x01,
ADF_PF2VF_MSGTYPE_VERSION_RESP = 0x02,
ADF_PF2VF_MSGTYPE_BLKMSG_RESP = 0x03,
+ ADF_PF2VF_MSGTYPE_FATAL_ERROR = 0x04,
+ ADF_PF2VF_MSGTYPE_RESTARTED = 0x05,
/* Values from 0x10 are Gen4 specific, message type is only 4 bits in Gen2 devices. */
ADF_PF2VF_MSGTYPE_RP_RESET_RESP = 0x10,
};
@@ -112,6 +114,7 @@ enum vf2pf_msgtype {
ADF_VF2PF_MSGTYPE_LARGE_BLOCK_REQ = 0x07,
ADF_VF2PF_MSGTYPE_MEDIUM_BLOCK_REQ = 0x08,
ADF_VF2PF_MSGTYPE_SMALL_BLOCK_REQ = 0x09,
+ ADF_VF2PF_MSGTYPE_RESTARTING_COMPLETE = 0x0a,
/* Values from 0x10 are Gen4 specific, message type is only 4 bits in Gen2 devices. */
ADF_VF2PF_MSGTYPE_RP_RESET = 0x10,
};
@@ -124,8 +127,10 @@ enum pfvf_compatibility_version {
ADF_PFVF_COMPAT_FAST_ACK = 0x03,
/* Ring to service mapping support for non-standard mappings */
ADF_PFVF_COMPAT_RING_TO_SVC_MAP = 0x04,
+ /* Fallback compat */
+ ADF_PFVF_COMPAT_FALLBACK = 0x05,
/* Reference to the latest version */
- ADF_PFVF_COMPAT_THIS_VERSION = 0x04,
+ ADF_PFVF_COMPAT_THIS_VERSION = 0x05,
};
/* PF->VF Version Response */
diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.c b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.c
index 14c069f0d71a..0e31f4b41844 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.c
@@ -1,21 +1,83 @@
// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
/* Copyright(c) 2015 - 2021 Intel Corporation */
+#include <linux/delay.h>
#include <linux/pci.h>
#include "adf_accel_devices.h"
#include "adf_pfvf_msg.h"
#include "adf_pfvf_pf_msg.h"
#include "adf_pfvf_pf_proto.h"
+#define ADF_PF_WAIT_RESTARTING_COMPLETE_DELAY 100
+#define ADF_VF_SHUTDOWN_RETRY 100
+
void adf_pf2vf_notify_restarting(struct adf_accel_dev *accel_dev)
{
struct adf_accel_vf_info *vf;
struct pfvf_message msg = { .type = ADF_PF2VF_MSGTYPE_RESTARTING };
int i, num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev));
+ dev_dbg(&GET_DEV(accel_dev), "pf2vf notify restarting\n");
for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++) {
- if (vf->init && adf_send_pf2vf_msg(accel_dev, i, msg))
+ vf->restarting = false;
+ if (!vf->init)
+ continue;
+ if (adf_send_pf2vf_msg(accel_dev, i, msg))
dev_err(&GET_DEV(accel_dev),
"Failed to send restarting msg to VF%d\n", i);
+ else if (vf->vf_compat_ver >= ADF_PFVF_COMPAT_FALLBACK)
+ vf->restarting = true;
+ }
+}
+
+void adf_pf2vf_wait_for_restarting_complete(struct adf_accel_dev *accel_dev)
+{
+ int num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev));
+ int i, retries = ADF_VF_SHUTDOWN_RETRY;
+ struct adf_accel_vf_info *vf;
+ bool vf_running;
+
+ dev_dbg(&GET_DEV(accel_dev), "pf2vf wait for restarting complete\n");
+ do {
+ vf_running = false;
+ for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++)
+ if (vf->restarting)
+ vf_running = true;
+ if (!vf_running)
+ break;
+ msleep(ADF_PF_WAIT_RESTARTING_COMPLETE_DELAY);
+ } while (--retries);
+
+ if (vf_running)
+ dev_warn(&GET_DEV(accel_dev), "Some VFs are still running\n");
+}
+
+void adf_pf2vf_notify_restarted(struct adf_accel_dev *accel_dev)
+{
+ struct pfvf_message msg = { .type = ADF_PF2VF_MSGTYPE_RESTARTED };
+ int i, num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev));
+ struct adf_accel_vf_info *vf;
+
+ dev_dbg(&GET_DEV(accel_dev), "pf2vf notify restarted\n");
+ for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++) {
+ if (vf->init && vf->vf_compat_ver >= ADF_PFVF_COMPAT_FALLBACK &&
+ adf_send_pf2vf_msg(accel_dev, i, msg))
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to send restarted msg to VF%d\n", i);
+ }
+}
+
+void adf_pf2vf_notify_fatal_error(struct adf_accel_dev *accel_dev)
+{
+ struct pfvf_message msg = { .type = ADF_PF2VF_MSGTYPE_FATAL_ERROR };
+ int i, num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev));
+ struct adf_accel_vf_info *vf;
+
+ dev_dbg(&GET_DEV(accel_dev), "pf2vf notify fatal error\n");
+ for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++) {
+ if (vf->init && vf->vf_compat_ver >= ADF_PFVF_COMPAT_FALLBACK &&
+ adf_send_pf2vf_msg(accel_dev, i, msg))
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to send fatal error msg to VF%d\n", i);
}
}
diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.h b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.h
index e8982d1ac896..f203d88c919c 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.h
@@ -5,7 +5,28 @@
#include "adf_accel_devices.h"
+#if defined(CONFIG_PCI_IOV)
void adf_pf2vf_notify_restarting(struct adf_accel_dev *accel_dev);
+void adf_pf2vf_wait_for_restarting_complete(struct adf_accel_dev *accel_dev);
+void adf_pf2vf_notify_restarted(struct adf_accel_dev *accel_dev);
+void adf_pf2vf_notify_fatal_error(struct adf_accel_dev *accel_dev);
+#else
+static inline void adf_pf2vf_notify_restarting(struct adf_accel_dev *accel_dev)
+{
+}
+
+static inline void adf_pf2vf_wait_for_restarting_complete(struct adf_accel_dev *accel_dev)
+{
+}
+
+static inline void adf_pf2vf_notify_restarted(struct adf_accel_dev *accel_dev)
+{
+}
+
+static inline void adf_pf2vf_notify_fatal_error(struct adf_accel_dev *accel_dev)
+{
+}
+#endif
typedef int (*adf_pf2vf_blkmsg_provider)(struct adf_accel_dev *accel_dev,
u8 *buffer, u8 compat);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c
index 388e58bcbcaf..9ab93fbfefde 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c
@@ -291,6 +291,14 @@ static int adf_handle_vf2pf_msg(struct adf_accel_dev *accel_dev, u8 vf_nr,
vf_info->init = false;
}
break;
+ case ADF_VF2PF_MSGTYPE_RESTARTING_COMPLETE:
+ {
+ dev_dbg(&GET_DEV(accel_dev),
+ "Restarting Complete received from VF%d\n", vf_nr);
+ vf_info->restarting = false;
+ vf_info->init = false;
+ }
+ break;
case ADF_VF2PF_MSGTYPE_LARGE_BLOCK_REQ:
case ADF_VF2PF_MSGTYPE_MEDIUM_BLOCK_REQ:
case ADF_VF2PF_MSGTYPE_SMALL_BLOCK_REQ:
diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_vf_proto.c b/drivers/crypto/intel/qat/qat_common/adf_pfvf_vf_proto.c
index 1015155b6374..dc284a089c88 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_vf_proto.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_vf_proto.c
@@ -308,6 +308,12 @@ static bool adf_handle_pf2vf_msg(struct adf_accel_dev *accel_dev,
adf_pf2vf_handle_pf_restarting(accel_dev);
return false;
+ case ADF_PF2VF_MSGTYPE_RESTARTED:
+ dev_dbg(&GET_DEV(accel_dev), "Restarted message received from PF\n");
+ return true;
+ case ADF_PF2VF_MSGTYPE_FATAL_ERROR:
+ dev_err(&GET_DEV(accel_dev), "Fatal error received from PF\n");
+ return true;
case ADF_PF2VF_MSGTYPE_VERSION_RESP:
case ADF_PF2VF_MSGTYPE_BLKMSG_RESP:
case ADF_PF2VF_MSGTYPE_RP_RESET_RESP:
diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl.c b/drivers/crypto/intel/qat/qat_common/adf_rl.c
index de1b214dba1f..d4f2db3c53d8 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_rl.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_rl.c
@@ -788,6 +788,24 @@ static void clear_sla(struct adf_rl *rl_data, struct rl_sla *sla)
sla_type_arr[node_id] = NULL;
}
+static void free_all_sla(struct adf_accel_dev *accel_dev)
+{
+ struct adf_rl *rl_data = accel_dev->rate_limiting;
+ int sla_id;
+
+ mutex_lock(&rl_data->rl_lock);
+
+ for (sla_id = 0; sla_id < RL_NODES_CNT_MAX; sla_id++) {
+ if (!rl_data->sla[sla_id])
+ continue;
+
+ kfree(rl_data->sla[sla_id]);
+ rl_data->sla[sla_id] = NULL;
+ }
+
+ mutex_unlock(&rl_data->rl_lock);
+}
+
/**
* add_update_sla() - handles the creation and the update of an SLA
* @accel_dev: pointer to acceleration device structure
@@ -1155,7 +1173,7 @@ void adf_rl_stop(struct adf_accel_dev *accel_dev)
return;
adf_sysfs_rl_rm(accel_dev);
- adf_rl_remove_sla_all(accel_dev, true);
+ free_all_sla(accel_dev);
}
void adf_rl_exit(struct adf_accel_dev *accel_dev)
diff --git a/drivers/crypto/intel/qat/qat_common/adf_sriov.c b/drivers/crypto/intel/qat/qat_common/adf_sriov.c
index f44025bb6f99..87a70c00c41e 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_sriov.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_sriov.c
@@ -60,7 +60,6 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev)
/* This ptr will be populated when VFs will be created */
vf_info->accel_dev = accel_dev;
vf_info->vf_nr = i;
- vf_info->vf_compat_ver = 0;
mutex_init(&vf_info->pf2vf_lock);
ratelimit_state_init(&vf_info->vf2pf_ratelimit,
@@ -84,6 +83,32 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev)
return pci_enable_sriov(pdev, totalvfs);
}
+void adf_reenable_sriov(struct adf_accel_dev *accel_dev)
+{
+ struct pci_dev *pdev = accel_to_pci_dev(accel_dev);
+ char cfg[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0};
+ unsigned long val = 0;
+
+ if (adf_cfg_get_param_value(accel_dev, ADF_GENERAL_SEC,
+ ADF_SRIOV_ENABLED, cfg))
+ return;
+
+ if (!accel_dev->pf.vf_info)
+ return;
+
+ if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY,
+ &val, ADF_DEC))
+ return;
+
+ if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_DC,
+ &val, ADF_DEC))
+ return;
+
+ set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status);
+ dev_dbg(&pdev->dev, "Re-enabling SRIOV\n");
+ adf_enable_sriov(accel_dev);
+}
+
/**
* adf_disable_sriov() - Disable SRIOV for the device
* @accel_dev: Pointer to accel device.
@@ -103,6 +128,7 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev)
return;
adf_pf2vf_notify_restarting(accel_dev);
+ adf_pf2vf_wait_for_restarting_complete(accel_dev);
pci_disable_sriov(accel_to_pci_dev(accel_dev));
/* Disable VF to PF interrupts */
@@ -115,8 +141,10 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev)
for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++)
mutex_destroy(&vf->pf2vf_lock);
- kfree(accel_dev->pf.vf_info);
- accel_dev->pf.vf_info = NULL;
+ if (!test_bit(ADF_STATUS_RESTARTING, &accel_dev->status)) {
+ kfree(accel_dev->pf.vf_info);
+ accel_dev->pf.vf_info = NULL;
+ }
}
EXPORT_SYMBOL_GPL(adf_disable_sriov);
@@ -194,6 +222,10 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs)
if (ret)
return ret;
+ val = 1;
+ adf_cfg_add_key_value_param(accel_dev, ADF_GENERAL_SEC, ADF_SRIOV_ENABLED,
+ &val, ADF_DEC);
+
return numvfs;
}
EXPORT_SYMBOL_GPL(adf_sriov_configure);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_sysfs.c b/drivers/crypto/intel/qat/qat_common/adf_sysfs.c
index d450dad32c9e..4e7f70d4049d 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_sysfs.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_sysfs.c
@@ -204,6 +204,42 @@ static ssize_t pm_idle_enabled_store(struct device *dev, struct device_attribute
}
static DEVICE_ATTR_RW(pm_idle_enabled);
+static ssize_t auto_reset_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ char *auto_reset;
+ struct adf_accel_dev *accel_dev;
+
+ accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev));
+ if (!accel_dev)
+ return -EINVAL;
+
+ auto_reset = accel_dev->autoreset_on_error ? "on" : "off";
+
+ return sysfs_emit(buf, "%s\n", auto_reset);
+}
+
+static ssize_t auto_reset_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct adf_accel_dev *accel_dev;
+ bool enabled = false;
+ int ret;
+
+ ret = kstrtobool(buf, &enabled);
+ if (ret)
+ return ret;
+
+ accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev));
+ if (!accel_dev)
+ return -EINVAL;
+
+ accel_dev->autoreset_on_error = enabled;
+
+ return count;
+}
+static DEVICE_ATTR_RW(auto_reset);
+
static DEVICE_ATTR_RW(state);
static DEVICE_ATTR_RW(cfg_services);
@@ -291,6 +327,7 @@ static struct attribute *qat_attrs[] = {
&dev_attr_pm_idle_enabled.attr,
&dev_attr_rp2srv.attr,
&dev_attr_num_rps.attr,
+ &dev_attr_auto_reset.attr,
NULL,
};
diff --git a/drivers/crypto/intel/qat/qat_common/adf_vf_isr.c b/drivers/crypto/intel/qat/qat_common/adf_vf_isr.c
index b05c3957a160..cdbb2d687b1b 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_vf_isr.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_vf_isr.c
@@ -293,8 +293,6 @@ EXPORT_SYMBOL_GPL(adf_flush_vf_wq);
/**
* adf_init_vf_wq() - Init workqueue for VF
*
- * Function init workqueue 'adf_vf_stop_wq' for VF.
- *
* Return: 0 on success, error code otherwise.
*/
int __init adf_init_vf_wq(void)
diff --git a/drivers/crypto/intel/qat/qat_common/qat_comp_algs.c b/drivers/crypto/intel/qat/qat_common/qat_comp_algs.c
index bf8c0ee62917..2ba4aa22e092 100644
--- a/drivers/crypto/intel/qat/qat_common/qat_comp_algs.c
+++ b/drivers/crypto/intel/qat/qat_common/qat_comp_algs.c
@@ -13,15 +13,6 @@
#include "qat_compression.h"
#include "qat_algs_send.h"
-#define QAT_RFC_1950_HDR_SIZE 2
-#define QAT_RFC_1950_FOOTER_SIZE 4
-#define QAT_RFC_1950_CM_DEFLATE 8
-#define QAT_RFC_1950_CM_DEFLATE_CINFO_32K 7
-#define QAT_RFC_1950_CM_MASK 0x0f
-#define QAT_RFC_1950_CM_OFFSET 4
-#define QAT_RFC_1950_DICT_MASK 0x20
-#define QAT_RFC_1950_COMP_HDR 0x785e
-
static DEFINE_MUTEX(algs_lock);
static unsigned int active_devs;
diff --git a/drivers/crypto/intel/qat/qat_common/qat_crypto.c b/drivers/crypto/intel/qat/qat_common/qat_crypto.c
index 40c8e74d1cf9..101c6ea41673 100644
--- a/drivers/crypto/intel/qat/qat_common/qat_crypto.c
+++ b/drivers/crypto/intel/qat/qat_common/qat_crypto.c
@@ -105,8 +105,8 @@ struct qat_crypto_instance *qat_crypto_get_instance_node(int node)
}
/**
- * qat_crypto_vf_dev_config()
- * create dev config required to create crypto inst.
+ * qat_crypto_vf_dev_config() - create dev config required to create
+ * crypto inst.
*
* @accel_dev: Pointer to acceleration device.
*
diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index 70edf40bc523..f74b3c81ba6d 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -371,6 +371,11 @@ static int rk_crypto_probe(struct platform_device *pdev)
}
crypto_info->engine = crypto_engine_alloc_init(&pdev->dev, true);
+ if (!crypto_info->engine) {
+ err = -ENOMEM;
+ goto err_crypto;
+ }
+
crypto_engine_start(crypto_info->engine);
init_completion(&crypto_info->complete);
diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
index de53eddf6796..cb92b7fa99c6 100644
--- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
+++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
@@ -225,11 +225,11 @@ static int __virtio_crypto_akcipher_do_req(struct virtio_crypto_akcipher_request
struct virtio_crypto *vcrypto = ctx->vcrypto;
struct virtio_crypto_op_data_req *req_data = vc_req->req_data;
struct scatterlist *sgs[4], outhdr_sg, inhdr_sg, srcdata_sg, dstdata_sg;
- void *src_buf = NULL, *dst_buf = NULL;
+ void *src_buf, *dst_buf = NULL;
unsigned int num_out = 0, num_in = 0;
int node = dev_to_node(&vcrypto->vdev->dev);
unsigned long flags;
- int ret = -ENOMEM;
+ int ret;
bool verify = vc_akcipher_req->opcode == VIRTIO_CRYPTO_AKCIPHER_VERIFY;
unsigned int src_len = verify ? req->src_len + req->dst_len : req->src_len;
@@ -240,7 +240,7 @@ static int __virtio_crypto_akcipher_do_req(struct virtio_crypto_akcipher_request
/* src data */
src_buf = kcalloc_node(src_len, 1, GFP_KERNEL, node);
if (!src_buf)
- goto err;
+ return -ENOMEM;
if (verify) {
/* for verify operation, both src and dst data work as OUT direction */
@@ -255,7 +255,7 @@ static int __virtio_crypto_akcipher_do_req(struct virtio_crypto_akcipher_request
/* dst data */
dst_buf = kcalloc_node(req->dst_len, 1, GFP_KERNEL, node);
if (!dst_buf)
- goto err;
+ goto free_src;
sg_init_one(&dstdata_sg, dst_buf, req->dst_len);
sgs[num_out + num_in++] = &dstdata_sg;
@@ -278,9 +278,9 @@ static int __virtio_crypto_akcipher_do_req(struct virtio_crypto_akcipher_request
return 0;
err:
- kfree(src_buf);
kfree(dst_buf);
-
+free_src:
+ kfree(src_buf);
return -ENOMEM;
}
diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c
index b909c6a2bf1c..6a67d70e7f1c 100644
--- a/drivers/crypto/virtio/virtio_crypto_core.c
+++ b/drivers/crypto/virtio/virtio_crypto_core.c
@@ -42,8 +42,6 @@ static void virtcrypto_ctrlq_callback(struct virtqueue *vq)
virtio_crypto_ctrlq_callback(vc_ctrl_req);
spin_lock_irqsave(&vcrypto->ctrl_lock, flags);
}
- if (unlikely(virtqueue_is_broken(vq)))
- break;
} while (!virtqueue_enable_cb(vq));
spin_unlock_irqrestore(&vcrypto->ctrl_lock, flags);
}
diff --git a/drivers/crypto/vmx/.gitignore b/drivers/crypto/vmx/.gitignore
deleted file mode 100644
index 7aa71d83f739..000000000000
--- a/drivers/crypto/vmx/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-aesp8-ppc.S
-ghashp8-ppc.S
diff --git a/drivers/crypto/vmx/Kconfig b/drivers/crypto/vmx/Kconfig
deleted file mode 100644
index b2c28b87f14b..000000000000
--- a/drivers/crypto/vmx/Kconfig
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-config CRYPTO_DEV_VMX_ENCRYPT
- tristate "Encryption acceleration support on P8 CPU"
- depends on CRYPTO_DEV_VMX
- select CRYPTO_AES
- select CRYPTO_CBC
- select CRYPTO_CTR
- select CRYPTO_GHASH
- select CRYPTO_XTS
- default m
- help
- Support for VMX cryptographic acceleration instructions on Power8 CPU.
- This module supports acceleration for AES and GHASH in hardware. If you
- choose 'M' here, this module will be called vmx-crypto.
diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile
deleted file mode 100644
index 7257b8c44626..000000000000
--- a/drivers/crypto/vmx/Makefile
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o
-vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o
-
-ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-override flavour := linux-ppc64le
-else
-ifdef CONFIG_PPC64_ELF_ABI_V2
-override flavour := linux-ppc64-elfv2
-else
-override flavour := linux-ppc64
-endif
-endif
-
-quiet_cmd_perl = PERL $@
- cmd_perl = $(PERL) $< $(flavour) > $@
-
-targets += aesp8-ppc.S ghashp8-ppc.S
-
-$(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE
- $(call if_changed,perl)
-
-OBJECT_FILES_NON_STANDARD_aesp8-ppc.o := y
diff --git a/drivers/crypto/vmx/aes.c b/drivers/crypto/vmx/aes.c
deleted file mode 100644
index ec06189fbf99..000000000000
--- a/drivers/crypto/vmx/aes.c
+++ /dev/null
@@ -1,134 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * AES routines supporting VMX instructions on the Power 8
- *
- * Copyright (C) 2015 International Business Machines Inc.
- *
- * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
- */
-
-#include <linux/types.h>
-#include <linux/err.h>
-#include <linux/crypto.h>
-#include <linux/delay.h>
-#include <asm/simd.h>
-#include <asm/switch_to.h>
-#include <crypto/aes.h>
-#include <crypto/internal/cipher.h>
-#include <crypto/internal/simd.h>
-
-#include "aesp8-ppc.h"
-
-struct p8_aes_ctx {
- struct crypto_cipher *fallback;
- struct aes_key enc_key;
- struct aes_key dec_key;
-};
-
-static int p8_aes_init(struct crypto_tfm *tfm)
-{
- const char *alg = crypto_tfm_alg_name(tfm);
- struct crypto_cipher *fallback;
- struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
- fallback = crypto_alloc_cipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK);
- if (IS_ERR(fallback)) {
- printk(KERN_ERR
- "Failed to allocate transformation for '%s': %ld\n",
- alg, PTR_ERR(fallback));
- return PTR_ERR(fallback);
- }
-
- crypto_cipher_set_flags(fallback,
- crypto_cipher_get_flags((struct
- crypto_cipher *)
- tfm));
- ctx->fallback = fallback;
-
- return 0;
-}
-
-static void p8_aes_exit(struct crypto_tfm *tfm)
-{
- struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
- if (ctx->fallback) {
- crypto_free_cipher(ctx->fallback);
- ctx->fallback = NULL;
- }
-}
-
-static int p8_aes_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
-{
- int ret;
- struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
- ret |= aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- ret |= crypto_cipher_setkey(ctx->fallback, key, keylen);
-
- return ret ? -EINVAL : 0;
-}
-
-static void p8_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
- if (!crypto_simd_usable()) {
- crypto_cipher_encrypt_one(ctx->fallback, dst, src);
- } else {
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- aes_p8_encrypt(src, dst, &ctx->enc_key);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
- }
-}
-
-static void p8_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
- if (!crypto_simd_usable()) {
- crypto_cipher_decrypt_one(ctx->fallback, dst, src);
- } else {
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- aes_p8_decrypt(src, dst, &ctx->dec_key);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
- }
-}
-
-struct crypto_alg p8_aes_alg = {
- .cra_name = "aes",
- .cra_driver_name = "p8_aes",
- .cra_module = THIS_MODULE,
- .cra_priority = 1000,
- .cra_type = NULL,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_NEED_FALLBACK,
- .cra_alignmask = 0,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct p8_aes_ctx),
- .cra_init = p8_aes_init,
- .cra_exit = p8_aes_exit,
- .cra_cipher = {
- .cia_min_keysize = AES_MIN_KEY_SIZE,
- .cia_max_keysize = AES_MAX_KEY_SIZE,
- .cia_setkey = p8_aes_setkey,
- .cia_encrypt = p8_aes_encrypt,
- .cia_decrypt = p8_aes_decrypt,
- },
-};
diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c
deleted file mode 100644
index ed0debc7acb5..000000000000
--- a/drivers/crypto/vmx/aes_cbc.c
+++ /dev/null
@@ -1,133 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * AES CBC routines supporting VMX instructions on the Power 8
- *
- * Copyright (C) 2015 International Business Machines Inc.
- *
- * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
- */
-
-#include <asm/simd.h>
-#include <asm/switch_to.h>
-#include <crypto/aes.h>
-#include <crypto/internal/simd.h>
-#include <crypto/internal/skcipher.h>
-
-#include "aesp8-ppc.h"
-
-struct p8_aes_cbc_ctx {
- struct crypto_skcipher *fallback;
- struct aes_key enc_key;
- struct aes_key dec_key;
-};
-
-static int p8_aes_cbc_init(struct crypto_skcipher *tfm)
-{
- struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct crypto_skcipher *fallback;
-
- fallback = crypto_alloc_skcipher("cbc(aes)", 0,
- CRYPTO_ALG_NEED_FALLBACK |
- CRYPTO_ALG_ASYNC);
- if (IS_ERR(fallback)) {
- pr_err("Failed to allocate cbc(aes) fallback: %ld\n",
- PTR_ERR(fallback));
- return PTR_ERR(fallback);
- }
-
- crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
- crypto_skcipher_reqsize(fallback));
- ctx->fallback = fallback;
- return 0;
-}
-
-static void p8_aes_cbc_exit(struct crypto_skcipher *tfm)
-{
- struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- crypto_free_skcipher(ctx->fallback);
-}
-
-static int p8_aes_cbc_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
- int ret;
-
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
- ret |= aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- ret |= crypto_skcipher_setkey(ctx->fallback, key, keylen);
-
- return ret ? -EINVAL : 0;
-}
-
-static int p8_aes_cbc_crypt(struct skcipher_request *req, int enc)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- const struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- unsigned int nbytes;
- int ret;
-
- if (!crypto_simd_usable()) {
- struct skcipher_request *subreq = skcipher_request_ctx(req);
-
- *subreq = *req;
- skcipher_request_set_tfm(subreq, ctx->fallback);
- return enc ? crypto_skcipher_encrypt(subreq) :
- crypto_skcipher_decrypt(subreq);
- }
-
- ret = skcipher_walk_virt(&walk, req, false);
- while ((nbytes = walk.nbytes) != 0) {
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- aes_p8_cbc_encrypt(walk.src.virt.addr,
- walk.dst.virt.addr,
- round_down(nbytes, AES_BLOCK_SIZE),
- enc ? &ctx->enc_key : &ctx->dec_key,
- walk.iv, enc);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- ret = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE);
- }
- return ret;
-}
-
-static int p8_aes_cbc_encrypt(struct skcipher_request *req)
-{
- return p8_aes_cbc_crypt(req, 1);
-}
-
-static int p8_aes_cbc_decrypt(struct skcipher_request *req)
-{
- return p8_aes_cbc_crypt(req, 0);
-}
-
-struct skcipher_alg p8_aes_cbc_alg = {
- .base.cra_name = "cbc(aes)",
- .base.cra_driver_name = "p8_aes_cbc",
- .base.cra_module = THIS_MODULE,
- .base.cra_priority = 2000,
- .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
- .base.cra_blocksize = AES_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct p8_aes_cbc_ctx),
- .setkey = p8_aes_cbc_setkey,
- .encrypt = p8_aes_cbc_encrypt,
- .decrypt = p8_aes_cbc_decrypt,
- .init = p8_aes_cbc_init,
- .exit = p8_aes_cbc_exit,
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
-};
diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c
deleted file mode 100644
index 9a3da8cd62f3..000000000000
--- a/drivers/crypto/vmx/aes_ctr.c
+++ /dev/null
@@ -1,149 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * AES CTR routines supporting VMX instructions on the Power 8
- *
- * Copyright (C) 2015 International Business Machines Inc.
- *
- * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
- */
-
-#include <asm/simd.h>
-#include <asm/switch_to.h>
-#include <crypto/aes.h>
-#include <crypto/internal/simd.h>
-#include <crypto/internal/skcipher.h>
-
-#include "aesp8-ppc.h"
-
-struct p8_aes_ctr_ctx {
- struct crypto_skcipher *fallback;
- struct aes_key enc_key;
-};
-
-static int p8_aes_ctr_init(struct crypto_skcipher *tfm)
-{
- struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct crypto_skcipher *fallback;
-
- fallback = crypto_alloc_skcipher("ctr(aes)", 0,
- CRYPTO_ALG_NEED_FALLBACK |
- CRYPTO_ALG_ASYNC);
- if (IS_ERR(fallback)) {
- pr_err("Failed to allocate ctr(aes) fallback: %ld\n",
- PTR_ERR(fallback));
- return PTR_ERR(fallback);
- }
-
- crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
- crypto_skcipher_reqsize(fallback));
- ctx->fallback = fallback;
- return 0;
-}
-
-static void p8_aes_ctr_exit(struct crypto_skcipher *tfm)
-{
- struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- crypto_free_skcipher(ctx->fallback);
-}
-
-static int p8_aes_ctr_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
- int ret;
-
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- ret |= crypto_skcipher_setkey(ctx->fallback, key, keylen);
-
- return ret ? -EINVAL : 0;
-}
-
-static void p8_aes_ctr_final(const struct p8_aes_ctr_ctx *ctx,
- struct skcipher_walk *walk)
-{
- u8 *ctrblk = walk->iv;
- u8 keystream[AES_BLOCK_SIZE];
- u8 *src = walk->src.virt.addr;
- u8 *dst = walk->dst.virt.addr;
- unsigned int nbytes = walk->nbytes;
-
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- aes_p8_encrypt(ctrblk, keystream, &ctx->enc_key);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- crypto_xor_cpy(dst, keystream, src, nbytes);
- crypto_inc(ctrblk, AES_BLOCK_SIZE);
-}
-
-static int p8_aes_ctr_crypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- const struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- unsigned int nbytes;
- int ret;
-
- if (!crypto_simd_usable()) {
- struct skcipher_request *subreq = skcipher_request_ctx(req);
-
- *subreq = *req;
- skcipher_request_set_tfm(subreq, ctx->fallback);
- return crypto_skcipher_encrypt(subreq);
- }
-
- ret = skcipher_walk_virt(&walk, req, false);
- while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- aes_p8_ctr32_encrypt_blocks(walk.src.virt.addr,
- walk.dst.virt.addr,
- nbytes / AES_BLOCK_SIZE,
- &ctx->enc_key, walk.iv);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- do {
- crypto_inc(walk.iv, AES_BLOCK_SIZE);
- } while ((nbytes -= AES_BLOCK_SIZE) >= AES_BLOCK_SIZE);
-
- ret = skcipher_walk_done(&walk, nbytes);
- }
- if (nbytes) {
- p8_aes_ctr_final(ctx, &walk);
- ret = skcipher_walk_done(&walk, 0);
- }
- return ret;
-}
-
-struct skcipher_alg p8_aes_ctr_alg = {
- .base.cra_name = "ctr(aes)",
- .base.cra_driver_name = "p8_aes_ctr",
- .base.cra_module = THIS_MODULE,
- .base.cra_priority = 2000,
- .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct p8_aes_ctr_ctx),
- .setkey = p8_aes_ctr_setkey,
- .encrypt = p8_aes_ctr_crypt,
- .decrypt = p8_aes_ctr_crypt,
- .init = p8_aes_ctr_init,
- .exit = p8_aes_ctr_exit,
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .chunksize = AES_BLOCK_SIZE,
-};
diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c
deleted file mode 100644
index dabbccb41550..000000000000
--- a/drivers/crypto/vmx/aes_xts.c
+++ /dev/null
@@ -1,162 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * AES XTS routines supporting VMX In-core instructions on Power 8
- *
- * Copyright (C) 2015 International Business Machines Inc.
- *
- * Author: Leonidas S. Barbosa <leosilva@linux.vnet.ibm.com>
- */
-
-#include <asm/simd.h>
-#include <asm/switch_to.h>
-#include <crypto/aes.h>
-#include <crypto/internal/simd.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/xts.h>
-
-#include "aesp8-ppc.h"
-
-struct p8_aes_xts_ctx {
- struct crypto_skcipher *fallback;
- struct aes_key enc_key;
- struct aes_key dec_key;
- struct aes_key tweak_key;
-};
-
-static int p8_aes_xts_init(struct crypto_skcipher *tfm)
-{
- struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct crypto_skcipher *fallback;
-
- fallback = crypto_alloc_skcipher("xts(aes)", 0,
- CRYPTO_ALG_NEED_FALLBACK |
- CRYPTO_ALG_ASYNC);
- if (IS_ERR(fallback)) {
- pr_err("Failed to allocate xts(aes) fallback: %ld\n",
- PTR_ERR(fallback));
- return PTR_ERR(fallback);
- }
-
- crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
- crypto_skcipher_reqsize(fallback));
- ctx->fallback = fallback;
- return 0;
-}
-
-static void p8_aes_xts_exit(struct crypto_skcipher *tfm)
-{
- struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- crypto_free_skcipher(ctx->fallback);
-}
-
-static int p8_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- int ret;
-
- ret = xts_verify_key(tfm, key, keylen);
- if (ret)
- return ret;
-
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- ret = aes_p8_set_encrypt_key(key + keylen/2, (keylen/2) * 8, &ctx->tweak_key);
- ret |= aes_p8_set_encrypt_key(key, (keylen/2) * 8, &ctx->enc_key);
- ret |= aes_p8_set_decrypt_key(key, (keylen/2) * 8, &ctx->dec_key);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- ret |= crypto_skcipher_setkey(ctx->fallback, key, keylen);
-
- return ret ? -EINVAL : 0;
-}
-
-static int p8_aes_xts_crypt(struct skcipher_request *req, int enc)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- const struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- unsigned int nbytes;
- u8 tweak[AES_BLOCK_SIZE];
- int ret;
-
- if (req->cryptlen < AES_BLOCK_SIZE)
- return -EINVAL;
-
- if (!crypto_simd_usable() || (req->cryptlen % XTS_BLOCK_SIZE) != 0) {
- struct skcipher_request *subreq = skcipher_request_ctx(req);
-
- *subreq = *req;
- skcipher_request_set_tfm(subreq, ctx->fallback);
- return enc ? crypto_skcipher_encrypt(subreq) :
- crypto_skcipher_decrypt(subreq);
- }
-
- ret = skcipher_walk_virt(&walk, req, false);
- if (ret)
- return ret;
-
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
-
- aes_p8_encrypt(walk.iv, tweak, &ctx->tweak_key);
-
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- while ((nbytes = walk.nbytes) != 0) {
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- if (enc)
- aes_p8_xts_encrypt(walk.src.virt.addr,
- walk.dst.virt.addr,
- round_down(nbytes, AES_BLOCK_SIZE),
- &ctx->enc_key, NULL, tweak);
- else
- aes_p8_xts_decrypt(walk.src.virt.addr,
- walk.dst.virt.addr,
- round_down(nbytes, AES_BLOCK_SIZE),
- &ctx->dec_key, NULL, tweak);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- ret = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE);
- }
- return ret;
-}
-
-static int p8_aes_xts_encrypt(struct skcipher_request *req)
-{
- return p8_aes_xts_crypt(req, 1);
-}
-
-static int p8_aes_xts_decrypt(struct skcipher_request *req)
-{
- return p8_aes_xts_crypt(req, 0);
-}
-
-struct skcipher_alg p8_aes_xts_alg = {
- .base.cra_name = "xts(aes)",
- .base.cra_driver_name = "p8_aes_xts",
- .base.cra_module = THIS_MODULE,
- .base.cra_priority = 2000,
- .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
- .base.cra_blocksize = AES_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct p8_aes_xts_ctx),
- .setkey = p8_aes_xts_setkey,
- .encrypt = p8_aes_xts_encrypt,
- .decrypt = p8_aes_xts_decrypt,
- .init = p8_aes_xts_init,
- .exit = p8_aes_xts_exit,
- .min_keysize = 2 * AES_MIN_KEY_SIZE,
- .max_keysize = 2 * AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
-};
diff --git a/drivers/crypto/vmx/aesp8-ppc.h b/drivers/crypto/vmx/aesp8-ppc.h
deleted file mode 100644
index 5764d4438388..000000000000
--- a/drivers/crypto/vmx/aesp8-ppc.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <linux/types.h>
-#include <crypto/aes.h>
-
-struct aes_key {
- u8 key[AES_MAX_KEYLENGTH];
- int rounds;
-};
-
-extern struct shash_alg p8_ghash_alg;
-extern struct crypto_alg p8_aes_alg;
-extern struct skcipher_alg p8_aes_cbc_alg;
-extern struct skcipher_alg p8_aes_ctr_alg;
-extern struct skcipher_alg p8_aes_xts_alg;
-
-int aes_p8_set_encrypt_key(const u8 *userKey, const int bits,
- struct aes_key *key);
-int aes_p8_set_decrypt_key(const u8 *userKey, const int bits,
- struct aes_key *key);
-void aes_p8_encrypt(const u8 *in, u8 *out, const struct aes_key *key);
-void aes_p8_decrypt(const u8 *in, u8 *out, const struct aes_key *key);
-void aes_p8_cbc_encrypt(const u8 *in, u8 *out, size_t len,
- const struct aes_key *key, u8 *iv, const int enc);
-void aes_p8_ctr32_encrypt_blocks(const u8 *in, u8 *out,
- size_t len, const struct aes_key *key,
- const u8 *iv);
-void aes_p8_xts_encrypt(const u8 *in, u8 *out, size_t len,
- const struct aes_key *key1, const struct aes_key *key2, u8 *iv);
-void aes_p8_xts_decrypt(const u8 *in, u8 *out, size_t len,
- const struct aes_key *key1, const struct aes_key *key2, u8 *iv);
diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl
deleted file mode 100644
index f729589d792e..000000000000
--- a/drivers/crypto/vmx/aesp8-ppc.pl
+++ /dev/null
@@ -1,3889 +0,0 @@
-#! /usr/bin/env perl
-# SPDX-License-Identifier: GPL-2.0
-
-# This code is taken from CRYPTOGAMs[1] and is included here using the option
-# in the license to distribute the code under the GPL. Therefore this program
-# is free software; you can redistribute it and/or modify it under the terms of
-# the GNU General Public License version 2 as published by the Free Software
-# Foundation.
-#
-# [1] https://www.openssl.org/~appro/cryptogams/
-
-# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# * Redistributions of source code must retain copyright notices,
-# this list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above
-# copyright notice, this list of conditions and the following
-# disclaimer in the documentation and/or other materials
-# provided with the distribution.
-#
-# * Neither the name of the CRYPTOGAMS nor the names of its
-# copyright holder and contributors may be used to endorse or
-# promote products derived from this software without specific
-# prior written permission.
-#
-# ALTERNATIVELY, provided that this notice is retained in full, this
-# product may be distributed under the terms of the GNU General Public
-# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
-# those given above.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-# ====================================================================
-# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see https://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-#
-# This module implements support for AES instructions as per PowerISA
-# specification version 2.07, first implemented by POWER8 processor.
-# The module is endian-agnostic in sense that it supports both big-
-# and little-endian cases. Data alignment in parallelizable modes is
-# handled with VSX loads and stores, which implies MSR.VSX flag being
-# set. It should also be noted that ISA specification doesn't prohibit
-# alignment exceptions for these instructions on page boundaries.
-# Initially alignment was handled in pure AltiVec/VMX way [when data
-# is aligned programmatically, which in turn guarantees exception-
-# free execution], but it turned to hamper performance when vcipher
-# instructions are interleaved. It's reckoned that eventual
-# misalignment penalties at page boundaries are in average lower
-# than additional overhead in pure AltiVec approach.
-#
-# May 2016
-#
-# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
-# systems were measured.
-#
-######################################################################
-# Current large-block performance in cycles per byte processed with
-# 128-bit key (less is better).
-#
-# CBC en-/decrypt CTR XTS
-# POWER8[le] 3.96/0.72 0.74 1.1
-# POWER8[be] 3.75/0.65 0.66 1.0
-
-$flavour = shift;
-
-if ($flavour =~ /64/) {
- $SIZE_T =8;
- $LRSAVE =2*$SIZE_T;
- $STU ="stdu";
- $POP ="ld";
- $PUSH ="std";
- $UCMP ="cmpld";
- $SHL ="sldi";
-} elsif ($flavour =~ /32/) {
- $SIZE_T =4;
- $LRSAVE =$SIZE_T;
- $STU ="stwu";
- $POP ="lwz";
- $PUSH ="stw";
- $UCMP ="cmplw";
- $SHL ="slwi";
-} else { die "nonsense $flavour"; }
-
-$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
-die "can't locate ppc-xlate.pl";
-
-open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
-
-$FRAME=8*$SIZE_T;
-$prefix="aes_p8";
-
-$sp="r1";
-$vrsave="r12";
-
-#########################################################################
-{{{ # Key setup procedures #
-my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
-my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
-my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
-
-$code.=<<___;
-.machine "any"
-
-.text
-
-.align 7
-rcon:
-.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
-.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
-.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
-.long 0,0,0,0 ?asis
-.long 0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe
-Lconsts:
- mflr r0
- bcl 20,31,\$+4
- mflr $ptr #vvvvv "distance between . and rcon
- addi $ptr,$ptr,-0x58
- mtlr r0
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,0,0
-.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
-
-.globl .${prefix}_set_encrypt_key
-Lset_encrypt_key:
- mflr r11
- $PUSH r11,$LRSAVE($sp)
-
- li $ptr,-1
- ${UCMP}i $inp,0
- beq- Lenc_key_abort # if ($inp==0) return -1;
- ${UCMP}i $out,0
- beq- Lenc_key_abort # if ($out==0) return -1;
- li $ptr,-2
- cmpwi $bits,128
- blt- Lenc_key_abort
- cmpwi $bits,256
- bgt- Lenc_key_abort
- andi. r0,$bits,0x3f
- bne- Lenc_key_abort
-
- lis r0,0xfff0
- mfspr $vrsave,256
- mtspr 256,r0
-
- bl Lconsts
- mtlr r11
-
- neg r9,$inp
- lvx $in0,0,$inp
- addi $inp,$inp,15 # 15 is not typo
- lvsr $key,0,r9 # borrow $key
- li r8,0x20
- cmpwi $bits,192
- lvx $in1,0,$inp
- le?vspltisb $mask,0x0f # borrow $mask
- lvx $rcon,0,$ptr
- le?vxor $key,$key,$mask # adjust for byte swap
- lvx $mask,r8,$ptr
- addi $ptr,$ptr,0x10
- vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
- li $cnt,8
- vxor $zero,$zero,$zero
- mtctr $cnt
-
- ?lvsr $outperm,0,$out
- vspltisb $outmask,-1
- lvx $outhead,0,$out
- ?vperm $outmask,$zero,$outmask,$outperm
-
- blt Loop128
- addi $inp,$inp,8
- beq L192
- addi $inp,$inp,8
- b L256
-
-.align 4
-Loop128:
- vperm $key,$in0,$in0,$mask # rotate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vcipherlast $key,$key,$rcon
- stvx $stage,0,$out
- addi $out,$out,16
-
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vadduwm $rcon,$rcon,$rcon
- vxor $in0,$in0,$key
- bdnz Loop128
-
- lvx $rcon,0,$ptr # last two round keys
-
- vperm $key,$in0,$in0,$mask # rotate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vcipherlast $key,$key,$rcon
- stvx $stage,0,$out
- addi $out,$out,16
-
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vadduwm $rcon,$rcon,$rcon
- vxor $in0,$in0,$key
-
- vperm $key,$in0,$in0,$mask # rotate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vcipherlast $key,$key,$rcon
- stvx $stage,0,$out
- addi $out,$out,16
-
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vxor $in0,$in0,$key
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- stvx $stage,0,$out
-
- addi $inp,$out,15 # 15 is not typo
- addi $out,$out,0x50
-
- li $rounds,10
- b Ldone
-
-.align 4
-L192:
- lvx $tmp,0,$inp
- li $cnt,4
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- stvx $stage,0,$out
- addi $out,$out,16
- vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
- vspltisb $key,8 # borrow $key
- mtctr $cnt
- vsububm $mask,$mask,$key # adjust the mask
-
-Loop192:
- vperm $key,$in1,$in1,$mask # roate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vcipherlast $key,$key,$rcon
-
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
-
- vsldoi $stage,$zero,$in1,8
- vspltw $tmp,$in0,3
- vxor $tmp,$tmp,$in1
- vsldoi $in1,$zero,$in1,12 # >>32
- vadduwm $rcon,$rcon,$rcon
- vxor $in1,$in1,$tmp
- vxor $in0,$in0,$key
- vxor $in1,$in1,$key
- vsldoi $stage,$stage,$in0,8
-
- vperm $key,$in1,$in1,$mask # rotate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vperm $outtail,$stage,$stage,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vcipherlast $key,$key,$rcon
- stvx $stage,0,$out
- addi $out,$out,16
-
- vsldoi $stage,$in0,$in1,8
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vperm $outtail,$stage,$stage,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- stvx $stage,0,$out
- addi $out,$out,16
-
- vspltw $tmp,$in0,3
- vxor $tmp,$tmp,$in1
- vsldoi $in1,$zero,$in1,12 # >>32
- vadduwm $rcon,$rcon,$rcon
- vxor $in1,$in1,$tmp
- vxor $in0,$in0,$key
- vxor $in1,$in1,$key
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- stvx $stage,0,$out
- addi $inp,$out,15 # 15 is not typo
- addi $out,$out,16
- bdnz Loop192
-
- li $rounds,12
- addi $out,$out,0x20
- b Ldone
-
-.align 4
-L256:
- lvx $tmp,0,$inp
- li $cnt,7
- li $rounds,14
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- stvx $stage,0,$out
- addi $out,$out,16
- vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
- mtctr $cnt
-
-Loop256:
- vperm $key,$in1,$in1,$mask # rotate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vperm $outtail,$in1,$in1,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vcipherlast $key,$key,$rcon
- stvx $stage,0,$out
- addi $out,$out,16
-
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vadduwm $rcon,$rcon,$rcon
- vxor $in0,$in0,$key
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- stvx $stage,0,$out
- addi $inp,$out,15 # 15 is not typo
- addi $out,$out,16
- bdz Ldone
-
- vspltw $key,$in0,3 # just splat
- vsldoi $tmp,$zero,$in1,12 # >>32
- vsbox $key,$key
-
- vxor $in1,$in1,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in1,$in1,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in1,$in1,$tmp
-
- vxor $in1,$in1,$key
- b Loop256
-
-.align 4
-Ldone:
- lvx $in1,0,$inp # redundant in aligned case
- vsel $in1,$outhead,$in1,$outmask
- stvx $in1,0,$inp
- li $ptr,0
- mtspr 256,$vrsave
- stw $rounds,0($out)
-
-Lenc_key_abort:
- mr r3,$ptr
- blr
- .long 0
- .byte 0,12,0x14,1,0,0,3,0
- .long 0
-.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
-
-.globl .${prefix}_set_decrypt_key
- $STU $sp,-$FRAME($sp)
- mflr r10
- $PUSH r10,$FRAME+$LRSAVE($sp)
- bl Lset_encrypt_key
- mtlr r10
-
- cmpwi r3,0
- bne- Ldec_key_abort
-
- slwi $cnt,$rounds,4
- subi $inp,$out,240 # first round key
- srwi $rounds,$rounds,1
- add $out,$inp,$cnt # last round key
- mtctr $rounds
-
-Ldeckey:
- lwz r0, 0($inp)
- lwz r6, 4($inp)
- lwz r7, 8($inp)
- lwz r8, 12($inp)
- addi $inp,$inp,16
- lwz r9, 0($out)
- lwz r10,4($out)
- lwz r11,8($out)
- lwz r12,12($out)
- stw r0, 0($out)
- stw r6, 4($out)
- stw r7, 8($out)
- stw r8, 12($out)
- subi $out,$out,16
- stw r9, -16($inp)
- stw r10,-12($inp)
- stw r11,-8($inp)
- stw r12,-4($inp)
- bdnz Ldeckey
-
- xor r3,r3,r3 # return value
-Ldec_key_abort:
- addi $sp,$sp,$FRAME
- blr
- .long 0
- .byte 0,12,4,1,0x80,0,3,0
- .long 0
-.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
-___
-}}}
-#########################################################################
-{{{ # Single block en- and decrypt procedures #
-sub gen_block () {
-my $dir = shift;
-my $n = $dir eq "de" ? "n" : "";
-my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
-
-$code.=<<___;
-.globl .${prefix}_${dir}crypt
- lwz $rounds,240($key)
- lis r0,0xfc00
- mfspr $vrsave,256
- li $idx,15 # 15 is not typo
- mtspr 256,r0
-
- lvx v0,0,$inp
- neg r11,$out
- lvx v1,$idx,$inp
- lvsl v2,0,$inp # inpperm
- le?vspltisb v4,0x0f
- ?lvsl v3,0,r11 # outperm
- le?vxor v2,v2,v4
- li $idx,16
- vperm v0,v0,v1,v2 # align [and byte swap in LE]
- lvx v1,0,$key
- ?lvsl v5,0,$key # keyperm
- srwi $rounds,$rounds,1
- lvx v2,$idx,$key
- addi $idx,$idx,16
- subi $rounds,$rounds,1
- ?vperm v1,v1,v2,v5 # align round key
-
- vxor v0,v0,v1
- lvx v1,$idx,$key
- addi $idx,$idx,16
- mtctr $rounds
-
-Loop_${dir}c:
- ?vperm v2,v2,v1,v5
- v${n}cipher v0,v0,v2
- lvx v2,$idx,$key
- addi $idx,$idx,16
- ?vperm v1,v1,v2,v5
- v${n}cipher v0,v0,v1
- lvx v1,$idx,$key
- addi $idx,$idx,16
- bdnz Loop_${dir}c
-
- ?vperm v2,v2,v1,v5
- v${n}cipher v0,v0,v2
- lvx v2,$idx,$key
- ?vperm v1,v1,v2,v5
- v${n}cipherlast v0,v0,v1
-
- vspltisb v2,-1
- vxor v1,v1,v1
- li $idx,15 # 15 is not typo
- ?vperm v2,v1,v2,v3 # outmask
- le?vxor v3,v3,v4
- lvx v1,0,$out # outhead
- vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
- vsel v1,v1,v0,v2
- lvx v4,$idx,$out
- stvx v1,0,$out
- vsel v0,v0,v4,v2
- stvx v0,$idx,$out
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,3,0
- .long 0
-.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
-___
-}
-&gen_block("en");
-&gen_block("de");
-}}}
-#########################################################################
-{{{ # CBC en- and decrypt procedures #
-my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
-my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
-my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
- map("v$_",(4..10));
-$code.=<<___;
-.globl .${prefix}_cbc_encrypt
- ${UCMP}i $len,16
- bltlr-
-
- cmpwi $enc,0 # test direction
- lis r0,0xffe0
- mfspr $vrsave,256
- mtspr 256,r0
-
- li $idx,15
- vxor $rndkey0,$rndkey0,$rndkey0
- le?vspltisb $tmp,0x0f
-
- lvx $ivec,0,$ivp # load [unaligned] iv
- lvsl $inpperm,0,$ivp
- lvx $inptail,$idx,$ivp
- le?vxor $inpperm,$inpperm,$tmp
- vperm $ivec,$ivec,$inptail,$inpperm
-
- neg r11,$inp
- ?lvsl $keyperm,0,$key # prepare for unaligned key
- lwz $rounds,240($key)
-
- lvsr $inpperm,0,r11 # prepare for unaligned load
- lvx $inptail,0,$inp
- addi $inp,$inp,15 # 15 is not typo
- le?vxor $inpperm,$inpperm,$tmp
-
- ?lvsr $outperm,0,$out # prepare for unaligned store
- vspltisb $outmask,-1
- lvx $outhead,0,$out
- ?vperm $outmask,$rndkey0,$outmask,$outperm
- le?vxor $outperm,$outperm,$tmp
-
- srwi $rounds,$rounds,1
- li $idx,16
- subi $rounds,$rounds,1
- beq Lcbc_dec
-
-Lcbc_enc:
- vmr $inout,$inptail
- lvx $inptail,0,$inp
- addi $inp,$inp,16
- mtctr $rounds
- subi $len,$len,16 # len-=16
-
- lvx $rndkey0,0,$key
- vperm $inout,$inout,$inptail,$inpperm
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- vxor $inout,$inout,$ivec
-
-Loop_cbc_enc:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- bdnz Loop_cbc_enc
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- li $idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipherlast $ivec,$inout,$rndkey0
- ${UCMP}i $len,16
-
- vperm $tmp,$ivec,$ivec,$outperm
- vsel $inout,$outhead,$tmp,$outmask
- vmr $outhead,$tmp
- stvx $inout,0,$out
- addi $out,$out,16
- bge Lcbc_enc
-
- b Lcbc_done
-
-.align 4
-Lcbc_dec:
- ${UCMP}i $len,128
- bge _aesp8_cbc_decrypt8x
- vmr $tmp,$inptail
- lvx $inptail,0,$inp
- addi $inp,$inp,16
- mtctr $rounds
- subi $len,$len,16 # len-=16
-
- lvx $rndkey0,0,$key
- vperm $tmp,$tmp,$inptail,$inpperm
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$tmp,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
-
-Loop_cbc_dec:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vncipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- bdnz Loop_cbc_dec
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- li $idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vncipherlast $inout,$inout,$rndkey0
- ${UCMP}i $len,16
-
- vxor $inout,$inout,$ivec
- vmr $ivec,$tmp
- vperm $tmp,$inout,$inout,$outperm
- vsel $inout,$outhead,$tmp,$outmask
- vmr $outhead,$tmp
- stvx $inout,0,$out
- addi $out,$out,16
- bge Lcbc_dec
-
-Lcbc_done:
- addi $out,$out,-1
- lvx $inout,0,$out # redundant in aligned case
- vsel $inout,$outhead,$inout,$outmask
- stvx $inout,0,$out
-
- neg $enc,$ivp # write [unaligned] iv
- li $idx,15 # 15 is not typo
- vxor $rndkey0,$rndkey0,$rndkey0
- vspltisb $outmask,-1
- le?vspltisb $tmp,0x0f
- ?lvsl $outperm,0,$enc
- ?vperm $outmask,$rndkey0,$outmask,$outperm
- le?vxor $outperm,$outperm,$tmp
- lvx $outhead,0,$ivp
- vperm $ivec,$ivec,$ivec,$outperm
- vsel $inout,$outhead,$ivec,$outmask
- lvx $inptail,$idx,$ivp
- stvx $inout,0,$ivp
- vsel $inout,$ivec,$inptail,$outmask
- stvx $inout,$idx,$ivp
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,6,0
- .long 0
-___
-#########################################################################
-{{ # Optimized CBC decrypt procedure #
-my $key_="r11";
-my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
-my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
-my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
-my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
- # v26-v31 last 6 round keys
-my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
-
-$code.=<<___;
-.align 5
-_aesp8_cbc_decrypt8x:
- $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
- li r10,`$FRAME+8*16+15`
- li r11,`$FRAME+8*16+31`
- stvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- stvx v21,r11,$sp
- addi r11,r11,32
- stvx v22,r10,$sp
- addi r10,r10,32
- stvx v23,r11,$sp
- addi r11,r11,32
- stvx v24,r10,$sp
- addi r10,r10,32
- stvx v25,r11,$sp
- addi r11,r11,32
- stvx v26,r10,$sp
- addi r10,r10,32
- stvx v27,r11,$sp
- addi r11,r11,32
- stvx v28,r10,$sp
- addi r10,r10,32
- stvx v29,r11,$sp
- addi r11,r11,32
- stvx v30,r10,$sp
- stvx v31,r11,$sp
- li r0,-1
- stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
- li $x10,0x10
- $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- li $x20,0x20
- $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- li $x30,0x30
- $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- li $x40,0x40
- $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- li $x50,0x50
- $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- li $x60,0x60
- $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- li $x70,0x70
- mtspr 256,r0
-
- subi $rounds,$rounds,3 # -4 in total
- subi $len,$len,128 # bias
-
- lvx $rndkey0,$x00,$key # load key schedule
- lvx v30,$x10,$key
- addi $key,$key,0x20
- lvx v31,$x00,$key
- ?vperm $rndkey0,$rndkey0,v30,$keyperm
- addi $key_,$sp,$FRAME+15
- mtctr $rounds
-
-Load_cbc_dec_key:
- ?vperm v24,v30,v31,$keyperm
- lvx v30,$x10,$key
- addi $key,$key,0x20
- stvx v24,$x00,$key_ # off-load round[1]
- ?vperm v25,v31,v30,$keyperm
- lvx v31,$x00,$key
- stvx v25,$x10,$key_ # off-load round[2]
- addi $key_,$key_,0x20
- bdnz Load_cbc_dec_key
-
- lvx v26,$x10,$key
- ?vperm v24,v30,v31,$keyperm
- lvx v27,$x20,$key
- stvx v24,$x00,$key_ # off-load round[3]
- ?vperm v25,v31,v26,$keyperm
- lvx v28,$x30,$key
- stvx v25,$x10,$key_ # off-load round[4]
- addi $key_,$sp,$FRAME+15 # rewind $key_
- ?vperm v26,v26,v27,$keyperm
- lvx v29,$x40,$key
- ?vperm v27,v27,v28,$keyperm
- lvx v30,$x50,$key
- ?vperm v28,v28,v29,$keyperm
- lvx v31,$x60,$key
- ?vperm v29,v29,v30,$keyperm
- lvx $out0,$x70,$key # borrow $out0
- ?vperm v30,v30,v31,$keyperm
- lvx v24,$x00,$key_ # pre-load round[1]
- ?vperm v31,v31,$out0,$keyperm
- lvx v25,$x10,$key_ # pre-load round[2]
-
- #lvx $inptail,0,$inp # "caller" already did this
- #addi $inp,$inp,15 # 15 is not typo
- subi $inp,$inp,15 # undo "caller"
-
- le?li $idx,8
- lvx_u $in0,$x00,$inp # load first 8 "words"
- le?lvsl $inpperm,0,$idx
- le?vspltisb $tmp,0x0f
- lvx_u $in1,$x10,$inp
- le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
- lvx_u $in2,$x20,$inp
- le?vperm $in0,$in0,$in0,$inpperm
- lvx_u $in3,$x30,$inp
- le?vperm $in1,$in1,$in1,$inpperm
- lvx_u $in4,$x40,$inp
- le?vperm $in2,$in2,$in2,$inpperm
- vxor $out0,$in0,$rndkey0
- lvx_u $in5,$x50,$inp
- le?vperm $in3,$in3,$in3,$inpperm
- vxor $out1,$in1,$rndkey0
- lvx_u $in6,$x60,$inp
- le?vperm $in4,$in4,$in4,$inpperm
- vxor $out2,$in2,$rndkey0
- lvx_u $in7,$x70,$inp
- addi $inp,$inp,0x80
- le?vperm $in5,$in5,$in5,$inpperm
- vxor $out3,$in3,$rndkey0
- le?vperm $in6,$in6,$in6,$inpperm
- vxor $out4,$in4,$rndkey0
- le?vperm $in7,$in7,$in7,$inpperm
- vxor $out5,$in5,$rndkey0
- vxor $out6,$in6,$rndkey0
- vxor $out7,$in7,$rndkey0
-
- mtctr $rounds
- b Loop_cbc_dec8x
-.align 5
-Loop_cbc_dec8x:
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
- vncipher $out6,$out6,v24
- vncipher $out7,$out7,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
- vncipher $out6,$out6,v25
- vncipher $out7,$out7,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_cbc_dec8x
-
- subic $len,$len,128 # $len-=128
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
- vncipher $out6,$out6,v24
- vncipher $out7,$out7,v24
-
- subfe. r0,r0,r0 # borrow?-1:0
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
- vncipher $out6,$out6,v25
- vncipher $out7,$out7,v25
-
- and r0,r0,$len
- vncipher $out0,$out0,v26
- vncipher $out1,$out1,v26
- vncipher $out2,$out2,v26
- vncipher $out3,$out3,v26
- vncipher $out4,$out4,v26
- vncipher $out5,$out5,v26
- vncipher $out6,$out6,v26
- vncipher $out7,$out7,v26
-
- add $inp,$inp,r0 # $inp is adjusted in such
- # way that at exit from the
- # loop inX-in7 are loaded
- # with last "words"
- vncipher $out0,$out0,v27
- vncipher $out1,$out1,v27
- vncipher $out2,$out2,v27
- vncipher $out3,$out3,v27
- vncipher $out4,$out4,v27
- vncipher $out5,$out5,v27
- vncipher $out6,$out6,v27
- vncipher $out7,$out7,v27
-
- addi $key_,$sp,$FRAME+15 # rewind $key_
- vncipher $out0,$out0,v28
- vncipher $out1,$out1,v28
- vncipher $out2,$out2,v28
- vncipher $out3,$out3,v28
- vncipher $out4,$out4,v28
- vncipher $out5,$out5,v28
- vncipher $out6,$out6,v28
- vncipher $out7,$out7,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
-
- vncipher $out0,$out0,v29
- vncipher $out1,$out1,v29
- vncipher $out2,$out2,v29
- vncipher $out3,$out3,v29
- vncipher $out4,$out4,v29
- vncipher $out5,$out5,v29
- vncipher $out6,$out6,v29
- vncipher $out7,$out7,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
-
- vncipher $out0,$out0,v30
- vxor $ivec,$ivec,v31 # xor with last round key
- vncipher $out1,$out1,v30
- vxor $in0,$in0,v31
- vncipher $out2,$out2,v30
- vxor $in1,$in1,v31
- vncipher $out3,$out3,v30
- vxor $in2,$in2,v31
- vncipher $out4,$out4,v30
- vxor $in3,$in3,v31
- vncipher $out5,$out5,v30
- vxor $in4,$in4,v31
- vncipher $out6,$out6,v30
- vxor $in5,$in5,v31
- vncipher $out7,$out7,v30
- vxor $in6,$in6,v31
-
- vncipherlast $out0,$out0,$ivec
- vncipherlast $out1,$out1,$in0
- lvx_u $in0,$x00,$inp # load next input block
- vncipherlast $out2,$out2,$in1
- lvx_u $in1,$x10,$inp
- vncipherlast $out3,$out3,$in2
- le?vperm $in0,$in0,$in0,$inpperm
- lvx_u $in2,$x20,$inp
- vncipherlast $out4,$out4,$in3
- le?vperm $in1,$in1,$in1,$inpperm
- lvx_u $in3,$x30,$inp
- vncipherlast $out5,$out5,$in4
- le?vperm $in2,$in2,$in2,$inpperm
- lvx_u $in4,$x40,$inp
- vncipherlast $out6,$out6,$in5
- le?vperm $in3,$in3,$in3,$inpperm
- lvx_u $in5,$x50,$inp
- vncipherlast $out7,$out7,$in6
- le?vperm $in4,$in4,$in4,$inpperm
- lvx_u $in6,$x60,$inp
- vmr $ivec,$in7
- le?vperm $in5,$in5,$in5,$inpperm
- lvx_u $in7,$x70,$inp
- addi $inp,$inp,0x80
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $in6,$in6,$in6,$inpperm
- vxor $out0,$in0,$rndkey0
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $in7,$in7,$in7,$inpperm
- vxor $out1,$in1,$rndkey0
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- vxor $out2,$in2,$rndkey0
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x30,$out
- vxor $out3,$in3,$rndkey0
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x40,$out
- vxor $out4,$in4,$rndkey0
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x50,$out
- vxor $out5,$in5,$rndkey0
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x60,$out
- vxor $out6,$in6,$rndkey0
- stvx_u $out7,$x70,$out
- addi $out,$out,0x80
- vxor $out7,$in7,$rndkey0
-
- mtctr $rounds
- beq Loop_cbc_dec8x # did $len-=128 borrow?
-
- addic. $len,$len,128
- beq Lcbc_dec8x_done
- nop
- nop
-
-Loop_cbc_dec8x_tail: # up to 7 "words" tail...
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
- vncipher $out6,$out6,v24
- vncipher $out7,$out7,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
- vncipher $out6,$out6,v25
- vncipher $out7,$out7,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_cbc_dec8x_tail
-
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
- vncipher $out6,$out6,v24
- vncipher $out7,$out7,v24
-
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
- vncipher $out6,$out6,v25
- vncipher $out7,$out7,v25
-
- vncipher $out1,$out1,v26
- vncipher $out2,$out2,v26
- vncipher $out3,$out3,v26
- vncipher $out4,$out4,v26
- vncipher $out5,$out5,v26
- vncipher $out6,$out6,v26
- vncipher $out7,$out7,v26
-
- vncipher $out1,$out1,v27
- vncipher $out2,$out2,v27
- vncipher $out3,$out3,v27
- vncipher $out4,$out4,v27
- vncipher $out5,$out5,v27
- vncipher $out6,$out6,v27
- vncipher $out7,$out7,v27
-
- vncipher $out1,$out1,v28
- vncipher $out2,$out2,v28
- vncipher $out3,$out3,v28
- vncipher $out4,$out4,v28
- vncipher $out5,$out5,v28
- vncipher $out6,$out6,v28
- vncipher $out7,$out7,v28
-
- vncipher $out1,$out1,v29
- vncipher $out2,$out2,v29
- vncipher $out3,$out3,v29
- vncipher $out4,$out4,v29
- vncipher $out5,$out5,v29
- vncipher $out6,$out6,v29
- vncipher $out7,$out7,v29
-
- vncipher $out1,$out1,v30
- vxor $ivec,$ivec,v31 # last round key
- vncipher $out2,$out2,v30
- vxor $in1,$in1,v31
- vncipher $out3,$out3,v30
- vxor $in2,$in2,v31
- vncipher $out4,$out4,v30
- vxor $in3,$in3,v31
- vncipher $out5,$out5,v30
- vxor $in4,$in4,v31
- vncipher $out6,$out6,v30
- vxor $in5,$in5,v31
- vncipher $out7,$out7,v30
- vxor $in6,$in6,v31
-
- cmplwi $len,32 # switch($len)
- blt Lcbc_dec8x_one
- nop
- beq Lcbc_dec8x_two
- cmplwi $len,64
- blt Lcbc_dec8x_three
- nop
- beq Lcbc_dec8x_four
- cmplwi $len,96
- blt Lcbc_dec8x_five
- nop
- beq Lcbc_dec8x_six
-
-Lcbc_dec8x_seven:
- vncipherlast $out1,$out1,$ivec
- vncipherlast $out2,$out2,$in1
- vncipherlast $out3,$out3,$in2
- vncipherlast $out4,$out4,$in3
- vncipherlast $out5,$out5,$in4
- vncipherlast $out6,$out6,$in5
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out1,$out1,$out1,$inpperm
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x00,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x10,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x20,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x30,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x40,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x50,$out
- stvx_u $out7,$x60,$out
- addi $out,$out,0x70
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_six:
- vncipherlast $out2,$out2,$ivec
- vncipherlast $out3,$out3,$in2
- vncipherlast $out4,$out4,$in3
- vncipherlast $out5,$out5,$in4
- vncipherlast $out6,$out6,$in5
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out2,$out2,$out2,$inpperm
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x00,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x10,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x20,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x30,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x40,$out
- stvx_u $out7,$x50,$out
- addi $out,$out,0x60
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_five:
- vncipherlast $out3,$out3,$ivec
- vncipherlast $out4,$out4,$in3
- vncipherlast $out5,$out5,$in4
- vncipherlast $out6,$out6,$in5
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out3,$out3,$out3,$inpperm
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x00,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x10,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x20,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x30,$out
- stvx_u $out7,$x40,$out
- addi $out,$out,0x50
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_four:
- vncipherlast $out4,$out4,$ivec
- vncipherlast $out5,$out5,$in4
- vncipherlast $out6,$out6,$in5
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out4,$out4,$out4,$inpperm
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x00,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x10,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x20,$out
- stvx_u $out7,$x30,$out
- addi $out,$out,0x40
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_three:
- vncipherlast $out5,$out5,$ivec
- vncipherlast $out6,$out6,$in5
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out5,$out5,$out5,$inpperm
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x00,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x10,$out
- stvx_u $out7,$x20,$out
- addi $out,$out,0x30
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_two:
- vncipherlast $out6,$out6,$ivec
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out6,$out6,$out6,$inpperm
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x00,$out
- stvx_u $out7,$x10,$out
- addi $out,$out,0x20
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_one:
- vncipherlast $out7,$out7,$ivec
- vmr $ivec,$in7
-
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out7,0,$out
- addi $out,$out,0x10
-
-Lcbc_dec8x_done:
- le?vperm $ivec,$ivec,$ivec,$inpperm
- stvx_u $ivec,0,$ivp # write [unaligned] iv
-
- li r10,`$FRAME+15`
- li r11,`$FRAME+31`
- stvx $inpperm,r10,$sp # wipe copies of round keys
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
-
- mtspr 256,$vrsave
- lvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- lvx v21,r11,$sp
- addi r11,r11,32
- lvx v22,r10,$sp
- addi r10,r10,32
- lvx v23,r11,$sp
- addi r11,r11,32
- lvx v24,r10,$sp
- addi r10,r10,32
- lvx v25,r11,$sp
- addi r11,r11,32
- lvx v26,r10,$sp
- addi r10,r10,32
- lvx v27,r11,$sp
- addi r11,r11,32
- lvx v28,r10,$sp
- addi r10,r10,32
- lvx v29,r11,$sp
- addi r11,r11,32
- lvx v30,r10,$sp
- lvx v31,r11,$sp
- $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
- blr
- .long 0
- .byte 0,12,0x14,0,0x80,6,6,0
- .long 0
-.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
-___
-}} }}}
-
-#########################################################################
-{{{ # CTR procedure[s] #
-
-####################### WARNING: Here be dragons! #######################
-#
-# This code is written as 'ctr32', based on a 32-bit counter used
-# upstream. The kernel does *not* use a 32-bit counter. The kernel uses
-# a 128-bit counter.
-#
-# This leads to subtle changes from the upstream code: the counter
-# is incremented with vaddu_q_m rather than vaddu_w_m. This occurs in
-# both the bulk (8 blocks at a time) path, and in the individual block
-# path. Be aware of this when doing updates.
-#
-# See:
-# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
-# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
-# https://github.com/openssl/openssl/pull/8942
-#
-#########################################################################
-my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
-my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
-my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
- map("v$_",(4..11));
-my $dat=$tmp;
-
-$code.=<<___;
-.globl .${prefix}_ctr32_encrypt_blocks
- ${UCMP}i $len,1
- bltlr-
-
- lis r0,0xfff0
- mfspr $vrsave,256
- mtspr 256,r0
-
- li $idx,15
- vxor $rndkey0,$rndkey0,$rndkey0
- le?vspltisb $tmp,0x0f
-
- lvx $ivec,0,$ivp # load [unaligned] iv
- lvsl $inpperm,0,$ivp
- lvx $inptail,$idx,$ivp
- vspltisb $one,1
- le?vxor $inpperm,$inpperm,$tmp
- vperm $ivec,$ivec,$inptail,$inpperm
- vsldoi $one,$rndkey0,$one,1
-
- neg r11,$inp
- ?lvsl $keyperm,0,$key # prepare for unaligned key
- lwz $rounds,240($key)
-
- lvsr $inpperm,0,r11 # prepare for unaligned load
- lvx $inptail,0,$inp
- addi $inp,$inp,15 # 15 is not typo
- le?vxor $inpperm,$inpperm,$tmp
-
- srwi $rounds,$rounds,1
- li $idx,16
- subi $rounds,$rounds,1
-
- ${UCMP}i $len,8
- bge _aesp8_ctr32_encrypt8x
-
- ?lvsr $outperm,0,$out # prepare for unaligned store
- vspltisb $outmask,-1
- lvx $outhead,0,$out
- ?vperm $outmask,$rndkey0,$outmask,$outperm
- le?vxor $outperm,$outperm,$tmp
-
- lvx $rndkey0,0,$key
- mtctr $rounds
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$ivec,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- b Loop_ctr32_enc
-
-.align 5
-Loop_ctr32_enc:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- bdnz Loop_ctr32_enc
-
- vadduqm $ivec,$ivec,$one # Kernel change for 128-bit
- vmr $dat,$inptail
- lvx $inptail,0,$inp
- addi $inp,$inp,16
- subic. $len,$len,1 # blocks--
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- vperm $dat,$dat,$inptail,$inpperm
- li $idx,16
- ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
- lvx $rndkey0,0,$key
- vxor $dat,$dat,$rndkey1 # last round key
- vcipherlast $inout,$inout,$dat
-
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- vperm $inout,$inout,$inout,$outperm
- vsel $dat,$outhead,$inout,$outmask
- mtctr $rounds
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vmr $outhead,$inout
- vxor $inout,$ivec,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- stvx $dat,0,$out
- addi $out,$out,16
- bne Loop_ctr32_enc
-
- addi $out,$out,-1
- lvx $inout,0,$out # redundant in aligned case
- vsel $inout,$outhead,$inout,$outmask
- stvx $inout,0,$out
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,6,0
- .long 0
-___
-#########################################################################
-{{ # Optimized CTR procedure #
-my $key_="r11";
-my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
-my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
-my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
-my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
- # v26-v31 last 6 round keys
-my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
-my ($two,$three,$four)=($outhead,$outperm,$outmask);
-
-$code.=<<___;
-.align 5
-_aesp8_ctr32_encrypt8x:
- $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
- li r10,`$FRAME+8*16+15`
- li r11,`$FRAME+8*16+31`
- stvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- stvx v21,r11,$sp
- addi r11,r11,32
- stvx v22,r10,$sp
- addi r10,r10,32
- stvx v23,r11,$sp
- addi r11,r11,32
- stvx v24,r10,$sp
- addi r10,r10,32
- stvx v25,r11,$sp
- addi r11,r11,32
- stvx v26,r10,$sp
- addi r10,r10,32
- stvx v27,r11,$sp
- addi r11,r11,32
- stvx v28,r10,$sp
- addi r10,r10,32
- stvx v29,r11,$sp
- addi r11,r11,32
- stvx v30,r10,$sp
- stvx v31,r11,$sp
- li r0,-1
- stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
- li $x10,0x10
- $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- li $x20,0x20
- $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- li $x30,0x30
- $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- li $x40,0x40
- $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- li $x50,0x50
- $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- li $x60,0x60
- $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- li $x70,0x70
- mtspr 256,r0
-
- subi $rounds,$rounds,3 # -4 in total
-
- lvx $rndkey0,$x00,$key # load key schedule
- lvx v30,$x10,$key
- addi $key,$key,0x20
- lvx v31,$x00,$key
- ?vperm $rndkey0,$rndkey0,v30,$keyperm
- addi $key_,$sp,$FRAME+15
- mtctr $rounds
-
-Load_ctr32_enc_key:
- ?vperm v24,v30,v31,$keyperm
- lvx v30,$x10,$key
- addi $key,$key,0x20
- stvx v24,$x00,$key_ # off-load round[1]
- ?vperm v25,v31,v30,$keyperm
- lvx v31,$x00,$key
- stvx v25,$x10,$key_ # off-load round[2]
- addi $key_,$key_,0x20
- bdnz Load_ctr32_enc_key
-
- lvx v26,$x10,$key
- ?vperm v24,v30,v31,$keyperm
- lvx v27,$x20,$key
- stvx v24,$x00,$key_ # off-load round[3]
- ?vperm v25,v31,v26,$keyperm
- lvx v28,$x30,$key
- stvx v25,$x10,$key_ # off-load round[4]
- addi $key_,$sp,$FRAME+15 # rewind $key_
- ?vperm v26,v26,v27,$keyperm
- lvx v29,$x40,$key
- ?vperm v27,v27,v28,$keyperm
- lvx v30,$x50,$key
- ?vperm v28,v28,v29,$keyperm
- lvx v31,$x60,$key
- ?vperm v29,v29,v30,$keyperm
- lvx $out0,$x70,$key # borrow $out0
- ?vperm v30,v30,v31,$keyperm
- lvx v24,$x00,$key_ # pre-load round[1]
- ?vperm v31,v31,$out0,$keyperm
- lvx v25,$x10,$key_ # pre-load round[2]
-
- vadduqm $two,$one,$one
- subi $inp,$inp,15 # undo "caller"
- $SHL $len,$len,4
-
- vadduqm $out1,$ivec,$one # counter values ...
- vadduqm $out2,$ivec,$two # (do all ctr adds as 128-bit)
- vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
- le?li $idx,8
- vadduqm $out3,$out1,$two
- vxor $out1,$out1,$rndkey0
- le?lvsl $inpperm,0,$idx
- vadduqm $out4,$out2,$two
- vxor $out2,$out2,$rndkey0
- le?vspltisb $tmp,0x0f
- vadduqm $out5,$out3,$two
- vxor $out3,$out3,$rndkey0
- le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
- vadduqm $out6,$out4,$two
- vxor $out4,$out4,$rndkey0
- vadduqm $out7,$out5,$two
- vxor $out5,$out5,$rndkey0
- vadduqm $ivec,$out6,$two # next counter value
- vxor $out6,$out6,$rndkey0
- vxor $out7,$out7,$rndkey0
-
- mtctr $rounds
- b Loop_ctr32_enc8x
-.align 5
-Loop_ctr32_enc8x:
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vcipher $out4,$out4,v24
- vcipher $out5,$out5,v24
- vcipher $out6,$out6,v24
- vcipher $out7,$out7,v24
-Loop_ctr32_enc8x_middle:
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vcipher $out4,$out4,v25
- vcipher $out5,$out5,v25
- vcipher $out6,$out6,v25
- vcipher $out7,$out7,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_ctr32_enc8x
-
- subic r11,$len,256 # $len-256, borrow $key_
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vcipher $out4,$out4,v24
- vcipher $out5,$out5,v24
- vcipher $out6,$out6,v24
- vcipher $out7,$out7,v24
-
- subfe r0,r0,r0 # borrow?-1:0
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vcipher $out4,$out4,v25
- vcipher $out5,$out5,v25
- vcipher $out6,$out6,v25
- vcipher $out7,$out7,v25
-
- and r0,r0,r11
- addi $key_,$sp,$FRAME+15 # rewind $key_
- vcipher $out0,$out0,v26
- vcipher $out1,$out1,v26
- vcipher $out2,$out2,v26
- vcipher $out3,$out3,v26
- vcipher $out4,$out4,v26
- vcipher $out5,$out5,v26
- vcipher $out6,$out6,v26
- vcipher $out7,$out7,v26
- lvx v24,$x00,$key_ # re-pre-load round[1]
-
- subic $len,$len,129 # $len-=129
- vcipher $out0,$out0,v27
- addi $len,$len,1 # $len-=128 really
- vcipher $out1,$out1,v27
- vcipher $out2,$out2,v27
- vcipher $out3,$out3,v27
- vcipher $out4,$out4,v27
- vcipher $out5,$out5,v27
- vcipher $out6,$out6,v27
- vcipher $out7,$out7,v27
- lvx v25,$x10,$key_ # re-pre-load round[2]
-
- vcipher $out0,$out0,v28
- lvx_u $in0,$x00,$inp # load input
- vcipher $out1,$out1,v28
- lvx_u $in1,$x10,$inp
- vcipher $out2,$out2,v28
- lvx_u $in2,$x20,$inp
- vcipher $out3,$out3,v28
- lvx_u $in3,$x30,$inp
- vcipher $out4,$out4,v28
- lvx_u $in4,$x40,$inp
- vcipher $out5,$out5,v28
- lvx_u $in5,$x50,$inp
- vcipher $out6,$out6,v28
- lvx_u $in6,$x60,$inp
- vcipher $out7,$out7,v28
- lvx_u $in7,$x70,$inp
- addi $inp,$inp,0x80
-
- vcipher $out0,$out0,v29
- le?vperm $in0,$in0,$in0,$inpperm
- vcipher $out1,$out1,v29
- le?vperm $in1,$in1,$in1,$inpperm
- vcipher $out2,$out2,v29
- le?vperm $in2,$in2,$in2,$inpperm
- vcipher $out3,$out3,v29
- le?vperm $in3,$in3,$in3,$inpperm
- vcipher $out4,$out4,v29
- le?vperm $in4,$in4,$in4,$inpperm
- vcipher $out5,$out5,v29
- le?vperm $in5,$in5,$in5,$inpperm
- vcipher $out6,$out6,v29
- le?vperm $in6,$in6,$in6,$inpperm
- vcipher $out7,$out7,v29
- le?vperm $in7,$in7,$in7,$inpperm
-
- add $inp,$inp,r0 # $inp is adjusted in such
- # way that at exit from the
- # loop inX-in7 are loaded
- # with last "words"
- subfe. r0,r0,r0 # borrow?-1:0
- vcipher $out0,$out0,v30
- vxor $in0,$in0,v31 # xor with last round key
- vcipher $out1,$out1,v30
- vxor $in1,$in1,v31
- vcipher $out2,$out2,v30
- vxor $in2,$in2,v31
- vcipher $out3,$out3,v30
- vxor $in3,$in3,v31
- vcipher $out4,$out4,v30
- vxor $in4,$in4,v31
- vcipher $out5,$out5,v30
- vxor $in5,$in5,v31
- vcipher $out6,$out6,v30
- vxor $in6,$in6,v31
- vcipher $out7,$out7,v30
- vxor $in7,$in7,v31
-
- bne Lctr32_enc8x_break # did $len-129 borrow?
-
- vcipherlast $in0,$out0,$in0
- vcipherlast $in1,$out1,$in1
- vadduqm $out1,$ivec,$one # counter values ...
- vcipherlast $in2,$out2,$in2
- vadduqm $out2,$ivec,$two
- vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
- vcipherlast $in3,$out3,$in3
- vadduqm $out3,$out1,$two
- vxor $out1,$out1,$rndkey0
- vcipherlast $in4,$out4,$in4
- vadduqm $out4,$out2,$two
- vxor $out2,$out2,$rndkey0
- vcipherlast $in5,$out5,$in5
- vadduqm $out5,$out3,$two
- vxor $out3,$out3,$rndkey0
- vcipherlast $in6,$out6,$in6
- vadduqm $out6,$out4,$two
- vxor $out4,$out4,$rndkey0
- vcipherlast $in7,$out7,$in7
- vadduqm $out7,$out5,$two
- vxor $out5,$out5,$rndkey0
- le?vperm $in0,$in0,$in0,$inpperm
- vadduqm $ivec,$out6,$two # next counter value
- vxor $out6,$out6,$rndkey0
- le?vperm $in1,$in1,$in1,$inpperm
- vxor $out7,$out7,$rndkey0
- mtctr $rounds
-
- vcipher $out0,$out0,v24
- stvx_u $in0,$x00,$out
- le?vperm $in2,$in2,$in2,$inpperm
- vcipher $out1,$out1,v24
- stvx_u $in1,$x10,$out
- le?vperm $in3,$in3,$in3,$inpperm
- vcipher $out2,$out2,v24
- stvx_u $in2,$x20,$out
- le?vperm $in4,$in4,$in4,$inpperm
- vcipher $out3,$out3,v24
- stvx_u $in3,$x30,$out
- le?vperm $in5,$in5,$in5,$inpperm
- vcipher $out4,$out4,v24
- stvx_u $in4,$x40,$out
- le?vperm $in6,$in6,$in6,$inpperm
- vcipher $out5,$out5,v24
- stvx_u $in5,$x50,$out
- le?vperm $in7,$in7,$in7,$inpperm
- vcipher $out6,$out6,v24
- stvx_u $in6,$x60,$out
- vcipher $out7,$out7,v24
- stvx_u $in7,$x70,$out
- addi $out,$out,0x80
-
- b Loop_ctr32_enc8x_middle
-
-.align 5
-Lctr32_enc8x_break:
- cmpwi $len,-0x60
- blt Lctr32_enc8x_one
- nop
- beq Lctr32_enc8x_two
- cmpwi $len,-0x40
- blt Lctr32_enc8x_three
- nop
- beq Lctr32_enc8x_four
- cmpwi $len,-0x20
- blt Lctr32_enc8x_five
- nop
- beq Lctr32_enc8x_six
- cmpwi $len,0x00
- blt Lctr32_enc8x_seven
-
-Lctr32_enc8x_eight:
- vcipherlast $out0,$out0,$in0
- vcipherlast $out1,$out1,$in1
- vcipherlast $out2,$out2,$in2
- vcipherlast $out3,$out3,$in3
- vcipherlast $out4,$out4,$in4
- vcipherlast $out5,$out5,$in5
- vcipherlast $out6,$out6,$in6
- vcipherlast $out7,$out7,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x30,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x40,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x50,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x60,$out
- stvx_u $out7,$x70,$out
- addi $out,$out,0x80
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_seven:
- vcipherlast $out0,$out0,$in1
- vcipherlast $out1,$out1,$in2
- vcipherlast $out2,$out2,$in3
- vcipherlast $out3,$out3,$in4
- vcipherlast $out4,$out4,$in5
- vcipherlast $out5,$out5,$in6
- vcipherlast $out6,$out6,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x30,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x40,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x50,$out
- stvx_u $out6,$x60,$out
- addi $out,$out,0x70
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_six:
- vcipherlast $out0,$out0,$in2
- vcipherlast $out1,$out1,$in3
- vcipherlast $out2,$out2,$in4
- vcipherlast $out3,$out3,$in5
- vcipherlast $out4,$out4,$in6
- vcipherlast $out5,$out5,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x30,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x40,$out
- stvx_u $out5,$x50,$out
- addi $out,$out,0x60
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_five:
- vcipherlast $out0,$out0,$in3
- vcipherlast $out1,$out1,$in4
- vcipherlast $out2,$out2,$in5
- vcipherlast $out3,$out3,$in6
- vcipherlast $out4,$out4,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x30,$out
- stvx_u $out4,$x40,$out
- addi $out,$out,0x50
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_four:
- vcipherlast $out0,$out0,$in4
- vcipherlast $out1,$out1,$in5
- vcipherlast $out2,$out2,$in6
- vcipherlast $out3,$out3,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- stvx_u $out3,$x30,$out
- addi $out,$out,0x40
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_three:
- vcipherlast $out0,$out0,$in5
- vcipherlast $out1,$out1,$in6
- vcipherlast $out2,$out2,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- stvx_u $out2,$x20,$out
- addi $out,$out,0x30
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_two:
- vcipherlast $out0,$out0,$in6
- vcipherlast $out1,$out1,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- stvx_u $out1,$x10,$out
- addi $out,$out,0x20
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_one:
- vcipherlast $out0,$out0,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- stvx_u $out0,0,$out
- addi $out,$out,0x10
-
-Lctr32_enc8x_done:
- li r10,`$FRAME+15`
- li r11,`$FRAME+31`
- stvx $inpperm,r10,$sp # wipe copies of round keys
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
-
- mtspr 256,$vrsave
- lvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- lvx v21,r11,$sp
- addi r11,r11,32
- lvx v22,r10,$sp
- addi r10,r10,32
- lvx v23,r11,$sp
- addi r11,r11,32
- lvx v24,r10,$sp
- addi r10,r10,32
- lvx v25,r11,$sp
- addi r11,r11,32
- lvx v26,r10,$sp
- addi r10,r10,32
- lvx v27,r11,$sp
- addi r11,r11,32
- lvx v28,r10,$sp
- addi r10,r10,32
- lvx v29,r11,$sp
- addi r11,r11,32
- lvx v30,r10,$sp
- lvx v31,r11,$sp
- $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
- blr
- .long 0
- .byte 0,12,0x14,0,0x80,6,6,0
- .long 0
-.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
-___
-}} }}}
-
-#########################################################################
-{{{ # XTS procedures #
-# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
-# const AES_KEY *key1, const AES_KEY *key2, #
-# [const] unsigned char iv[16]); #
-# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
-# input tweak value is assumed to be encrypted already, and last tweak #
-# value, one suitable for consecutive call on same chunk of data, is #
-# written back to original buffer. In addition, in "tweak chaining" #
-# mode only complete input blocks are processed. #
-
-my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
-my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
-my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
-my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
-my $taillen = $key2;
-
- ($inp,$idx) = ($idx,$inp); # reassign
-
-$code.=<<___;
-.globl .${prefix}_xts_encrypt
- mr $inp,r3 # reassign
- li r3,-1
- ${UCMP}i $len,16
- bltlr-
-
- lis r0,0xfff0
- mfspr r12,256 # save vrsave
- li r11,0
- mtspr 256,r0
-
- vspltisb $seven,0x07 # 0x070707..07
- le?lvsl $leperm,r11,r11
- le?vspltisb $tmp,0x0f
- le?vxor $leperm,$leperm,$seven
-
- li $idx,15
- lvx $tweak,0,$ivp # load [unaligned] iv
- lvsl $inpperm,0,$ivp
- lvx $inptail,$idx,$ivp
- le?vxor $inpperm,$inpperm,$tmp
- vperm $tweak,$tweak,$inptail,$inpperm
-
- neg r11,$inp
- lvsr $inpperm,0,r11 # prepare for unaligned load
- lvx $inout,0,$inp
- addi $inp,$inp,15 # 15 is not typo
- le?vxor $inpperm,$inpperm,$tmp
-
- ${UCMP}i $key2,0 # key2==NULL?
- beq Lxts_enc_no_key2
-
- ?lvsl $keyperm,0,$key2 # prepare for unaligned key
- lwz $rounds,240($key2)
- srwi $rounds,$rounds,1
- subi $rounds,$rounds,1
- li $idx,16
-
- lvx $rndkey0,0,$key2
- lvx $rndkey1,$idx,$key2
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $tweak,$tweak,$rndkey0
- lvx $rndkey0,$idx,$key2
- addi $idx,$idx,16
- mtctr $rounds
-
-Ltweak_xts_enc:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $tweak,$tweak,$rndkey1
- lvx $rndkey1,$idx,$key2
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipher $tweak,$tweak,$rndkey0
- lvx $rndkey0,$idx,$key2
- addi $idx,$idx,16
- bdnz Ltweak_xts_enc
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $tweak,$tweak,$rndkey1
- lvx $rndkey1,$idx,$key2
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipherlast $tweak,$tweak,$rndkey0
-
- li $ivp,0 # don't chain the tweak
- b Lxts_enc
-
-Lxts_enc_no_key2:
- li $idx,-16
- and $len,$len,$idx # in "tweak chaining"
- # mode only complete
- # blocks are processed
-Lxts_enc:
- lvx $inptail,0,$inp
- addi $inp,$inp,16
-
- ?lvsl $keyperm,0,$key1 # prepare for unaligned key
- lwz $rounds,240($key1)
- srwi $rounds,$rounds,1
- subi $rounds,$rounds,1
- li $idx,16
-
- vslb $eighty7,$seven,$seven # 0x808080..80
- vor $eighty7,$eighty7,$seven # 0x878787..87
- vspltisb $tmp,1 # 0x010101..01
- vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
-
- ${UCMP}i $len,96
- bge _aesp8_xts_encrypt6x
-
- andi. $taillen,$len,15
- subic r0,$len,32
- subi $taillen,$taillen,16
- subfe r0,r0,r0
- and r0,r0,$taillen
- add $inp,$inp,r0
-
- lvx $rndkey0,0,$key1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- vperm $inout,$inout,$inptail,$inpperm
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$inout,$tweak
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
- mtctr $rounds
- b Loop_xts_enc
-
-.align 5
-Loop_xts_enc:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
- bdnz Loop_xts_enc
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- li $idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $rndkey0,$rndkey0,$tweak
- vcipherlast $output,$inout,$rndkey0
-
- le?vperm $tmp,$output,$output,$leperm
- be?nop
- le?stvx_u $tmp,0,$out
- be?stvx_u $output,0,$out
- addi $out,$out,16
-
- subic. $len,$len,16
- beq Lxts_enc_done
-
- vmr $inout,$inptail
- lvx $inptail,0,$inp
- addi $inp,$inp,16
- lvx $rndkey0,0,$key1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
-
- subic r0,$len,32
- subfe r0,r0,r0
- and r0,r0,$taillen
- add $inp,$inp,r0
-
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $tweak,$tweak,$tmp
-
- vperm $inout,$inout,$inptail,$inpperm
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$inout,$tweak
- vxor $output,$output,$rndkey0 # just in case $len<16
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
-
- mtctr $rounds
- ${UCMP}i $len,16
- bge Loop_xts_enc
-
- vxor $output,$output,$tweak
- lvsr $inpperm,0,$len # $inpperm is no longer needed
- vxor $inptail,$inptail,$inptail # $inptail is no longer needed
- vspltisb $tmp,-1
- vperm $inptail,$inptail,$tmp,$inpperm
- vsel $inout,$inout,$output,$inptail
-
- subi r11,$out,17
- subi $out,$out,16
- mtctr $len
- li $len,16
-Loop_xts_enc_steal:
- lbzu r0,1(r11)
- stb r0,16(r11)
- bdnz Loop_xts_enc_steal
-
- mtctr $rounds
- b Loop_xts_enc # one more time...
-
-Lxts_enc_done:
- ${UCMP}i $ivp,0
- beq Lxts_enc_ret
-
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $tweak,$tweak,$tmp
-
- le?vperm $tweak,$tweak,$tweak,$leperm
- stvx_u $tweak,0,$ivp
-
-Lxts_enc_ret:
- mtspr 256,r12 # restore vrsave
- li r3,0
- blr
- .long 0
- .byte 0,12,0x04,0,0x80,6,6,0
- .long 0
-.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
-
-.globl .${prefix}_xts_decrypt
- mr $inp,r3 # reassign
- li r3,-1
- ${UCMP}i $len,16
- bltlr-
-
- lis r0,0xfff8
- mfspr r12,256 # save vrsave
- li r11,0
- mtspr 256,r0
-
- andi. r0,$len,15
- neg r0,r0
- andi. r0,r0,16
- sub $len,$len,r0
-
- vspltisb $seven,0x07 # 0x070707..07
- le?lvsl $leperm,r11,r11
- le?vspltisb $tmp,0x0f
- le?vxor $leperm,$leperm,$seven
-
- li $idx,15
- lvx $tweak,0,$ivp # load [unaligned] iv
- lvsl $inpperm,0,$ivp
- lvx $inptail,$idx,$ivp
- le?vxor $inpperm,$inpperm,$tmp
- vperm $tweak,$tweak,$inptail,$inpperm
-
- neg r11,$inp
- lvsr $inpperm,0,r11 # prepare for unaligned load
- lvx $inout,0,$inp
- addi $inp,$inp,15 # 15 is not typo
- le?vxor $inpperm,$inpperm,$tmp
-
- ${UCMP}i $key2,0 # key2==NULL?
- beq Lxts_dec_no_key2
-
- ?lvsl $keyperm,0,$key2 # prepare for unaligned key
- lwz $rounds,240($key2)
- srwi $rounds,$rounds,1
- subi $rounds,$rounds,1
- li $idx,16
-
- lvx $rndkey0,0,$key2
- lvx $rndkey1,$idx,$key2
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $tweak,$tweak,$rndkey0
- lvx $rndkey0,$idx,$key2
- addi $idx,$idx,16
- mtctr $rounds
-
-Ltweak_xts_dec:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $tweak,$tweak,$rndkey1
- lvx $rndkey1,$idx,$key2
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipher $tweak,$tweak,$rndkey0
- lvx $rndkey0,$idx,$key2
- addi $idx,$idx,16
- bdnz Ltweak_xts_dec
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $tweak,$tweak,$rndkey1
- lvx $rndkey1,$idx,$key2
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipherlast $tweak,$tweak,$rndkey0
-
- li $ivp,0 # don't chain the tweak
- b Lxts_dec
-
-Lxts_dec_no_key2:
- neg $idx,$len
- andi. $idx,$idx,15
- add $len,$len,$idx # in "tweak chaining"
- # mode only complete
- # blocks are processed
-Lxts_dec:
- lvx $inptail,0,$inp
- addi $inp,$inp,16
-
- ?lvsl $keyperm,0,$key1 # prepare for unaligned key
- lwz $rounds,240($key1)
- srwi $rounds,$rounds,1
- subi $rounds,$rounds,1
- li $idx,16
-
- vslb $eighty7,$seven,$seven # 0x808080..80
- vor $eighty7,$eighty7,$seven # 0x878787..87
- vspltisb $tmp,1 # 0x010101..01
- vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
-
- ${UCMP}i $len,96
- bge _aesp8_xts_decrypt6x
-
- lvx $rndkey0,0,$key1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- vperm $inout,$inout,$inptail,$inpperm
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$inout,$tweak
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
- mtctr $rounds
-
- ${UCMP}i $len,16
- blt Ltail_xts_dec
- be?b Loop_xts_dec
-
-.align 5
-Loop_xts_dec:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vncipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
- bdnz Loop_xts_dec
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- li $idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $rndkey0,$rndkey0,$tweak
- vncipherlast $output,$inout,$rndkey0
-
- le?vperm $tmp,$output,$output,$leperm
- be?nop
- le?stvx_u $tmp,0,$out
- be?stvx_u $output,0,$out
- addi $out,$out,16
-
- subic. $len,$len,16
- beq Lxts_dec_done
-
- vmr $inout,$inptail
- lvx $inptail,0,$inp
- addi $inp,$inp,16
- lvx $rndkey0,0,$key1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
-
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $tweak,$tweak,$tmp
-
- vperm $inout,$inout,$inptail,$inpperm
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$inout,$tweak
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
-
- mtctr $rounds
- ${UCMP}i $len,16
- bge Loop_xts_dec
-
-Ltail_xts_dec:
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak1,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $tweak1,$tweak1,$tmp
-
- subi $inp,$inp,16
- add $inp,$inp,$len
-
- vxor $inout,$inout,$tweak # :-(
- vxor $inout,$inout,$tweak1 # :-)
-
-Loop_xts_dec_short:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vncipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
- bdnz Loop_xts_dec_short
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- li $idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $rndkey0,$rndkey0,$tweak1
- vncipherlast $output,$inout,$rndkey0
-
- le?vperm $tmp,$output,$output,$leperm
- be?nop
- le?stvx_u $tmp,0,$out
- be?stvx_u $output,0,$out
-
- vmr $inout,$inptail
- lvx $inptail,0,$inp
- #addi $inp,$inp,16
- lvx $rndkey0,0,$key1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- vperm $inout,$inout,$inptail,$inpperm
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
-
- lvsr $inpperm,0,$len # $inpperm is no longer needed
- vxor $inptail,$inptail,$inptail # $inptail is no longer needed
- vspltisb $tmp,-1
- vperm $inptail,$inptail,$tmp,$inpperm
- vsel $inout,$inout,$output,$inptail
-
- vxor $rndkey0,$rndkey0,$tweak
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
-
- subi r11,$out,1
- mtctr $len
- li $len,16
-Loop_xts_dec_steal:
- lbzu r0,1(r11)
- stb r0,16(r11)
- bdnz Loop_xts_dec_steal
-
- mtctr $rounds
- b Loop_xts_dec # one more time...
-
-Lxts_dec_done:
- ${UCMP}i $ivp,0
- beq Lxts_dec_ret
-
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $tweak,$tweak,$tmp
-
- le?vperm $tweak,$tweak,$tweak,$leperm
- stvx_u $tweak,0,$ivp
-
-Lxts_dec_ret:
- mtspr 256,r12 # restore vrsave
- li r3,0
- blr
- .long 0
- .byte 0,12,0x04,0,0x80,6,6,0
- .long 0
-.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
-___
-#########################################################################
-{{ # Optimized XTS procedures #
-my $key_=$key2;
-my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
- $x00=0 if ($flavour =~ /osx/);
-my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
-my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
-my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
-my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
- # v26-v31 last 6 round keys
-my ($keyperm)=($out0); # aliases with "caller", redundant assignment
-my $taillen=$x70;
-
-$code.=<<___;
-.align 5
-_aesp8_xts_encrypt6x:
- $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
- mflr r11
- li r7,`$FRAME+8*16+15`
- li r3,`$FRAME+8*16+31`
- $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
- stvx v20,r7,$sp # ABI says so
- addi r7,r7,32
- stvx v21,r3,$sp
- addi r3,r3,32
- stvx v22,r7,$sp
- addi r7,r7,32
- stvx v23,r3,$sp
- addi r3,r3,32
- stvx v24,r7,$sp
- addi r7,r7,32
- stvx v25,r3,$sp
- addi r3,r3,32
- stvx v26,r7,$sp
- addi r7,r7,32
- stvx v27,r3,$sp
- addi r3,r3,32
- stvx v28,r7,$sp
- addi r7,r7,32
- stvx v29,r3,$sp
- addi r3,r3,32
- stvx v30,r7,$sp
- stvx v31,r3,$sp
- li r0,-1
- stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
- li $x10,0x10
- $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- li $x20,0x20
- $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- li $x30,0x30
- $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- li $x40,0x40
- $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- li $x50,0x50
- $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- li $x60,0x60
- $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- li $x70,0x70
- mtspr 256,r0
-
- xxlor 2, 32+$eighty7, 32+$eighty7
- vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87
- xxlor 1, 32+$eighty7, 32+$eighty7
-
- # Load XOR Lconsts.
- mr $x70, r6
- bl Lconsts
- lxvw4x 0, $x40, r6 # load XOR contents
- mr r6, $x70
- li $x70,0x70
-
- subi $rounds,$rounds,3 # -4 in total
-
- lvx $rndkey0,$x00,$key1 # load key schedule
- lvx v30,$x10,$key1
- addi $key1,$key1,0x20
- lvx v31,$x00,$key1
- ?vperm $rndkey0,$rndkey0,v30,$keyperm
- addi $key_,$sp,$FRAME+15
- mtctr $rounds
-
-Load_xts_enc_key:
- ?vperm v24,v30,v31,$keyperm
- lvx v30,$x10,$key1
- addi $key1,$key1,0x20
- stvx v24,$x00,$key_ # off-load round[1]
- ?vperm v25,v31,v30,$keyperm
- lvx v31,$x00,$key1
- stvx v25,$x10,$key_ # off-load round[2]
- addi $key_,$key_,0x20
- bdnz Load_xts_enc_key
-
- lvx v26,$x10,$key1
- ?vperm v24,v30,v31,$keyperm
- lvx v27,$x20,$key1
- stvx v24,$x00,$key_ # off-load round[3]
- ?vperm v25,v31,v26,$keyperm
- lvx v28,$x30,$key1
- stvx v25,$x10,$key_ # off-load round[4]
- addi $key_,$sp,$FRAME+15 # rewind $key_
- ?vperm v26,v26,v27,$keyperm
- lvx v29,$x40,$key1
- ?vperm v27,v27,v28,$keyperm
- lvx v30,$x50,$key1
- ?vperm v28,v28,v29,$keyperm
- lvx v31,$x60,$key1
- ?vperm v29,v29,v30,$keyperm
- lvx $twk5,$x70,$key1 # borrow $twk5
- ?vperm v30,v30,v31,$keyperm
- lvx v24,$x00,$key_ # pre-load round[1]
- ?vperm v31,v31,$twk5,$keyperm
- lvx v25,$x10,$key_ # pre-load round[2]
-
- # Switch to use the following codes with 0x010101..87 to generate tweak.
- # eighty7 = 0x010101..87
- # vsrab tmp, tweak, seven # next tweak value, right shift 7 bits
- # vand tmp, tmp, eighty7 # last byte with carry
- # vaddubm tweak, tweak, tweak # left shift 1 bit (x2)
- # xxlor vsx, 0, 0
- # vpermxor tweak, tweak, tmp, vsx
-
- vperm $in0,$inout,$inptail,$inpperm
- subi $inp,$inp,31 # undo "caller"
- vxor $twk0,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vand $tmp,$tmp,$eighty7
- vxor $out0,$in0,$twk0
- xxlor 32+$in1, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in1
-
- lvx_u $in1,$x10,$inp
- vxor $twk1,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- le?vperm $in1,$in1,$in1,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out1,$in1,$twk1
- xxlor 32+$in2, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in2
-
- lvx_u $in2,$x20,$inp
- andi. $taillen,$len,15
- vxor $twk2,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- le?vperm $in2,$in2,$in2,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out2,$in2,$twk2
- xxlor 32+$in3, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in3
-
- lvx_u $in3,$x30,$inp
- sub $len,$len,$taillen
- vxor $twk3,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- le?vperm $in3,$in3,$in3,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out3,$in3,$twk3
- xxlor 32+$in4, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in4
-
- lvx_u $in4,$x40,$inp
- subi $len,$len,0x60
- vxor $twk4,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- le?vperm $in4,$in4,$in4,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out4,$in4,$twk4
- xxlor 32+$in5, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in5
-
- lvx_u $in5,$x50,$inp
- addi $inp,$inp,0x60
- vxor $twk5,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- le?vperm $in5,$in5,$in5,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out5,$in5,$twk5
- xxlor 32+$in0, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in0
-
- vxor v31,v31,$rndkey0
- mtctr $rounds
- b Loop_xts_enc6x
-
-.align 5
-Loop_xts_enc6x:
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vcipher $out4,$out4,v24
- vcipher $out5,$out5,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vcipher $out4,$out4,v25
- vcipher $out5,$out5,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_xts_enc6x
-
- xxlor 32+$eighty7, 1, 1 # 0x010101..87
-
- subic $len,$len,96 # $len-=96
- vxor $in0,$twk0,v31 # xor with last round key
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk0,$tweak,$rndkey0
- vaddubm $tweak,$tweak,$tweak
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vcipher $out4,$out4,v24
- vcipher $out5,$out5,v24
-
- subfe. r0,r0,r0 # borrow?-1:0
- vand $tmp,$tmp,$eighty7
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- xxlor 32+$in1, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in1
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vxor $in1,$twk1,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk1,$tweak,$rndkey0
- vcipher $out4,$out4,v25
- vcipher $out5,$out5,v25
-
- and r0,r0,$len
- vaddubm $tweak,$tweak,$tweak
- vcipher $out0,$out0,v26
- vcipher $out1,$out1,v26
- vand $tmp,$tmp,$eighty7
- vcipher $out2,$out2,v26
- vcipher $out3,$out3,v26
- xxlor 32+$in2, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in2
- vcipher $out4,$out4,v26
- vcipher $out5,$out5,v26
-
- add $inp,$inp,r0 # $inp is adjusted in such
- # way that at exit from the
- # loop inX-in5 are loaded
- # with last "words"
- vxor $in2,$twk2,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk2,$tweak,$rndkey0
- vaddubm $tweak,$tweak,$tweak
- vcipher $out0,$out0,v27
- vcipher $out1,$out1,v27
- vcipher $out2,$out2,v27
- vcipher $out3,$out3,v27
- vand $tmp,$tmp,$eighty7
- vcipher $out4,$out4,v27
- vcipher $out5,$out5,v27
-
- addi $key_,$sp,$FRAME+15 # rewind $key_
- xxlor 32+$in3, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in3
- vcipher $out0,$out0,v28
- vcipher $out1,$out1,v28
- vxor $in3,$twk3,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk3,$tweak,$rndkey0
- vcipher $out2,$out2,v28
- vcipher $out3,$out3,v28
- vaddubm $tweak,$tweak,$tweak
- vcipher $out4,$out4,v28
- vcipher $out5,$out5,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
- vand $tmp,$tmp,$eighty7
-
- vcipher $out0,$out0,v29
- vcipher $out1,$out1,v29
- xxlor 32+$in4, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in4
- vcipher $out2,$out2,v29
- vcipher $out3,$out3,v29
- vxor $in4,$twk4,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk4,$tweak,$rndkey0
- vcipher $out4,$out4,v29
- vcipher $out5,$out5,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vaddubm $tweak,$tweak,$tweak
-
- vcipher $out0,$out0,v30
- vcipher $out1,$out1,v30
- vand $tmp,$tmp,$eighty7
- vcipher $out2,$out2,v30
- vcipher $out3,$out3,v30
- xxlor 32+$in5, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in5
- vcipher $out4,$out4,v30
- vcipher $out5,$out5,v30
- vxor $in5,$twk5,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk5,$tweak,$rndkey0
-
- vcipherlast $out0,$out0,$in0
- lvx_u $in0,$x00,$inp # load next input block
- vaddubm $tweak,$tweak,$tweak
- vcipherlast $out1,$out1,$in1
- lvx_u $in1,$x10,$inp
- vcipherlast $out2,$out2,$in2
- le?vperm $in0,$in0,$in0,$leperm
- lvx_u $in2,$x20,$inp
- vand $tmp,$tmp,$eighty7
- vcipherlast $out3,$out3,$in3
- le?vperm $in1,$in1,$in1,$leperm
- lvx_u $in3,$x30,$inp
- vcipherlast $out4,$out4,$in4
- le?vperm $in2,$in2,$in2,$leperm
- lvx_u $in4,$x40,$inp
- xxlor 10, 32+$in0, 32+$in0
- xxlor 32+$in0, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in0
- xxlor 32+$in0, 10, 10
- vcipherlast $tmp,$out5,$in5 # last block might be needed
- # in stealing mode
- le?vperm $in3,$in3,$in3,$leperm
- lvx_u $in5,$x50,$inp
- addi $inp,$inp,0x60
- le?vperm $in4,$in4,$in4,$leperm
- le?vperm $in5,$in5,$in5,$leperm
-
- le?vperm $out0,$out0,$out0,$leperm
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk0
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- vxor $out1,$in1,$twk1
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- vxor $out2,$in2,$twk2
- le?vperm $out4,$out4,$out4,$leperm
- stvx_u $out3,$x30,$out
- vxor $out3,$in3,$twk3
- le?vperm $out5,$tmp,$tmp,$leperm
- stvx_u $out4,$x40,$out
- vxor $out4,$in4,$twk4
- le?stvx_u $out5,$x50,$out
- be?stvx_u $tmp, $x50,$out
- vxor $out5,$in5,$twk5
- addi $out,$out,0x60
-
- mtctr $rounds
- beq Loop_xts_enc6x # did $len-=96 borrow?
-
- xxlor 32+$eighty7, 2, 2 # 0x010101..87
-
- addic. $len,$len,0x60
- beq Lxts_enc6x_zero
- cmpwi $len,0x20
- blt Lxts_enc6x_one
- nop
- beq Lxts_enc6x_two
- cmpwi $len,0x40
- blt Lxts_enc6x_three
- nop
- beq Lxts_enc6x_four
-
-Lxts_enc6x_five:
- vxor $out0,$in1,$twk0
- vxor $out1,$in2,$twk1
- vxor $out2,$in3,$twk2
- vxor $out3,$in4,$twk3
- vxor $out4,$in5,$twk4
-
- bl _aesp8_xts_enc5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk5 # unused tweak
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- vxor $tmp,$out4,$twk5 # last block prep for stealing
- le?vperm $out4,$out4,$out4,$leperm
- stvx_u $out3,$x30,$out
- stvx_u $out4,$x40,$out
- addi $out,$out,0x50
- bne Lxts_enc6x_steal
- b Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_four:
- vxor $out0,$in2,$twk0
- vxor $out1,$in3,$twk1
- vxor $out2,$in4,$twk2
- vxor $out3,$in5,$twk3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_enc5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk4 # unused tweak
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- vxor $tmp,$out3,$twk4 # last block prep for stealing
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- stvx_u $out3,$x30,$out
- addi $out,$out,0x40
- bne Lxts_enc6x_steal
- b Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_three:
- vxor $out0,$in3,$twk0
- vxor $out1,$in4,$twk1
- vxor $out2,$in5,$twk2
- vxor $out3,$out3,$out3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_enc5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk3 # unused tweak
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $tmp,$out2,$twk3 # last block prep for stealing
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- stvx_u $out2,$x20,$out
- addi $out,$out,0x30
- bne Lxts_enc6x_steal
- b Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_two:
- vxor $out0,$in4,$twk0
- vxor $out1,$in5,$twk1
- vxor $out2,$out2,$out2
- vxor $out3,$out3,$out3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_enc5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk2 # unused tweak
- vxor $tmp,$out1,$twk2 # last block prep for stealing
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- stvx_u $out1,$x10,$out
- addi $out,$out,0x20
- bne Lxts_enc6x_steal
- b Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_one:
- vxor $out0,$in5,$twk0
- nop
-Loop_xts_enc1x:
- vcipher $out0,$out0,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vcipher $out0,$out0,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_xts_enc1x
-
- add $inp,$inp,$taillen
- cmpwi $taillen,0
- vcipher $out0,$out0,v24
-
- subi $inp,$inp,16
- vcipher $out0,$out0,v25
-
- lvsr $inpperm,0,$taillen
- vcipher $out0,$out0,v26
-
- lvx_u $in0,0,$inp
- vcipher $out0,$out0,v27
-
- addi $key_,$sp,$FRAME+15 # rewind $key_
- vcipher $out0,$out0,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
-
- vcipher $out0,$out0,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vxor $twk0,$twk0,v31
-
- le?vperm $in0,$in0,$in0,$leperm
- vcipher $out0,$out0,v30
-
- vperm $in0,$in0,$in0,$inpperm
- vcipherlast $out0,$out0,$twk0
-
- vmr $twk0,$twk1 # unused tweak
- vxor $tmp,$out0,$twk1 # last block prep for stealing
- le?vperm $out0,$out0,$out0,$leperm
- stvx_u $out0,$x00,$out # store output
- addi $out,$out,0x10
- bne Lxts_enc6x_steal
- b Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_zero:
- cmpwi $taillen,0
- beq Lxts_enc6x_done
-
- add $inp,$inp,$taillen
- subi $inp,$inp,16
- lvx_u $in0,0,$inp
- lvsr $inpperm,0,$taillen # $in5 is no more
- le?vperm $in0,$in0,$in0,$leperm
- vperm $in0,$in0,$in0,$inpperm
- vxor $tmp,$tmp,$twk0
-Lxts_enc6x_steal:
- vxor $in0,$in0,$twk0
- vxor $out0,$out0,$out0
- vspltisb $out1,-1
- vperm $out0,$out0,$out1,$inpperm
- vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
-
- subi r30,$out,17
- subi $out,$out,16
- mtctr $taillen
-Loop_xts_enc6x_steal:
- lbzu r0,1(r30)
- stb r0,16(r30)
- bdnz Loop_xts_enc6x_steal
-
- li $taillen,0
- mtctr $rounds
- b Loop_xts_enc1x # one more time...
-
-.align 4
-Lxts_enc6x_done:
- ${UCMP}i $ivp,0
- beq Lxts_enc6x_ret
-
- vxor $tweak,$twk0,$rndkey0
- le?vperm $tweak,$tweak,$tweak,$leperm
- stvx_u $tweak,0,$ivp
-
-Lxts_enc6x_ret:
- mtlr r11
- li r10,`$FRAME+15`
- li r11,`$FRAME+31`
- stvx $seven,r10,$sp # wipe copies of round keys
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
-
- mtspr 256,$vrsave
- lvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- lvx v21,r11,$sp
- addi r11,r11,32
- lvx v22,r10,$sp
- addi r10,r10,32
- lvx v23,r11,$sp
- addi r11,r11,32
- lvx v24,r10,$sp
- addi r10,r10,32
- lvx v25,r11,$sp
- addi r11,r11,32
- lvx v26,r10,$sp
- addi r10,r10,32
- lvx v27,r11,$sp
- addi r11,r11,32
- lvx v28,r10,$sp
- addi r10,r10,32
- lvx v29,r11,$sp
- addi r11,r11,32
- lvx v30,r10,$sp
- lvx v31,r11,$sp
- $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
- blr
- .long 0
- .byte 0,12,0x04,1,0x80,6,6,0
- .long 0
-
-.align 5
-_aesp8_xts_enc5x:
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vcipher $out4,$out4,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vcipher $out4,$out4,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz _aesp8_xts_enc5x
-
- add $inp,$inp,$taillen
- cmpwi $taillen,0
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vcipher $out4,$out4,v24
-
- subi $inp,$inp,16
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vcipher $out4,$out4,v25
- vxor $twk0,$twk0,v31
-
- vcipher $out0,$out0,v26
- lvsr $inpperm,r0,$taillen # $in5 is no more
- vcipher $out1,$out1,v26
- vcipher $out2,$out2,v26
- vcipher $out3,$out3,v26
- vcipher $out4,$out4,v26
- vxor $in1,$twk1,v31
-
- vcipher $out0,$out0,v27
- lvx_u $in0,0,$inp
- vcipher $out1,$out1,v27
- vcipher $out2,$out2,v27
- vcipher $out3,$out3,v27
- vcipher $out4,$out4,v27
- vxor $in2,$twk2,v31
-
- addi $key_,$sp,$FRAME+15 # rewind $key_
- vcipher $out0,$out0,v28
- vcipher $out1,$out1,v28
- vcipher $out2,$out2,v28
- vcipher $out3,$out3,v28
- vcipher $out4,$out4,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
- vxor $in3,$twk3,v31
-
- vcipher $out0,$out0,v29
- le?vperm $in0,$in0,$in0,$leperm
- vcipher $out1,$out1,v29
- vcipher $out2,$out2,v29
- vcipher $out3,$out3,v29
- vcipher $out4,$out4,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vxor $in4,$twk4,v31
-
- vcipher $out0,$out0,v30
- vperm $in0,$in0,$in0,$inpperm
- vcipher $out1,$out1,v30
- vcipher $out2,$out2,v30
- vcipher $out3,$out3,v30
- vcipher $out4,$out4,v30
-
- vcipherlast $out0,$out0,$twk0
- vcipherlast $out1,$out1,$in1
- vcipherlast $out2,$out2,$in2
- vcipherlast $out3,$out3,$in3
- vcipherlast $out4,$out4,$in4
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,0,0
-
-.align 5
-_aesp8_xts_decrypt6x:
- $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
- mflr r11
- li r7,`$FRAME+8*16+15`
- li r3,`$FRAME+8*16+31`
- $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
- stvx v20,r7,$sp # ABI says so
- addi r7,r7,32
- stvx v21,r3,$sp
- addi r3,r3,32
- stvx v22,r7,$sp
- addi r7,r7,32
- stvx v23,r3,$sp
- addi r3,r3,32
- stvx v24,r7,$sp
- addi r7,r7,32
- stvx v25,r3,$sp
- addi r3,r3,32
- stvx v26,r7,$sp
- addi r7,r7,32
- stvx v27,r3,$sp
- addi r3,r3,32
- stvx v28,r7,$sp
- addi r7,r7,32
- stvx v29,r3,$sp
- addi r3,r3,32
- stvx v30,r7,$sp
- stvx v31,r3,$sp
- li r0,-1
- stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
- li $x10,0x10
- $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- li $x20,0x20
- $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- li $x30,0x30
- $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- li $x40,0x40
- $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- li $x50,0x50
- $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- li $x60,0x60
- $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- li $x70,0x70
- mtspr 256,r0
-
- xxlor 2, 32+$eighty7, 32+$eighty7
- vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87
- xxlor 1, 32+$eighty7, 32+$eighty7
-
- # Load XOR Lconsts.
- mr $x70, r6
- bl Lconsts
- lxvw4x 0, $x40, r6 # load XOR contents
- mr r6, $x70
- li $x70,0x70
-
- subi $rounds,$rounds,3 # -4 in total
-
- lvx $rndkey0,$x00,$key1 # load key schedule
- lvx v30,$x10,$key1
- addi $key1,$key1,0x20
- lvx v31,$x00,$key1
- ?vperm $rndkey0,$rndkey0,v30,$keyperm
- addi $key_,$sp,$FRAME+15
- mtctr $rounds
-
-Load_xts_dec_key:
- ?vperm v24,v30,v31,$keyperm
- lvx v30,$x10,$key1
- addi $key1,$key1,0x20
- stvx v24,$x00,$key_ # off-load round[1]
- ?vperm v25,v31,v30,$keyperm
- lvx v31,$x00,$key1
- stvx v25,$x10,$key_ # off-load round[2]
- addi $key_,$key_,0x20
- bdnz Load_xts_dec_key
-
- lvx v26,$x10,$key1
- ?vperm v24,v30,v31,$keyperm
- lvx v27,$x20,$key1
- stvx v24,$x00,$key_ # off-load round[3]
- ?vperm v25,v31,v26,$keyperm
- lvx v28,$x30,$key1
- stvx v25,$x10,$key_ # off-load round[4]
- addi $key_,$sp,$FRAME+15 # rewind $key_
- ?vperm v26,v26,v27,$keyperm
- lvx v29,$x40,$key1
- ?vperm v27,v27,v28,$keyperm
- lvx v30,$x50,$key1
- ?vperm v28,v28,v29,$keyperm
- lvx v31,$x60,$key1
- ?vperm v29,v29,v30,$keyperm
- lvx $twk5,$x70,$key1 # borrow $twk5
- ?vperm v30,v30,v31,$keyperm
- lvx v24,$x00,$key_ # pre-load round[1]
- ?vperm v31,v31,$twk5,$keyperm
- lvx v25,$x10,$key_ # pre-load round[2]
-
- vperm $in0,$inout,$inptail,$inpperm
- subi $inp,$inp,31 # undo "caller"
- vxor $twk0,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vand $tmp,$tmp,$eighty7
- vxor $out0,$in0,$twk0
- xxlor 32+$in1, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in1
-
- lvx_u $in1,$x10,$inp
- vxor $twk1,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- le?vperm $in1,$in1,$in1,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out1,$in1,$twk1
- xxlor 32+$in2, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in2
-
- lvx_u $in2,$x20,$inp
- andi. $taillen,$len,15
- vxor $twk2,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- le?vperm $in2,$in2,$in2,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out2,$in2,$twk2
- xxlor 32+$in3, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in3
-
- lvx_u $in3,$x30,$inp
- sub $len,$len,$taillen
- vxor $twk3,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- le?vperm $in3,$in3,$in3,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out3,$in3,$twk3
- xxlor 32+$in4, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in4
-
- lvx_u $in4,$x40,$inp
- subi $len,$len,0x60
- vxor $twk4,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- le?vperm $in4,$in4,$in4,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out4,$in4,$twk4
- xxlor 32+$in5, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in5
-
- lvx_u $in5,$x50,$inp
- addi $inp,$inp,0x60
- vxor $twk5,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- le?vperm $in5,$in5,$in5,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out5,$in5,$twk5
- xxlor 32+$in0, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in0
-
- vxor v31,v31,$rndkey0
- mtctr $rounds
- b Loop_xts_dec6x
-
-.align 5
-Loop_xts_dec6x:
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_xts_dec6x
-
- xxlor 32+$eighty7, 1, 1 # 0x010101..87
-
- subic $len,$len,96 # $len-=96
- vxor $in0,$twk0,v31 # xor with last round key
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk0,$tweak,$rndkey0
- vaddubm $tweak,$tweak,$tweak
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
-
- subfe. r0,r0,r0 # borrow?-1:0
- vand $tmp,$tmp,$eighty7
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- xxlor 32+$in1, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in1
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vxor $in1,$twk1,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk1,$tweak,$rndkey0
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
-
- and r0,r0,$len
- vaddubm $tweak,$tweak,$tweak
- vncipher $out0,$out0,v26
- vncipher $out1,$out1,v26
- vand $tmp,$tmp,$eighty7
- vncipher $out2,$out2,v26
- vncipher $out3,$out3,v26
- xxlor 32+$in2, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in2
- vncipher $out4,$out4,v26
- vncipher $out5,$out5,v26
-
- add $inp,$inp,r0 # $inp is adjusted in such
- # way that at exit from the
- # loop inX-in5 are loaded
- # with last "words"
- vxor $in2,$twk2,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk2,$tweak,$rndkey0
- vaddubm $tweak,$tweak,$tweak
- vncipher $out0,$out0,v27
- vncipher $out1,$out1,v27
- vncipher $out2,$out2,v27
- vncipher $out3,$out3,v27
- vand $tmp,$tmp,$eighty7
- vncipher $out4,$out4,v27
- vncipher $out5,$out5,v27
-
- addi $key_,$sp,$FRAME+15 # rewind $key_
- xxlor 32+$in3, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in3
- vncipher $out0,$out0,v28
- vncipher $out1,$out1,v28
- vxor $in3,$twk3,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk3,$tweak,$rndkey0
- vncipher $out2,$out2,v28
- vncipher $out3,$out3,v28
- vaddubm $tweak,$tweak,$tweak
- vncipher $out4,$out4,v28
- vncipher $out5,$out5,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
- vand $tmp,$tmp,$eighty7
-
- vncipher $out0,$out0,v29
- vncipher $out1,$out1,v29
- xxlor 32+$in4, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in4
- vncipher $out2,$out2,v29
- vncipher $out3,$out3,v29
- vxor $in4,$twk4,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk4,$tweak,$rndkey0
- vncipher $out4,$out4,v29
- vncipher $out5,$out5,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vaddubm $tweak,$tweak,$tweak
-
- vncipher $out0,$out0,v30
- vncipher $out1,$out1,v30
- vand $tmp,$tmp,$eighty7
- vncipher $out2,$out2,v30
- vncipher $out3,$out3,v30
- xxlor 32+$in5, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in5
- vncipher $out4,$out4,v30
- vncipher $out5,$out5,v30
- vxor $in5,$twk5,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk5,$tweak,$rndkey0
-
- vncipherlast $out0,$out0,$in0
- lvx_u $in0,$x00,$inp # load next input block
- vaddubm $tweak,$tweak,$tweak
- vncipherlast $out1,$out1,$in1
- lvx_u $in1,$x10,$inp
- vncipherlast $out2,$out2,$in2
- le?vperm $in0,$in0,$in0,$leperm
- lvx_u $in2,$x20,$inp
- vand $tmp,$tmp,$eighty7
- vncipherlast $out3,$out3,$in3
- le?vperm $in1,$in1,$in1,$leperm
- lvx_u $in3,$x30,$inp
- vncipherlast $out4,$out4,$in4
- le?vperm $in2,$in2,$in2,$leperm
- lvx_u $in4,$x40,$inp
- xxlor 10, 32+$in0, 32+$in0
- xxlor 32+$in0, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in0
- xxlor 32+$in0, 10, 10
- vncipherlast $out5,$out5,$in5
- le?vperm $in3,$in3,$in3,$leperm
- lvx_u $in5,$x50,$inp
- addi $inp,$inp,0x60
- le?vperm $in4,$in4,$in4,$leperm
- le?vperm $in5,$in5,$in5,$leperm
-
- le?vperm $out0,$out0,$out0,$leperm
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk0
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- vxor $out1,$in1,$twk1
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- vxor $out2,$in2,$twk2
- le?vperm $out4,$out4,$out4,$leperm
- stvx_u $out3,$x30,$out
- vxor $out3,$in3,$twk3
- le?vperm $out5,$out5,$out5,$leperm
- stvx_u $out4,$x40,$out
- vxor $out4,$in4,$twk4
- stvx_u $out5,$x50,$out
- vxor $out5,$in5,$twk5
- addi $out,$out,0x60
-
- mtctr $rounds
- beq Loop_xts_dec6x # did $len-=96 borrow?
-
- xxlor 32+$eighty7, 2, 2 # 0x010101..87
-
- addic. $len,$len,0x60
- beq Lxts_dec6x_zero
- cmpwi $len,0x20
- blt Lxts_dec6x_one
- nop
- beq Lxts_dec6x_two
- cmpwi $len,0x40
- blt Lxts_dec6x_three
- nop
- beq Lxts_dec6x_four
-
-Lxts_dec6x_five:
- vxor $out0,$in1,$twk0
- vxor $out1,$in2,$twk1
- vxor $out2,$in3,$twk2
- vxor $out3,$in4,$twk3
- vxor $out4,$in5,$twk4
-
- bl _aesp8_xts_dec5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk5 # unused tweak
- vxor $twk1,$tweak,$rndkey0
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk1
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- le?vperm $out4,$out4,$out4,$leperm
- stvx_u $out3,$x30,$out
- stvx_u $out4,$x40,$out
- addi $out,$out,0x50
- bne Lxts_dec6x_steal
- b Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_four:
- vxor $out0,$in2,$twk0
- vxor $out1,$in3,$twk1
- vxor $out2,$in4,$twk2
- vxor $out3,$in5,$twk3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_dec5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk4 # unused tweak
- vmr $twk1,$twk5
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk5
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- stvx_u $out3,$x30,$out
- addi $out,$out,0x40
- bne Lxts_dec6x_steal
- b Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_three:
- vxor $out0,$in3,$twk0
- vxor $out1,$in4,$twk1
- vxor $out2,$in5,$twk2
- vxor $out3,$out3,$out3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_dec5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk3 # unused tweak
- vmr $twk1,$twk4
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk4
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- stvx_u $out2,$x20,$out
- addi $out,$out,0x30
- bne Lxts_dec6x_steal
- b Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_two:
- vxor $out0,$in4,$twk0
- vxor $out1,$in5,$twk1
- vxor $out2,$out2,$out2
- vxor $out3,$out3,$out3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_dec5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk2 # unused tweak
- vmr $twk1,$twk3
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk3
- stvx_u $out1,$x10,$out
- addi $out,$out,0x20
- bne Lxts_dec6x_steal
- b Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_one:
- vxor $out0,$in5,$twk0
- nop
-Loop_xts_dec1x:
- vncipher $out0,$out0,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out0,$out0,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_xts_dec1x
-
- subi r0,$taillen,1
- vncipher $out0,$out0,v24
-
- andi. r0,r0,16
- cmpwi $taillen,0
- vncipher $out0,$out0,v25
-
- sub $inp,$inp,r0
- vncipher $out0,$out0,v26
-
- lvx_u $in0,0,$inp
- vncipher $out0,$out0,v27
-
- addi $key_,$sp,$FRAME+15 # rewind $key_
- vncipher $out0,$out0,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
-
- vncipher $out0,$out0,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vxor $twk0,$twk0,v31
-
- le?vperm $in0,$in0,$in0,$leperm
- vncipher $out0,$out0,v30
-
- mtctr $rounds
- vncipherlast $out0,$out0,$twk0
-
- vmr $twk0,$twk1 # unused tweak
- vmr $twk1,$twk2
- le?vperm $out0,$out0,$out0,$leperm
- stvx_u $out0,$x00,$out # store output
- addi $out,$out,0x10
- vxor $out0,$in0,$twk2
- bne Lxts_dec6x_steal
- b Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_zero:
- cmpwi $taillen,0
- beq Lxts_dec6x_done
-
- lvx_u $in0,0,$inp
- le?vperm $in0,$in0,$in0,$leperm
- vxor $out0,$in0,$twk1
-Lxts_dec6x_steal:
- vncipher $out0,$out0,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out0,$out0,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Lxts_dec6x_steal
-
- add $inp,$inp,$taillen
- vncipher $out0,$out0,v24
-
- cmpwi $taillen,0
- vncipher $out0,$out0,v25
-
- lvx_u $in0,0,$inp
- vncipher $out0,$out0,v26
-
- lvsr $inpperm,0,$taillen # $in5 is no more
- vncipher $out0,$out0,v27
-
- addi $key_,$sp,$FRAME+15 # rewind $key_
- vncipher $out0,$out0,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
-
- vncipher $out0,$out0,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vxor $twk1,$twk1,v31
-
- le?vperm $in0,$in0,$in0,$leperm
- vncipher $out0,$out0,v30
-
- vperm $in0,$in0,$in0,$inpperm
- vncipherlast $tmp,$out0,$twk1
-
- le?vperm $out0,$tmp,$tmp,$leperm
- le?stvx_u $out0,0,$out
- be?stvx_u $tmp,0,$out
-
- vxor $out0,$out0,$out0
- vspltisb $out1,-1
- vperm $out0,$out0,$out1,$inpperm
- vsel $out0,$in0,$tmp,$out0
- vxor $out0,$out0,$twk0
-
- subi r30,$out,1
- mtctr $taillen
-Loop_xts_dec6x_steal:
- lbzu r0,1(r30)
- stb r0,16(r30)
- bdnz Loop_xts_dec6x_steal
-
- li $taillen,0
- mtctr $rounds
- b Loop_xts_dec1x # one more time...
-
-.align 4
-Lxts_dec6x_done:
- ${UCMP}i $ivp,0
- beq Lxts_dec6x_ret
-
- vxor $tweak,$twk0,$rndkey0
- le?vperm $tweak,$tweak,$tweak,$leperm
- stvx_u $tweak,0,$ivp
-
-Lxts_dec6x_ret:
- mtlr r11
- li r10,`$FRAME+15`
- li r11,`$FRAME+31`
- stvx $seven,r10,$sp # wipe copies of round keys
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
-
- mtspr 256,$vrsave
- lvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- lvx v21,r11,$sp
- addi r11,r11,32
- lvx v22,r10,$sp
- addi r10,r10,32
- lvx v23,r11,$sp
- addi r11,r11,32
- lvx v24,r10,$sp
- addi r10,r10,32
- lvx v25,r11,$sp
- addi r11,r11,32
- lvx v26,r10,$sp
- addi r10,r10,32
- lvx v27,r11,$sp
- addi r11,r11,32
- lvx v28,r10,$sp
- addi r10,r10,32
- lvx v29,r11,$sp
- addi r11,r11,32
- lvx v30,r10,$sp
- lvx v31,r11,$sp
- $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
- blr
- .long 0
- .byte 0,12,0x04,1,0x80,6,6,0
- .long 0
-
-.align 5
-_aesp8_xts_dec5x:
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz _aesp8_xts_dec5x
-
- subi r0,$taillen,1
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
-
- andi. r0,r0,16
- cmpwi $taillen,0
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vxor $twk0,$twk0,v31
-
- sub $inp,$inp,r0
- vncipher $out0,$out0,v26
- vncipher $out1,$out1,v26
- vncipher $out2,$out2,v26
- vncipher $out3,$out3,v26
- vncipher $out4,$out4,v26
- vxor $in1,$twk1,v31
-
- vncipher $out0,$out0,v27
- lvx_u $in0,0,$inp
- vncipher $out1,$out1,v27
- vncipher $out2,$out2,v27
- vncipher $out3,$out3,v27
- vncipher $out4,$out4,v27
- vxor $in2,$twk2,v31
-
- addi $key_,$sp,$FRAME+15 # rewind $key_
- vncipher $out0,$out0,v28
- vncipher $out1,$out1,v28
- vncipher $out2,$out2,v28
- vncipher $out3,$out3,v28
- vncipher $out4,$out4,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
- vxor $in3,$twk3,v31
-
- vncipher $out0,$out0,v29
- le?vperm $in0,$in0,$in0,$leperm
- vncipher $out1,$out1,v29
- vncipher $out2,$out2,v29
- vncipher $out3,$out3,v29
- vncipher $out4,$out4,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vxor $in4,$twk4,v31
-
- vncipher $out0,$out0,v30
- vncipher $out1,$out1,v30
- vncipher $out2,$out2,v30
- vncipher $out3,$out3,v30
- vncipher $out4,$out4,v30
-
- vncipherlast $out0,$out0,$twk0
- vncipherlast $out1,$out1,$in1
- vncipherlast $out2,$out2,$in2
- vncipherlast $out3,$out3,$in3
- vncipherlast $out4,$out4,$in4
- mtctr $rounds
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,0,0
-___
-}} }}}
-
-my $consts=1;
-foreach(split("\n",$code)) {
- s/\`([^\`]*)\`/eval($1)/geo;
-
- # constants table endian-specific conversion
- if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
- my $conv=$3;
- my @bytes=();
-
- # convert to endian-agnostic format
- if ($1 eq "long") {
- foreach (split(/,\s*/,$2)) {
- my $l = /^0/?oct:int;
- push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
- }
- } else {
- @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
- }
-
- # little-endian conversion
- if ($flavour =~ /le$/o) {
- SWITCH: for($conv) {
- /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
- /\?rev/ && do { @bytes=reverse(@bytes); last; };
- }
- }
-
- #emit
- print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
- next;
- }
- $consts=0 if (m/Lconsts:/o); # end of table
-
- # instructions prefixed with '?' are endian-specific and need
- # to be adjusted accordingly...
- if ($flavour =~ /le$/o) { # little-endian
- s/le\?//o or
- s/be\?/#be#/o or
- s/\?lvsr/lvsl/o or
- s/\?lvsl/lvsr/o or
- s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
- s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
- s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
- } else { # big-endian
- s/le\?/#le#/o or
- s/be\?//o or
- s/\?([a-z]+)/$1/o;
- }
-
- print $_,"\n";
-}
-
-close STDOUT;
diff --git a/drivers/crypto/vmx/ghash.c b/drivers/crypto/vmx/ghash.c
deleted file mode 100644
index 77eca20bc7ac..000000000000
--- a/drivers/crypto/vmx/ghash.c
+++ /dev/null
@@ -1,185 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GHASH routines supporting VMX instructions on the Power 8
- *
- * Copyright (C) 2015, 2019 International Business Machines Inc.
- *
- * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
- *
- * Extended by Daniel Axtens <dja@axtens.net> to replace the fallback
- * mechanism. The new approach is based on arm64 code, which is:
- * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
- */
-
-#include <linux/types.h>
-#include <linux/err.h>
-#include <linux/crypto.h>
-#include <linux/delay.h>
-#include <asm/simd.h>
-#include <asm/switch_to.h>
-#include <crypto/aes.h>
-#include <crypto/ghash.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-#include <crypto/b128ops.h>
-#include "aesp8-ppc.h"
-
-void gcm_init_p8(u128 htable[16], const u64 Xi[2]);
-void gcm_gmult_p8(u64 Xi[2], const u128 htable[16]);
-void gcm_ghash_p8(u64 Xi[2], const u128 htable[16],
- const u8 *in, size_t len);
-
-struct p8_ghash_ctx {
- /* key used by vector asm */
- u128 htable[16];
- /* key used by software fallback */
- be128 key;
-};
-
-struct p8_ghash_desc_ctx {
- u64 shash[2];
- u8 buffer[GHASH_DIGEST_SIZE];
- int bytes;
-};
-
-static int p8_ghash_init(struct shash_desc *desc)
-{
- struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
-
- dctx->bytes = 0;
- memset(dctx->shash, 0, GHASH_DIGEST_SIZE);
- return 0;
-}
-
-static int p8_ghash_setkey(struct crypto_shash *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(tfm));
-
- if (keylen != GHASH_BLOCK_SIZE)
- return -EINVAL;
-
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- gcm_init_p8(ctx->htable, (const u64 *) key);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- memcpy(&ctx->key, key, GHASH_BLOCK_SIZE);
-
- return 0;
-}
-
-static inline void __ghash_block(struct p8_ghash_ctx *ctx,
- struct p8_ghash_desc_ctx *dctx)
-{
- if (crypto_simd_usable()) {
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- gcm_ghash_p8(dctx->shash, ctx->htable,
- dctx->buffer, GHASH_DIGEST_SIZE);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
- } else {
- crypto_xor((u8 *)dctx->shash, dctx->buffer, GHASH_BLOCK_SIZE);
- gf128mul_lle((be128 *)dctx->shash, &ctx->key);
- }
-}
-
-static inline void __ghash_blocks(struct p8_ghash_ctx *ctx,
- struct p8_ghash_desc_ctx *dctx,
- const u8 *src, unsigned int srclen)
-{
- if (crypto_simd_usable()) {
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- gcm_ghash_p8(dctx->shash, ctx->htable,
- src, srclen);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
- } else {
- while (srclen >= GHASH_BLOCK_SIZE) {
- crypto_xor((u8 *)dctx->shash, src, GHASH_BLOCK_SIZE);
- gf128mul_lle((be128 *)dctx->shash, &ctx->key);
- srclen -= GHASH_BLOCK_SIZE;
- src += GHASH_BLOCK_SIZE;
- }
- }
-}
-
-static int p8_ghash_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen)
-{
- unsigned int len;
- struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm));
- struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
-
- if (dctx->bytes) {
- if (dctx->bytes + srclen < GHASH_DIGEST_SIZE) {
- memcpy(dctx->buffer + dctx->bytes, src,
- srclen);
- dctx->bytes += srclen;
- return 0;
- }
- memcpy(dctx->buffer + dctx->bytes, src,
- GHASH_DIGEST_SIZE - dctx->bytes);
-
- __ghash_block(ctx, dctx);
-
- src += GHASH_DIGEST_SIZE - dctx->bytes;
- srclen -= GHASH_DIGEST_SIZE - dctx->bytes;
- dctx->bytes = 0;
- }
- len = srclen & ~(GHASH_DIGEST_SIZE - 1);
- if (len) {
- __ghash_blocks(ctx, dctx, src, len);
- src += len;
- srclen -= len;
- }
- if (srclen) {
- memcpy(dctx->buffer, src, srclen);
- dctx->bytes = srclen;
- }
- return 0;
-}
-
-static int p8_ghash_final(struct shash_desc *desc, u8 *out)
-{
- int i;
- struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm));
- struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
-
- if (dctx->bytes) {
- for (i = dctx->bytes; i < GHASH_DIGEST_SIZE; i++)
- dctx->buffer[i] = 0;
- __ghash_block(ctx, dctx);
- dctx->bytes = 0;
- }
- memcpy(out, dctx->shash, GHASH_DIGEST_SIZE);
- return 0;
-}
-
-struct shash_alg p8_ghash_alg = {
- .digestsize = GHASH_DIGEST_SIZE,
- .init = p8_ghash_init,
- .update = p8_ghash_update,
- .final = p8_ghash_final,
- .setkey = p8_ghash_setkey,
- .descsize = sizeof(struct p8_ghash_desc_ctx)
- + sizeof(struct ghash_desc_ctx),
- .base = {
- .cra_name = "ghash",
- .cra_driver_name = "p8_ghash",
- .cra_priority = 1000,
- .cra_blocksize = GHASH_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct p8_ghash_ctx),
- .cra_module = THIS_MODULE,
- },
-};
diff --git a/drivers/crypto/vmx/ghashp8-ppc.pl b/drivers/crypto/vmx/ghashp8-ppc.pl
deleted file mode 100644
index 041e633c214f..000000000000
--- a/drivers/crypto/vmx/ghashp8-ppc.pl
+++ /dev/null
@@ -1,243 +0,0 @@
-#!/usr/bin/env perl
-# SPDX-License-Identifier: GPL-2.0
-
-# This code is taken from the OpenSSL project but the author (Andy Polyakov)
-# has relicensed it under the GPLv2. Therefore this program is free software;
-# you can redistribute it and/or modify it under the terms of the GNU General
-# Public License version 2 as published by the Free Software Foundation.
-#
-# The original headers, including the original license headers, are
-# included below for completeness.
-
-# ====================================================================
-# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see https://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-#
-# GHASH for PowerISA v2.07.
-#
-# July 2014
-#
-# Accurate performance measurements are problematic, because it's
-# always virtualized setup with possibly throttled processor.
-# Relative comparison is therefore more informative. This initial
-# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
-# faster than "4-bit" integer-only compiler-generated 64-bit code.
-# "Initial version" means that there is room for futher improvement.
-
-$flavour=shift;
-$output =shift;
-
-if ($flavour =~ /64/) {
- $SIZE_T=8;
- $LRSAVE=2*$SIZE_T;
- $STU="stdu";
- $POP="ld";
- $PUSH="std";
-} elsif ($flavour =~ /32/) {
- $SIZE_T=4;
- $LRSAVE=$SIZE_T;
- $STU="stwu";
- $POP="lwz";
- $PUSH="stw";
-} else { die "nonsense $flavour"; }
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
-die "can't locate ppc-xlate.pl";
-
-open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!";
-
-my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block
-
-my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
-my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
-my $vrsave="r12";
-
-$code=<<___;
-.machine "any"
-
-.text
-
-.globl .gcm_init_p8
- lis r0,0xfff0
- li r8,0x10
- mfspr $vrsave,256
- li r9,0x20
- mtspr 256,r0
- li r10,0x30
- lvx_u $H,0,r4 # load H
- le?xor r7,r7,r7
- le?addi r7,r7,0x8 # need a vperm start with 08
- le?lvsr 5,0,r7
- le?vspltisb 6,0x0f
- le?vxor 5,5,6 # set a b-endian mask
- le?vperm $H,$H,$H,5
-
- vspltisb $xC2,-16 # 0xf0
- vspltisb $t0,1 # one
- vaddubm $xC2,$xC2,$xC2 # 0xe0
- vxor $zero,$zero,$zero
- vor $xC2,$xC2,$t0 # 0xe1
- vsldoi $xC2,$xC2,$zero,15 # 0xe1...
- vsldoi $t1,$zero,$t0,1 # ...1
- vaddubm $xC2,$xC2,$xC2 # 0xc2...
- vspltisb $t2,7
- vor $xC2,$xC2,$t1 # 0xc2....01
- vspltb $t1,$H,0 # most significant byte
- vsl $H,$H,$t0 # H<<=1
- vsrab $t1,$t1,$t2 # broadcast carry bit
- vand $t1,$t1,$xC2
- vxor $H,$H,$t1 # twisted H
-
- vsldoi $H,$H,$H,8 # twist even more ...
- vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
- vsldoi $Hl,$zero,$H,8 # ... and split
- vsldoi $Hh,$H,$zero,8
-
- stvx_u $xC2,0,r3 # save pre-computed table
- stvx_u $Hl,r8,r3
- stvx_u $H, r9,r3
- stvx_u $Hh,r10,r3
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,2,0
- .long 0
-.size .gcm_init_p8,.-.gcm_init_p8
-
-.globl .gcm_gmult_p8
- lis r0,0xfff8
- li r8,0x10
- mfspr $vrsave,256
- li r9,0x20
- mtspr 256,r0
- li r10,0x30
- lvx_u $IN,0,$Xip # load Xi
-
- lvx_u $Hl,r8,$Htbl # load pre-computed table
- le?lvsl $lemask,r0,r0
- lvx_u $H, r9,$Htbl
- le?vspltisb $t0,0x07
- lvx_u $Hh,r10,$Htbl
- le?vxor $lemask,$lemask,$t0
- lvx_u $xC2,0,$Htbl
- le?vperm $IN,$IN,$IN,$lemask
- vxor $zero,$zero,$zero
-
- vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
- vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
- vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
-
- vpmsumd $t2,$Xl,$xC2 # 1st phase
-
- vsldoi $t0,$Xm,$zero,8
- vsldoi $t1,$zero,$Xm,8
- vxor $Xl,$Xl,$t0
- vxor $Xh,$Xh,$t1
-
- vsldoi $Xl,$Xl,$Xl,8
- vxor $Xl,$Xl,$t2
-
- vsldoi $t1,$Xl,$Xl,8 # 2nd phase
- vpmsumd $Xl,$Xl,$xC2
- vxor $t1,$t1,$Xh
- vxor $Xl,$Xl,$t1
-
- le?vperm $Xl,$Xl,$Xl,$lemask
- stvx_u $Xl,0,$Xip # write out Xi
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,2,0
- .long 0
-.size .gcm_gmult_p8,.-.gcm_gmult_p8
-
-.globl .gcm_ghash_p8
- lis r0,0xfff8
- li r8,0x10
- mfspr $vrsave,256
- li r9,0x20
- mtspr 256,r0
- li r10,0x30
- lvx_u $Xl,0,$Xip # load Xi
-
- lvx_u $Hl,r8,$Htbl # load pre-computed table
- le?lvsl $lemask,r0,r0
- lvx_u $H, r9,$Htbl
- le?vspltisb $t0,0x07
- lvx_u $Hh,r10,$Htbl
- le?vxor $lemask,$lemask,$t0
- lvx_u $xC2,0,$Htbl
- le?vperm $Xl,$Xl,$Xl,$lemask
- vxor $zero,$zero,$zero
-
- lvx_u $IN,0,$inp
- addi $inp,$inp,16
- subi $len,$len,16
- le?vperm $IN,$IN,$IN,$lemask
- vxor $IN,$IN,$Xl
- b Loop
-
-.align 5
-Loop:
- subic $len,$len,16
- vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
- subfe. r0,r0,r0 # borrow?-1:0
- vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
- and r0,r0,$len
- vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
- add $inp,$inp,r0
-
- vpmsumd $t2,$Xl,$xC2 # 1st phase
-
- vsldoi $t0,$Xm,$zero,8
- vsldoi $t1,$zero,$Xm,8
- vxor $Xl,$Xl,$t0
- vxor $Xh,$Xh,$t1
-
- vsldoi $Xl,$Xl,$Xl,8
- vxor $Xl,$Xl,$t2
- lvx_u $IN,0,$inp
- addi $inp,$inp,16
-
- vsldoi $t1,$Xl,$Xl,8 # 2nd phase
- vpmsumd $Xl,$Xl,$xC2
- le?vperm $IN,$IN,$IN,$lemask
- vxor $t1,$t1,$Xh
- vxor $IN,$IN,$t1
- vxor $IN,$IN,$Xl
- beq Loop # did $len-=16 borrow?
-
- vxor $Xl,$Xl,$t1
- le?vperm $Xl,$Xl,$Xl,$lemask
- stvx_u $Xl,0,$Xip # write out Xi
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,4,0
- .long 0
-.size .gcm_ghash_p8,.-.gcm_ghash_p8
-
-.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
-.align 2
-___
-
-foreach (split("\n",$code)) {
- if ($flavour =~ /le$/o) { # little-endian
- s/le\?//o or
- s/be\?/#be#/o;
- } else {
- s/le\?/#le#/o or
- s/be\?//o;
- }
- print $_,"\n";
-}
-
-close STDOUT; # enforce flush
diff --git a/drivers/crypto/vmx/ppc-xlate.pl b/drivers/crypto/vmx/ppc-xlate.pl
deleted file mode 100644
index b583898c11ae..000000000000
--- a/drivers/crypto/vmx/ppc-xlate.pl
+++ /dev/null
@@ -1,231 +0,0 @@
-#!/usr/bin/env perl
-# SPDX-License-Identifier: GPL-2.0
-
-# PowerPC assembler distiller by <appro>.
-
-my $flavour = shift;
-my $output = shift;
-open STDOUT,">$output" || die "can't open $output: $!";
-
-my %GLOBALS;
-my $dotinlocallabels=($flavour=~/linux/)?1:0;
-my $elfv2abi=(($flavour =~ /linux-ppc64le/) or ($flavour =~ /linux-ppc64-elfv2/))?1:0;
-my $dotfunctions=($elfv2abi=~1)?0:1;
-
-################################################################
-# directives which need special treatment on different platforms
-################################################################
-my $globl = sub {
- my $junk = shift;
- my $name = shift;
- my $global = \$GLOBALS{$name};
- my $ret;
-
- $name =~ s|^[\.\_]||;
-
- SWITCH: for ($flavour) {
- /aix/ && do { $name = ".$name";
- last;
- };
- /osx/ && do { $name = "_$name";
- last;
- };
- /linux/
- && do { $ret = "_GLOBAL($name)";
- last;
- };
- }
-
- $ret = ".globl $name\nalign 5\n$name:" if (!$ret);
- $$global = $name;
- $ret;
-};
-my $text = sub {
- my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
- $ret = ".abiversion 2\n".$ret if ($elfv2abi);
- $ret;
-};
-my $machine = sub {
- my $junk = shift;
- my $arch = shift;
- if ($flavour =~ /osx/)
- { $arch =~ s/\"//g;
- $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any");
- }
- ".machine $arch";
-};
-my $size = sub {
- if ($flavour =~ /linux/)
- { shift;
- my $name = shift; $name =~ s|^[\.\_]||;
- my $ret = ".size $name,.-".($dotfunctions?".":"").$name;
- $ret .= "\n.size .$name,.-.$name" if ($dotfunctions);
- $ret;
- }
- else
- { ""; }
-};
-my $asciz = sub {
- shift;
- my $line = join(",",@_);
- if ($line =~ /^"(.*)"$/)
- { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; }
- else
- { ""; }
-};
-my $quad = sub {
- shift;
- my @ret;
- my ($hi,$lo);
- for (@_) {
- if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io)
- { $hi=$1?"0x$1":"0"; $lo="0x$2"; }
- elsif (/^([0-9]+)$/o)
- { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl
- else
- { $hi=undef; $lo=$_; }
-
- if (defined($hi))
- { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); }
- else
- { push(@ret,".quad $lo"); }
- }
- join("\n",@ret);
-};
-
-################################################################
-# simplified mnemonics not handled by at least one assembler
-################################################################
-my $cmplw = sub {
- my $f = shift;
- my $cr = 0; $cr = shift if ($#_>1);
- # Some out-of-date 32-bit GNU assembler just can't handle cmplw...
- ($flavour =~ /linux.*32/) ?
- " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 :
- " cmplw ".join(',',$cr,@_);
-};
-my $bdnz = sub {
- my $f = shift;
- my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint
- " bc $bo,0,".shift;
-} if ($flavour!~/linux/);
-my $bltlr = sub {
- my $f = shift;
- my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint
- ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
- " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 :
- " bclr $bo,0";
-};
-my $bnelr = sub {
- my $f = shift;
- my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint
- ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
- " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 :
- " bclr $bo,2";
-};
-my $beqlr = sub {
- my $f = shift;
- my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint
- ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
- " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 :
- " bclr $bo,2";
-};
-# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two
-# arguments is 64, with "operand out of range" error.
-my $extrdi = sub {
- my ($f,$ra,$rs,$n,$b) = @_;
- $b = ($b+$n)&63; $n = 64-$n;
- " rldicl $ra,$rs,$b,$n";
-};
-my $vmr = sub {
- my ($f,$vx,$vy) = @_;
- " vor $vx,$vy,$vy";
-};
-
-# Some ABIs specify vrsave, special-purpose register #256, as reserved
-# for system use.
-my $no_vrsave = ($elfv2abi);
-my $mtspr = sub {
- my ($f,$idx,$ra) = @_;
- if ($idx == 256 && $no_vrsave) {
- " or $ra,$ra,$ra";
- } else {
- " mtspr $idx,$ra";
- }
-};
-my $mfspr = sub {
- my ($f,$rd,$idx) = @_;
- if ($idx == 256 && $no_vrsave) {
- " li $rd,-1";
- } else {
- " mfspr $rd,$idx";
- }
-};
-
-# PowerISA 2.06 stuff
-sub vsxmem_op {
- my ($f, $vrt, $ra, $rb, $op) = @_;
- " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1);
-}
-# made-up unaligned memory reference AltiVec/VMX instructions
-my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x
-my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x
-my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx
-my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx
-my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x
-my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x
-
-# PowerISA 2.07 stuff
-sub vcrypto_op {
- my ($f, $vrt, $vra, $vrb, $op) = @_;
- " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op;
-}
-my $vcipher = sub { vcrypto_op(@_, 1288); };
-my $vcipherlast = sub { vcrypto_op(@_, 1289); };
-my $vncipher = sub { vcrypto_op(@_, 1352); };
-my $vncipherlast= sub { vcrypto_op(@_, 1353); };
-my $vsbox = sub { vcrypto_op(@_, 0, 1480); };
-my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
-my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
-my $vpmsumb = sub { vcrypto_op(@_, 1032); };
-my $vpmsumd = sub { vcrypto_op(@_, 1224); };
-my $vpmsubh = sub { vcrypto_op(@_, 1096); };
-my $vpmsumw = sub { vcrypto_op(@_, 1160); };
-my $vaddudm = sub { vcrypto_op(@_, 192); };
-my $vadduqm = sub { vcrypto_op(@_, 256); };
-
-my $mtsle = sub {
- my ($f, $arg) = @_;
- " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2);
-};
-
-print "#include <asm/ppc_asm.h>\n" if $flavour =~ /linux/;
-
-while($line=<>) {
-
- $line =~ s|[#!;].*$||; # get rid of asm-style comments...
- $line =~ s|/\*.*\*/||; # ... and C-style comments...
- $line =~ s|^\s+||; # ... and skip white spaces in beginning...
- $line =~ s|\s+$||; # ... and at the end
-
- {
- $line =~ s|\b\.L(\w+)|L$1|g; # common denominator for Locallabel
- $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels);
- }
-
- {
- $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||;
- my $c = $1; $c = "\t" if ($c eq "");
- my $mnemonic = $2;
- my $f = $3;
- my $opcode = eval("\$$mnemonic");
- $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/);
- if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); }
- elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; }
- }
-
- print $line if ($line);
- print "\n";
-}
-
-close STDOUT;
diff --git a/drivers/crypto/vmx/vmx.c b/drivers/crypto/vmx/vmx.c
deleted file mode 100644
index 7eb713cc87c8..000000000000
--- a/drivers/crypto/vmx/vmx.c
+++ /dev/null
@@ -1,77 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Routines supporting VMX instructions on the Power 8
- *
- * Copyright (C) 2015 International Business Machines Inc.
- *
- * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/types.h>
-#include <linux/err.h>
-#include <linux/cpufeature.h>
-#include <linux/crypto.h>
-#include <asm/cputable.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/skcipher.h>
-
-#include "aesp8-ppc.h"
-
-static int __init p8_init(void)
-{
- int ret;
-
- ret = crypto_register_shash(&p8_ghash_alg);
- if (ret)
- goto err;
-
- ret = crypto_register_alg(&p8_aes_alg);
- if (ret)
- goto err_unregister_ghash;
-
- ret = crypto_register_skcipher(&p8_aes_cbc_alg);
- if (ret)
- goto err_unregister_aes;
-
- ret = crypto_register_skcipher(&p8_aes_ctr_alg);
- if (ret)
- goto err_unregister_aes_cbc;
-
- ret = crypto_register_skcipher(&p8_aes_xts_alg);
- if (ret)
- goto err_unregister_aes_ctr;
-
- return 0;
-
-err_unregister_aes_ctr:
- crypto_unregister_skcipher(&p8_aes_ctr_alg);
-err_unregister_aes_cbc:
- crypto_unregister_skcipher(&p8_aes_cbc_alg);
-err_unregister_aes:
- crypto_unregister_alg(&p8_aes_alg);
-err_unregister_ghash:
- crypto_unregister_shash(&p8_ghash_alg);
-err:
- return ret;
-}
-
-static void __exit p8_exit(void)
-{
- crypto_unregister_skcipher(&p8_aes_xts_alg);
- crypto_unregister_skcipher(&p8_aes_ctr_alg);
- crypto_unregister_skcipher(&p8_aes_cbc_alg);
- crypto_unregister_alg(&p8_aes_alg);
- crypto_unregister_shash(&p8_ghash_alg);
-}
-
-module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, p8_init);
-module_exit(p8_exit);
-
-MODULE_AUTHOR("Marcelo Cerri<mhcerri@br.ibm.com>");
-MODULE_DESCRIPTION("IBM VMX cryptographic acceleration instructions "
- "support on Power 8");
-MODULE_LICENSE("GPL");
-MODULE_VERSION("1.0.0");
-MODULE_IMPORT_NS(CRYPTO_INTERNAL);
diff --git a/drivers/crypto/xilinx/zynqmp-aes-gcm.c b/drivers/crypto/xilinx/zynqmp-aes-gcm.c
index 3c205324b22b..e61405718840 100644
--- a/drivers/crypto/xilinx/zynqmp-aes-gcm.c
+++ b/drivers/crypto/xilinx/zynqmp-aes-gcm.c
@@ -231,7 +231,10 @@ static int zynqmp_handle_aes_req(struct crypto_engine *engine,
err = zynqmp_aes_aead_cipher(areq);
}
+ local_bh_disable();
crypto_finalize_aead_request(engine, areq, err);
+ local_bh_enable();
+
return 0;
}