summaryrefslogtreecommitdiff
path: root/arch/arm/crypto/blake2s-glue.c
diff options
context:
space:
mode:
authorEric Biggers <ebiggers@google.com>2020-12-23 00:09:59 -0800
committerHerbert Xu <herbert@gondor.apana.org.au>2021-01-03 08:41:39 +1100
commit5172d322d34c30fb926b29aeb5a064e1fd8a5e13 (patch)
treedf7558ad0563d58542c628dae9ae1f80e6e1ded0 /arch/arm/crypto/blake2s-glue.c
parentbbda6e0f1303953c855ee3669655a81b69fbe899 (diff)
crypto: arm/blake2s - add ARM scalar optimized BLAKE2s
Add an ARM scalar optimized implementation of BLAKE2s. NEON isn't very useful for BLAKE2s because the BLAKE2s block size is too small for NEON to help. Each NEON instruction would depend on the previous one, resulting in poor performance. With scalar instructions, on the other hand, we can take advantage of ARM's "free" rotations (like I did in chacha-scalar-core.S) to get an implementation get runs much faster than the C implementation. Performance results on Cortex-A7 in cycles per byte using the shash API: 4096-byte messages: blake2s-256-arm: 18.8 blake2s-256-generic: 26.0 500-byte messages: blake2s-256-arm: 20.3 blake2s-256-generic: 27.9 100-byte messages: blake2s-256-arm: 29.7 blake2s-256-generic: 39.2 32-byte messages: blake2s-256-arm: 50.6 blake2s-256-generic: 66.2 Except on very short messages, this is still slower than the NEON implementation of BLAKE2b which I've written; that is 14.0, 16.4, 25.8, and 76.1 cpb on 4096, 500, 100, and 32-byte messages, respectively. However, optimized BLAKE2s is useful for cases where BLAKE2s is used instead of BLAKE2b, such as WireGuard. This new implementation is added in the form of a new module blake2s-arm.ko, which is analogous to blake2s-x86_64.ko in that it provides blake2s_compress_arch() for use by the library API as well as optionally register the algorithms with the shash API. Acked-by: Ard Biesheuvel <ardb@kernel.org> Signed-off-by: Eric Biggers <ebiggers@google.com> Tested-by: Ard Biesheuvel <ardb@kernel.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/arm/crypto/blake2s-glue.c')
-rw-r--r--arch/arm/crypto/blake2s-glue.c78
1 files changed, 78 insertions, 0 deletions
diff --git a/arch/arm/crypto/blake2s-glue.c b/arch/arm/crypto/blake2s-glue.c
new file mode 100644
index 000000000000..f2cc1e5fc9ec
--- /dev/null
+++ b/arch/arm/crypto/blake2s-glue.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * BLAKE2s digest algorithm, ARM scalar implementation
+ *
+ * Copyright 2020 Google LLC
+ */
+
+#include <crypto/internal/blake2s.h>
+#include <crypto/internal/hash.h>
+
+#include <linux/module.h>
+
+/* defined in blake2s-core.S */
+EXPORT_SYMBOL(blake2s_compress_arch);
+
+static int crypto_blake2s_update_arm(struct shash_desc *desc,
+ const u8 *in, unsigned int inlen)
+{
+ return crypto_blake2s_update(desc, in, inlen, blake2s_compress_arch);
+}
+
+static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out)
+{
+ return crypto_blake2s_final(desc, out, blake2s_compress_arch);
+}
+
+#define BLAKE2S_ALG(name, driver_name, digest_size) \
+ { \
+ .base.cra_name = name, \
+ .base.cra_driver_name = driver_name, \
+ .base.cra_priority = 200, \
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \
+ .base.cra_module = THIS_MODULE, \
+ .digestsize = digest_size, \
+ .setkey = crypto_blake2s_setkey, \
+ .init = crypto_blake2s_init, \
+ .update = crypto_blake2s_update_arm, \
+ .final = crypto_blake2s_final_arm, \
+ .descsize = sizeof(struct blake2s_state), \
+ }
+
+static struct shash_alg blake2s_arm_algs[] = {
+ BLAKE2S_ALG("blake2s-128", "blake2s-128-arm", BLAKE2S_128_HASH_SIZE),
+ BLAKE2S_ALG("blake2s-160", "blake2s-160-arm", BLAKE2S_160_HASH_SIZE),
+ BLAKE2S_ALG("blake2s-224", "blake2s-224-arm", BLAKE2S_224_HASH_SIZE),
+ BLAKE2S_ALG("blake2s-256", "blake2s-256-arm", BLAKE2S_256_HASH_SIZE),
+};
+
+static int __init blake2s_arm_mod_init(void)
+{
+ return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
+ crypto_register_shashes(blake2s_arm_algs,
+ ARRAY_SIZE(blake2s_arm_algs)) : 0;
+}
+
+static void __exit blake2s_arm_mod_exit(void)
+{
+ if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
+ crypto_unregister_shashes(blake2s_arm_algs,
+ ARRAY_SIZE(blake2s_arm_algs));
+}
+
+module_init(blake2s_arm_mod_init);
+module_exit(blake2s_arm_mod_exit);
+
+MODULE_DESCRIPTION("BLAKE2s digest algorithm, ARM scalar implementation");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+MODULE_ALIAS_CRYPTO("blake2s-128");
+MODULE_ALIAS_CRYPTO("blake2s-128-arm");
+MODULE_ALIAS_CRYPTO("blake2s-160");
+MODULE_ALIAS_CRYPTO("blake2s-160-arm");
+MODULE_ALIAS_CRYPTO("blake2s-224");
+MODULE_ALIAS_CRYPTO("blake2s-224-arm");
+MODULE_ALIAS_CRYPTO("blake2s-256");
+MODULE_ALIAS_CRYPTO("blake2s-256-arm");