summaryrefslogtreecommitdiff
path: root/arch/x86/crypto/crct10dif-pclmul_glue.c
diff options
context:
space:
mode:
authorEric Biggers <ebiggers@google.com>2019-01-30 20:42:40 -0800
committerHerbert Xu <herbert@gondor.apana.org.au>2019-02-08 15:29:48 +0800
commit0974037fc55cdf27a0297d3487f4fafa57b4a4a9 (patch)
treee11d18ffd8f81ab7fbc924e4aa18497fcff77173 /arch/x86/crypto/crct10dif-pclmul_glue.c
parentf8903b3ead5191d450f21c7388ddc245f76cec0f (diff)
crypto: x86/crct10dif-pcl - cleanup and optimizations
The x86, arm, and arm64 asm implementations of crct10dif are very difficult to understand partly because many of the comments, labels, and macros are named incorrectly: the lengths mentioned are usually off by a factor of two from the actual code. Many other things are unnecessarily convoluted as well, e.g. there are many more fold constants than actually needed and some aren't fully reduced. This series therefore cleans up all these implementations to be much more maintainable. I also made some small optimizations where I saw opportunities, resulting in slightly better performance. This patch cleans up the x86 version. As part of this, I removed support for len < 16 from the x86 assembly; now the glue code falls back to the generic table-based implementation in this case. Due to the overhead of kernel_fpu_begin(), this actually significantly improves performance on these lengths. (And even if kernel_fpu_begin() were free, the generic code is still faster for about len < 11.) This removal also eliminates error-prone special cases and makes the x86, arm32, and arm64 ports of the code match more closely. Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Eric Biggers <ebiggers@google.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86/crypto/crct10dif-pclmul_glue.c')
-rw-r--r--arch/x86/crypto/crct10dif-pclmul_glue.c12
1 files changed, 3 insertions, 9 deletions
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
index cd4df9322501..0e785c0b2354 100644
--- a/arch/x86/crypto/crct10dif-pclmul_glue.c
+++ b/arch/x86/crypto/crct10dif-pclmul_glue.c
@@ -33,18 +33,12 @@
#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
-asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
- size_t len);
+asmlinkage u16 crc_t10dif_pcl(u16 init_crc, const u8 *buf, size_t len);
struct chksum_desc_ctx {
__u16 crc;
};
-/*
- * Steps through buffer one byte at at time, calculates reflected
- * crc using table.
- */
-
static int chksum_init(struct shash_desc *desc)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
@@ -59,7 +53,7 @@ static int chksum_update(struct shash_desc *desc, const u8 *data,
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
- if (irq_fpu_usable()) {
+ if (length >= 16 && irq_fpu_usable()) {
kernel_fpu_begin();
ctx->crc = crc_t10dif_pcl(ctx->crc, data, length);
kernel_fpu_end();
@@ -79,7 +73,7 @@ static int chksum_final(struct shash_desc *desc, u8 *out)
static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
u8 *out)
{
- if (irq_fpu_usable()) {
+ if (len >= 16 && irq_fpu_usable()) {
kernel_fpu_begin();
*(__u16 *)out = crc_t10dif_pcl(*crcp, data, len);
kernel_fpu_end();