summaryrefslogtreecommitdiff
path: root/lib/crypto/x86/blake2s-glue.c
diff options
context:
space:
mode:
authorEric Biggers <ebiggers@kernel.org>2025-06-19 12:19:07 -0700
committerEric Biggers <ebiggers@kernel.org>2025-06-30 09:26:20 -0700
commit74750aa78de33794aa9ab55de15da04bd41d1ac8 (patch)
treece3498ed648b95176cff53bc3dd48f96b6575c29 /lib/crypto/x86/blake2s-glue.c
parenta32e93e10067d19dec302220a9124a21573e7e7e (diff)
lib/crypto: x86: Move arch/x86/lib/crypto/ into lib/crypto/
Move the contents of arch/x86/lib/crypto/ into lib/crypto/x86/. The new code organization makes a lot more sense for how this code actually works and is developed. In particular, it makes it possible to build each algorithm as a single module, with better inlining and dead code elimination. For a more detailed explanation, see the patchset which did this for the CRC library code: https://lore.kernel.org/r/20250607200454.73587-1-ebiggers@kernel.org/. Also see the patchset which did this for SHA-512: https://lore.kernel.org/linux-crypto/20250616014019.415791-1-ebiggers@kernel.org/ This is just a preparatory commit, which does the move to get the files into their new location but keeps them building the same way as before. Later commits will make the actual improvements to the way the arch-optimized code is integrated for each algorithm. Add a gitignore entry for the removed directory arch/x86/lib/crypto/ so that people don't accidentally commit leftover generated files. Acked-by: Ard Biesheuvel <ardb@kernel.org> Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com> Reviewed-by: Sohil Mehta <sohil.mehta@intel.com> Link: https://lore.kernel.org/r/20250619191908.134235-9-ebiggers@kernel.org Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Diffstat (limited to 'lib/crypto/x86/blake2s-glue.c')
-rw-r--r--lib/crypto/x86/blake2s-glue.c70
1 files changed, 70 insertions, 0 deletions
diff --git a/lib/crypto/x86/blake2s-glue.c b/lib/crypto/x86/blake2s-glue.c
new file mode 100644
index 000000000000..adc296cd17c9
--- /dev/null
+++ b/lib/crypto/x86/blake2s-glue.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <asm/cpufeature.h>
+#include <asm/fpu/api.h>
+#include <asm/processor.h>
+#include <asm/simd.h>
+#include <crypto/internal/blake2s.h>
+#include <linux/init.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/sizes.h>
+
+asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state,
+ const u8 *block, const size_t nblocks,
+ const u32 inc);
+asmlinkage void blake2s_compress_avx512(struct blake2s_state *state,
+ const u8 *block, const size_t nblocks,
+ const u32 inc);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512);
+
+void blake2s_compress(struct blake2s_state *state, const u8 *block,
+ size_t nblocks, const u32 inc)
+{
+ /* SIMD disables preemption, so relax after processing each page. */
+ BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);
+
+ if (!static_branch_likely(&blake2s_use_ssse3) || !may_use_simd()) {
+ blake2s_compress_generic(state, block, nblocks, inc);
+ return;
+ }
+
+ do {
+ const size_t blocks = min_t(size_t, nblocks,
+ SZ_4K / BLAKE2S_BLOCK_SIZE);
+
+ kernel_fpu_begin();
+ if (static_branch_likely(&blake2s_use_avx512))
+ blake2s_compress_avx512(state, block, blocks, inc);
+ else
+ blake2s_compress_ssse3(state, block, blocks, inc);
+ kernel_fpu_end();
+
+ nblocks -= blocks;
+ block += blocks * BLAKE2S_BLOCK_SIZE;
+ } while (nblocks);
+}
+EXPORT_SYMBOL(blake2s_compress);
+
+static int __init blake2s_mod_init(void)
+{
+ if (boot_cpu_has(X86_FEATURE_SSSE3))
+ static_branch_enable(&blake2s_use_ssse3);
+
+ if (boot_cpu_has(X86_FEATURE_AVX) &&
+ boot_cpu_has(X86_FEATURE_AVX2) &&
+ boot_cpu_has(X86_FEATURE_AVX512F) &&
+ boot_cpu_has(X86_FEATURE_AVX512VL) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
+ XFEATURE_MASK_AVX512, NULL))
+ static_branch_enable(&blake2s_use_avx512);
+
+ return 0;
+}
+
+subsys_initcall(blake2s_mod_init);