diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-01-22 19:55:08 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-01-22 19:55:08 -0800 |
commit | 37b33c68b00089a574ebd0a856a5d554eb3001b7 (patch) | |
tree | f773baa072cde798833cc9a733f7f51b40a63afa /arch | |
parent | 9cb2bf599b2ce832127fa61fb430cc00a724d371 (diff) | |
parent | 72914faebaabd77d8a471af4662ca0b938011c49 (diff) |
Merge tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux
Pull CRC updates from Eric Biggers:
- Reorganize the architecture-optimized CRC32 and CRC-T10DIF code to be
directly accessible via the library API, instead of requiring the
crypto API. This is much simpler and more efficient.
- Convert some users such as ext4 to use the CRC32 library API instead
of the crypto API. More conversions like this will come later.
- Add a KUnit test that tests and benchmarks multiple CRC variants.
Remove older, less-comprehensive tests that are made redundant by
this.
- Add an entry to MAINTAINERS for the kernel's CRC library code. I'm
volunteering to maintain it. I have additional cleanups and
optimizations planned for future cycles.
* tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux: (31 commits)
MAINTAINERS: add entry for CRC library
powerpc/crc: delete obsolete crc-vpmsum_test.c
lib/crc32test: delete obsolete crc32test.c
lib/crc16_kunit: delete obsolete crc16_kunit.c
lib/crc_kunit.c: add KUnit test suite for CRC library functions
powerpc/crc-t10dif: expose CRC-T10DIF function through lib
arm64/crc-t10dif: expose CRC-T10DIF function through lib
arm/crc-t10dif: expose CRC-T10DIF function through lib
x86/crc-t10dif: expose CRC-T10DIF function through lib
crypto: crct10dif - expose arch-optimized lib function
lib/crc-t10dif: add support for arch overrides
lib/crc-t10dif: stop wrapping the crypto API
scsi: target: iscsi: switch to using the crc32c library
f2fs: switch to using the crc32 library
jbd2: switch to using the crc32c library
ext4: switch to using the crc32c library
lib/crc32: make crc32c() go directly to lib
bcachefs: Explicitly select CRYPTO from BCACHEFS_FS
x86/crc32: expose CRC32 functions through lib
x86/crc32: update prototype for crc32_pclmul_le_16()
...
Diffstat (limited to 'arch')
96 files changed, 1212 insertions, 2862 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 202397be76d8..3ad4c3c97918 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -7,6 +7,8 @@ config ARM select ARCH_HAS_BINFMT_FLAT select ARCH_HAS_CPU_CACHE_ALIASING select ARCH_HAS_CPU_FINALIZE_INIT if MMU + select ARCH_HAS_CRC32 if KERNEL_MODE_NEON + select ARCH_HAS_CRC_T10DIF if KERNEL_MODE_NEON select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DMA_ALLOC if MMU diff --git a/arch/arm/configs/milbeaut_m10v_defconfig b/arch/arm/configs/milbeaut_m10v_defconfig index f5eeac9c65c3..acd16204f8d7 100644 --- a/arch/arm/configs/milbeaut_m10v_defconfig +++ b/arch/arm/configs/milbeaut_m10v_defconfig @@ -107,7 +107,6 @@ CONFIG_CRYPTO_AES_ARM=m CONFIG_CRYPTO_AES_ARM_BS=m CONFIG_CRYPTO_AES_ARM_CE=m CONFIG_CRYPTO_CHACHA20_NEON=m -CONFIG_CRYPTO_CRC32_ARM_CE=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC_CCITT=m CONFIG_CRC_ITU_T=m diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 758276027dbc..9d4336261e45 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -1306,7 +1306,6 @@ CONFIG_CRYPTO_AES_ARM=m CONFIG_CRYPTO_AES_ARM_BS=m CONFIG_CRYPTO_AES_ARM_CE=m CONFIG_CRYPTO_CHACHA20_NEON=m -CONFIG_CRYPTO_CRC32_ARM_CE=m CONFIG_CRYPTO_DEV_SUN4I_SS=m CONFIG_CRYPTO_DEV_FSL_CAAM=m CONFIG_CRYPTO_DEV_EXYNOS_RNG=m diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 5ff49a5e9afc..32650c8431d9 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -222,30 +222,5 @@ config CRYPTO_CHACHA20_NEON Architecture: arm using: - NEON (Advanced SIMD) extensions -config CRYPTO_CRC32_ARM_CE - tristate "CRC32C and CRC32" - depends on KERNEL_MODE_NEON - depends on CRC32 - select CRYPTO_HASH - help - CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720) - and CRC32 CRC algorithm (IEEE 802.3) - - Architecture: arm using: - - CRC and/or PMULL instructions - - Drivers: crc32-arm-ce and crc32c-arm-ce - -config CRYPTO_CRCT10DIF_ARM_CE - tristate "CRCT10DIF" - depends on KERNEL_MODE_NEON - depends on CRC_T10DIF - select CRYPTO_HASH - help - CRC16 CRC algorithm used for the T10 (SCSI) Data Integrity Field (DIF) - - Architecture: arm using: - - PMULL (Polynomial Multiply Long) instructions - endmenu diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 13e62c7c25dc..3d0e23ff9e74 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -20,8 +20,6 @@ obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o -obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o -obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o aes-arm-y := aes-cipher-core.o aes-cipher-glue.o aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o @@ -37,8 +35,6 @@ sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o -crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o -crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o chacha-neon-y := chacha-scalar-core.o chacha-glue.o chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o poly1305-arm-y := poly1305-core.o poly1305-glue.o diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glue.c deleted file mode 100644 index 20b4dff13e3a..000000000000 --- a/arch/arm/crypto/crc32-ce-glue.c +++ /dev/null @@ -1,247 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instructions - * - * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> - */ - -#include <linux/cpufeature.h> -#include <linux/crc32.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/string.h> - -#include <crypto/internal/hash.h> -#include <crypto/internal/simd.h> - -#include <asm/hwcap.h> -#include <asm/neon.h> -#include <asm/simd.h> -#include <linux/unaligned.h> - -#define PMULL_MIN_LEN 64L /* minimum size of buffer - * for crc32_pmull_le_16 */ -#define SCALE_F 16L /* size of NEON register */ - -asmlinkage u32 crc32_pmull_le(const u8 buf[], u32 len, u32 init_crc); -asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], u32 len); - -asmlinkage u32 crc32c_pmull_le(const u8 buf[], u32 len, u32 init_crc); -asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], u32 len); - -static u32 (*fallback_crc32)(u32 init_crc, const u8 buf[], u32 len); -static u32 (*fallback_crc32c)(u32 init_crc, const u8 buf[], u32 len); - -static int crc32_cra_init(struct crypto_tfm *tfm) -{ - u32 *key = crypto_tfm_ctx(tfm); - - *key = 0; - return 0; -} - -static int crc32c_cra_init(struct crypto_tfm *tfm) -{ - u32 *key = crypto_tfm_ctx(tfm); - - *key = ~0; - return 0; -} - -static int crc32_setkey(struct crypto_shash *hash, const u8 *key, - unsigned int keylen) -{ - u32 *mctx = crypto_shash_ctx(hash); - - if (keylen != sizeof(u32)) - return -EINVAL; - *mctx = le32_to_cpup((__le32 *)key); - return 0; -} - -static int crc32_init(struct shash_desc *desc) -{ - u32 *mctx = crypto_shash_ctx(desc->tfm); - u32 *crc = shash_desc_ctx(desc); - - *crc = *mctx; - return 0; -} - -static int crc32_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - u32 *crc = shash_desc_ctx(desc); - - *crc = crc32_armv8_le(*crc, data, length); - return 0; -} - -static int crc32c_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - u32 *crc = shash_desc_ctx(desc); - - *crc = crc32c_armv8_le(*crc, data, length); - return 0; -} - -static int crc32_final(struct shash_desc *desc, u8 *out) -{ - u32 *crc = shash_desc_ctx(desc); - - put_unaligned_le32(*crc, out); - return 0; -} - -static int crc32c_final(struct shash_desc *desc, u8 *out) -{ - u32 *crc = shash_desc_ctx(desc); - - put_unaligned_le32(~*crc, out); - return 0; -} - -static int crc32_pmull_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - u32 *crc = shash_desc_ctx(desc); - unsigned int l; - - if (crypto_simd_usable()) { - if ((u32)data % SCALE_F) { - l = min_t(u32, length, SCALE_F - ((u32)data % SCALE_F)); - - *crc = fallback_crc32(*crc, data, l); - - data += l; - length -= l; - } - - if (length >= PMULL_MIN_LEN) { - l = round_down(length, SCALE_F); - - kernel_neon_begin(); - *crc = crc32_pmull_le(data, l, *crc); - kernel_neon_end(); - - data += l; - length -= l; - } - } - - if (length > 0) - *crc = fallback_crc32(*crc, data, length); - - return 0; -} - -static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - u32 *crc = shash_desc_ctx(desc); - unsigned int l; - - if (crypto_simd_usable()) { - if ((u32)data % SCALE_F) { - l = min_t(u32, length, SCALE_F - ((u32)data % SCALE_F)); - - *crc = fallback_crc32c(*crc, data, l); - - data += l; - length -= l; - } - - if (length >= PMULL_MIN_LEN) { - l = round_down(length, SCALE_F); - - kernel_neon_begin(); - *crc = crc32c_pmull_le(data, l, *crc); - kernel_neon_end(); - - data += l; - length -= l; - } - } - - if (length > 0) - *crc = fallback_crc32c(*crc, data, length); - - return 0; -} - -static struct shash_alg crc32_pmull_algs[] = { { - .setkey = crc32_setkey, - .init = crc32_init, - .update = crc32_update, - .final = crc32_final, - .descsize = sizeof(u32), - .digestsize = sizeof(u32), - - .base.cra_ctxsize = sizeof(u32), - .base.cra_init = crc32_cra_init, - .base.cra_name = "crc32", - .base.cra_driver_name = "crc32-arm-ce", - .base.cra_priority = 200, - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .base.cra_blocksize = 1, - .base.cra_module = THIS_MODULE, -}, { - .setkey = crc32_setkey, - .init = crc32_init, - .update = crc32c_update, - .final = crc32c_final, - .descsize = sizeof(u32), - .digestsize = sizeof(u32), - - .base.cra_ctxsize = sizeof(u32), - .base.cra_init = crc32c_cra_init, - .base.cra_name = "crc32c", - .base.cra_driver_name = "crc32c-arm-ce", - .base.cra_priority = 200, - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .base.cra_blocksize = 1, - .base.cra_module = THIS_MODULE, -} }; - -static int __init crc32_pmull_mod_init(void) -{ - if (elf_hwcap2 & HWCAP2_PMULL) { - crc32_pmull_algs[0].update = crc32_pmull_update; - crc32_pmull_algs[1].update = crc32c_pmull_update; - - if (elf_hwcap2 & HWCAP2_CRC32) { - fallback_crc32 = crc32_armv8_le; - fallback_crc32c = crc32c_armv8_le; - } else { - fallback_crc32 = crc32_le; - fallback_crc32c = __crc32c_le; - } - } else if (!(elf_hwcap2 & HWCAP2_CRC32)) { - return -ENODEV; - } - - return crypto_register_shashes(crc32_pmull_algs, - ARRAY_SIZE(crc32_pmull_algs)); -} - -static void __exit crc32_pmull_mod_exit(void) -{ - crypto_unregister_shashes(crc32_pmull_algs, - ARRAY_SIZE(crc32_pmull_algs)); -} - -static const struct cpu_feature __maybe_unused crc32_cpu_feature[] = { - { cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { } -}; -MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature); - -module_init(crc32_pmull_mod_init); -module_exit(crc32_pmull_mod_exit); - -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); -MODULE_DESCRIPTION("Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("crc32"); -MODULE_ALIAS_CRYPTO("crc32c"); diff --git a/arch/arm/crypto/crct10dif-ce-glue.c b/arch/arm/crypto/crct10dif-ce-glue.c deleted file mode 100644 index a8b74523729e..000000000000 --- a/arch/arm/crypto/crct10dif-ce-glue.c +++ /dev/null @@ -1,124 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions instructions - * - * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> - */ - -#include <linux/crc-t10dif.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/string.h> - -#include <crypto/internal/hash.h> -#include <crypto/internal/simd.h> - -#include <asm/neon.h> -#include <asm/simd.h> - -#define CRC_T10DIF_PMULL_CHUNK_SIZE 16U - -asmlinkage u16 crc_t10dif_pmull64(u16 init_crc, const u8 *buf, size_t len); -asmlinkage void crc_t10dif_pmull8(u16 init_crc, const u8 *buf, size_t len, - u8 out[16]); - -static int crct10dif_init(struct shash_desc *desc) -{ - u16 *crc = shash_desc_ctx(desc); - - *crc = 0; - return 0; -} - -static int crct10dif_update_ce(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - u16 *crc = shash_desc_ctx(desc); - - if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && crypto_simd_usable()) { - kernel_neon_begin(); - *crc = crc_t10dif_pmull64(*crc, data, length); - kernel_neon_end(); - } else { - *crc = crc_t10dif_generic(*crc, data, length); - } - - return 0; -} - -static int crct10dif_update_neon(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - u16 *crcp = shash_desc_ctx(desc); - u8 buf[16] __aligned(16); - u16 crc = *crcp; - - if (length > CRC_T10DIF_PMULL_CHUNK_SIZE && crypto_simd_usable()) { - kernel_neon_begin(); - crc_t10dif_pmull8(crc, data, length, buf); - kernel_neon_end(); - - crc = 0; - data = buf; - length = sizeof(buf); - } - - *crcp = crc_t10dif_generic(crc, data, length); - return 0; -} - -static int crct10dif_final(struct shash_desc *desc, u8 *out) -{ - u16 *crc = shash_desc_ctx(desc); - - *(u16 *)out = *crc; - return 0; -} - -static struct shash_alg algs[] = {{ - .digestsize = CRC_T10DIF_DIGEST_SIZE, - .init = crct10dif_init, - .update = crct10dif_update_neon, - .final = crct10dif_final, - .descsize = CRC_T10DIF_DIGEST_SIZE, - - .base.cra_name = "crct10dif", - .base.cra_driver_name = "crct10dif-arm-neon", - .base.cra_priority = 150, - .base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, -}, { - .digestsize = CRC_T10DIF_DIGEST_SIZE, - .init = crct10dif_init, - .update = crct10dif_update_ce, - .final = crct10dif_final, - .descsize = CRC_T10DIF_DIGEST_SIZE, - - .base.cra_name = "crct10dif", - .base.cra_driver_name = "crct10dif-arm-ce", - .base.cra_priority = 200, - .base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, -}}; - -static int __init crc_t10dif_mod_init(void) -{ - if (!(elf_hwcap & HWCAP_NEON)) - return -ENODEV; - - return crypto_register_shashes(algs, 1 + !!(elf_hwcap2 & HWCAP2_PMULL)); -} - -static void __exit crc_t10dif_mod_exit(void) -{ - crypto_unregister_shashes(algs, 1 + !!(elf_hwcap2 & HWCAP2_PMULL)); -} - -module_init(crc_t10dif_mod_init); -module_exit(crc_t10dif_mod_exit); - -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); -MODULE_DESCRIPTION("Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("crct10dif"); diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 0ca5aae1bcc3..007874320937 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -45,3 +45,9 @@ ifeq ($(CONFIG_KERNEL_MODE_NEON),y) endif obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o + +obj-$(CONFIG_CRC32_ARCH) += crc32-arm.o +crc32-arm-y := crc32-glue.o crc32-core.o + +obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-arm.o +crc-t10dif-arm-y := crc-t10dif-glue.o crc-t10dif-core.o diff --git a/arch/arm/crypto/crct10dif-ce-core.S b/arch/arm/lib/crc-t10dif-core.S index 2bbf2df9c1e2..2bbf2df9c1e2 100644 --- a/arch/arm/crypto/crct10dif-ce-core.S +++ b/arch/arm/lib/crc-t10dif-core.S diff --git a/arch/arm/lib/crc-t10dif-glue.c b/arch/arm/lib/crc-t10dif-glue.c new file mode 100644 index 000000000000..d24dee62670e --- /dev/null +++ b/arch/arm/lib/crc-t10dif-glue.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions instructions + * + * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> + */ + +#include <linux/crc-t10dif.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> + +#include <crypto/internal/simd.h> + +#include <asm/neon.h> +#include <asm/simd.h> + +static DEFINE_STATIC_KEY_FALSE(have_neon); +static DEFINE_STATIC_KEY_FALSE(have_pmull); + +#define CRC_T10DIF_PMULL_CHUNK_SIZE 16U + +asmlinkage u16 crc_t10dif_pmull64(u16 init_crc, const u8 *buf, size_t len); +asmlinkage void crc_t10dif_pmull8(u16 init_crc, const u8 *buf, size_t len, + u8 out[16]); + +u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length) +{ + if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) { + if (static_branch_likely(&have_pmull)) { + if (crypto_simd_usable()) { + kernel_neon_begin(); + crc = crc_t10dif_pmull64(crc, data, length); + kernel_neon_end(); + return crc; + } + } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE && + static_branch_likely(&have_neon) && + crypto_simd_usable()) { + u8 buf[16] __aligned(16); + + kernel_neon_begin(); + crc_t10dif_pmull8(crc, data, length, buf); + kernel_neon_end(); + + crc = 0; + data = buf; + length = sizeof(buf); + } + } + return crc_t10dif_generic(crc, data, length); +} +EXPORT_SYMBOL(crc_t10dif_arch); + +static int __init crc_t10dif_arm_init(void) +{ + if (elf_hwcap & HWCAP_NEON) { + static_branch_enable(&have_neon); + if (elf_hwcap2 & HWCAP2_PMULL) + static_branch_enable(&have_pmull); + } + return 0; +} +arch_initcall(crc_t10dif_arm_init); + +static void __exit crc_t10dif_arm_exit(void) +{ +} +module_exit(crc_t10dif_arm_exit); + +bool crc_t10dif_is_optimized(void) +{ + return static_key_enabled(&have_neon); +} +EXPORT_SYMBOL(crc_t10dif_is_optimized); + +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_DESCRIPTION("Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/arm/crypto/crc32-ce-core.S b/arch/arm/lib/crc32-core.S index 88f9edf94e95..6f674f30c70b 100644 --- a/arch/arm/crypto/crc32-ce-core.S +++ b/arch/arm/lib/crc32-core.S @@ -48,7 +48,6 @@ */ #include <linux/linkage.h> -#include <linux/cfi_types.h> #include <asm/assembler.h> .text @@ -297,11 +296,11 @@ ARM_BE8(rev16 r3, r3 ) .endm .align 5 -SYM_TYPED_FUNC_START(crc32_armv8_le) +SYM_FUNC_START(crc32_armv8_le) __crc32 SYM_FUNC_END(crc32_armv8_le) .align 5 -SYM_TYPED_FUNC_START(crc32c_armv8_le) +SYM_FUNC_START(crc32c_armv8_le) __crc32 c SYM_FUNC_END(crc32c_armv8_le) diff --git a/arch/arm/lib/crc32-glue.c b/arch/arm/lib/crc32-glue.c new file mode 100644 index 000000000000..2c30ba3d80e6 --- /dev/null +++ b/arch/arm/lib/crc32-glue.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instructions + * + * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> + */ + +#include <linux/cpufeature.h> +#include <linux/crc32.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> + +#include <crypto/internal/simd.h> + +#include <asm/hwcap.h> +#include <asm/neon.h> +#include <asm/simd.h> + +static DEFINE_STATIC_KEY_FALSE(have_crc32); +static DEFINE_STATIC_KEY_FALSE(have_pmull); + +#define PMULL_MIN_LEN 64 /* min size of buffer for pmull functions */ + +asmlinkage u32 crc32_pmull_le(const u8 buf[], u32 len, u32 init_crc); +asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], u32 len); + +asmlinkage u32 crc32c_pmull_le(const u8 buf[], u32 len, u32 init_crc); +asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], u32 len); + +static u32 crc32_le_scalar(u32 crc, const u8 *p, size_t len) +{ + if (static_branch_likely(&have_crc32)) + return crc32_armv8_le(crc, p, len); + return crc32_le_base(crc, p, len); +} + +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) +{ + if (len >= PMULL_MIN_LEN + 15 && + static_branch_likely(&have_pmull) && crypto_simd_usable()) { + size_t n = -(uintptr_t)p & 15; + + /* align p to 16-byte boundary */ + if (n) { + crc = crc32_le_scalar(crc, p, n); + p += n; + len -= n; + } + n = round_down(len, 16); + kernel_neon_begin(); + crc = crc32_pmull_le(p, n, crc); + kernel_neon_end(); + p += n; + len -= n; + } + return crc32_le_scalar(crc, p, len); +} +EXPORT_SYMBOL(crc32_le_arch); + +static u32 crc32c_le_scalar(u32 crc, const u8 *p, size_t len) +{ + if (static_branch_likely(&have_crc32)) + return crc32c_armv8_le(crc, p, len); + return crc32c_le_base(crc, p, len); +} + +u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) +{ + if (len >= PMULL_MIN_LEN + 15 && + static_branch_likely(&have_pmull) && crypto_simd_usable()) { + size_t n = -(uintptr_t)p & 15; + + /* align p to 16-byte boundary */ + if (n) { + crc = crc32c_le_scalar(crc, p, n); + p += n; + len -= n; + } + n = round_down(len, 16); + kernel_neon_begin(); + crc = crc32c_pmull_le(p, n, crc); + kernel_neon_end(); + p += n; + len -= n; + } + return crc32c_le_scalar(crc, p, len); +} +EXPORT_SYMBOL(crc32c_le_arch); + +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_be_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +static int __init crc32_arm_init(void) +{ + if (elf_hwcap2 & HWCAP2_CRC32) + static_branch_enable(&have_crc32); + if (elf_hwcap2 & HWCAP2_PMULL) + static_branch_enable(&have_pmull); + return 0; +} +arch_initcall(crc32_arm_init); + +static void __exit crc32_arm_exit(void) +{ +} +module_exit(crc32_arm_exit); + +u32 crc32_optimizations(void) +{ + if (elf_hwcap2 & (HWCAP2_CRC32 | HWCAP2_PMULL)) + return CRC32_LE_OPTIMIZATION | CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_DESCRIPTION("Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 83017757be8e..fcdd0ed3eca8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -21,6 +21,8 @@ config ARM64 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_CC_PLATFORM + select ARCH_HAS_CRC32 + select ARCH_HAS_CRC_T10DIF if KERNEL_MODE_NEON select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index c62831e61586..9c0d6b93a3c2 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1698,7 +1698,6 @@ CONFIG_CRYPTO_SM3_ARM64_CE=m CONFIG_CRYPTO_AES_ARM64_CE_BLK=y CONFIG_CRYPTO_AES_ARM64_BS=m CONFIG_CRYPTO_AES_ARM64_CE_CCM=y -CONFIG_CRYPTO_CRCT10DIF_ARM64_CE=m CONFIG_CRYPTO_DEV_SUN8I_CE=m CONFIG_CRYPTO_DEV_FSL_CAAM=m CONFIG_CRYPTO_DEV_FSL_DPAA2_CAAM=m diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index e7d9bd8e4709..5636ab83f22a 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -312,15 +312,5 @@ config CRYPTO_SM4_ARM64_CE_GCM - PMULL (Polynomial Multiply Long) instructions - NEON (Advanced SIMD) extensions -config CRYPTO_CRCT10DIF_ARM64_CE - tristate "CRCT10DIF (PMULL)" - depends on KERNEL_MODE_NEON && CRC_T10DIF - select CRYPTO_HASH - help - CRC16 CRC algorithm used for the T10 (SCSI) Data Integrity Field (DIF) - - Architecture: arm64 using - - PMULL (Polynomial Multiply Long) instructions - endmenu diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index fbe64dce66e0..e7139c4768ce 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -44,9 +44,6 @@ ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o obj-$(CONFIG_CRYPTO_POLYVAL_ARM64_CE) += polyval-ce.o polyval-ce-y := polyval-ce-glue.o polyval-ce-core.o -obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM64_CE) += crct10dif-ce.o -crct10dif-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o - obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o aes-ce-cipher-y := aes-ce-core.o aes-ce-glue.o diff --git a/arch/arm64/crypto/crct10dif-ce-glue.c b/arch/arm64/crypto/crct10dif-ce-glue.c deleted file mode 100644 index 08bcbd884395..000000000000 --- a/arch/arm64/crypto/crct10dif-ce-glue.c +++ /dev/null @@ -1,132 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions - * - * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> - */ - -#include <linux/cpufeature.h> -#include <linux/crc-t10dif.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/string.h> - -#include <crypto/internal/hash.h> -#include <crypto/internal/simd.h> - -#include <asm/neon.h> -#include <asm/simd.h> - -#define CRC_T10DIF_PMULL_CHUNK_SIZE 16U - -asmlinkage void crc_t10dif_pmull_p8(u16 init_crc, const u8 *buf, size_t len, - u8 out[16]); -asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len); - -static int crct10dif_init(struct shash_desc *desc) -{ - u16 *crc = shash_desc_ctx(desc); - - *crc = 0; - return 0; -} - -static int crct10dif_update_pmull_p8(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - u16 *crcp = shash_desc_ctx(desc); - u16 crc = *crcp; - u8 buf[16]; - - if (length > CRC_T10DIF_PMULL_CHUNK_SIZE && crypto_simd_usable()) { - kernel_neon_begin(); - crc_t10dif_pmull_p8(crc, data, length, buf); - kernel_neon_end(); - - crc = 0; - data = buf; - length = sizeof(buf); - } - - *crcp = crc_t10dif_generic(crc, data, length); - return 0; -} - -static int crct10dif_update_pmull_p64(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - u16 *crc = shash_desc_ctx(desc); - - if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && crypto_simd_usable()) { - kernel_neon_begin(); - *crc = crc_t10dif_pmull_p64(*crc, data, length); - kernel_neon_end(); - } else { - *crc = crc_t10dif_generic(*crc, data, length); - } - - return 0; -} - -static int crct10dif_final(struct shash_desc *desc, u8 *out) -{ - u16 *crc = shash_desc_ctx(desc); - - *(u16 *)out = *crc; - return 0; -} - -static struct shash_alg crc_t10dif_alg[] = {{ - .digestsize = CRC_T10DIF_DIGEST_SIZE, - .init = crct10dif_init, - .update = crct10dif_update_pmull_p8, - .final = crct10dif_final, - .descsize = CRC_T10DIF_DIGEST_SIZE, - - .base.cra_name = "crct10dif", - .base.cra_driver_name = "crct10dif-arm64-neon", - .base.cra_priority = 150, - .base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, -}, { - .digestsize = CRC_T10DIF_DIGEST_SIZE, - .init = crct10dif_init, - .update = crct10dif_update_pmull_p64, - .final = crct10dif_final, - .descsize = CRC_T10DIF_DIGEST_SIZE, - - .base.cra_name = "crct10dif", - .base.cra_driver_name = "crct10dif-arm64-ce", - .base.cra_priority = 200, - .base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, -}}; - -static int __init crc_t10dif_mod_init(void) -{ - if (cpu_have_named_feature(PMULL)) - return crypto_register_shashes(crc_t10dif_alg, - ARRAY_SIZE(crc_t10dif_alg)); - else - /* only register the first array element */ - return crypto_register_shash(crc_t10dif_alg); -} - -static void __exit crc_t10dif_mod_exit(void) -{ - if (cpu_have_named_feature(PMULL)) - crypto_unregister_shashes(crc_t10dif_alg, - ARRAY_SIZE(crc_t10dif_alg)); - else - crypto_unregister_shash(crc_t10dif_alg); -} - -module_cpu_feature_match(ASIMD, crc_t10dif_mod_init); -module_exit(crc_t10dif_mod_exit); - -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); -MODULE_DESCRIPTION("CRC-T10DIF using arm64 NEON and Crypto Extensions"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("crct10dif"); -MODULE_ALIAS_CRYPTO("crct10dif-arm64-ce"); diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 8e882f479d98..4d49dff721a8 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -13,7 +13,11 @@ endif lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o -obj-$(CONFIG_CRC32) += crc32.o crc32-glue.o +obj-$(CONFIG_CRC32_ARCH) += crc32-arm64.o +crc32-arm64-y := crc32.o crc32-glue.o + +obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-arm64.o +crc-t10dif-arm64-y := crc-t10dif-glue.o crc-t10dif-core.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/lib/crc-t10dif-core.S index 87dd6d46224d..87dd6d46224d 100644 --- a/arch/arm64/crypto/crct10dif-ce-core.S +++ b/arch/arm64/lib/crc-t10dif-core.S diff --git a/arch/arm64/lib/crc-t10dif-glue.c b/arch/arm64/lib/crc-t10dif-glue.c new file mode 100644 index 000000000000..dab7e3796232 --- /dev/null +++ b/arch/arm64/lib/crc-t10dif-glue.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions + * + * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> + */ + +#include <linux/cpufeature.h> +#include <linux/crc-t10dif.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> + +#include <crypto/internal/simd.h> + +#include <asm/neon.h> +#include <asm/simd.h> + +static DEFINE_STATIC_KEY_FALSE(have_asimd); +static DEFINE_STATIC_KEY_FALSE(have_pmull); + +#define CRC_T10DIF_PMULL_CHUNK_SIZE 16U + +asmlinkage void crc_t10dif_pmull_p8(u16 init_crc, const u8 *buf, size_t len, + u8 out[16]); +asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len); + +u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length) +{ + if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) { + if (static_branch_likely(&have_pmull)) { + if (crypto_simd_usable()) { + kernel_neon_begin(); + crc = crc_t10dif_pmull_p64(crc, data, length); + kernel_neon_end(); + return crc; + } + } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE && + static_branch_likely(&have_asimd) && + crypto_simd_usable()) { + u8 buf[16]; + + kernel_neon_begin(); + crc_t10dif_pmull_p8(crc, data, length, buf); + kernel_neon_end(); + + crc = 0; + data = buf; + length = sizeof(buf); + } + } + return crc_t10dif_generic(crc, data, length); +} +EXPORT_SYMBOL(crc_t10dif_arch); + +static int __init crc_t10dif_arm64_init(void) +{ + if (cpu_have_named_feature(ASIMD)) { + static_branch_enable(&have_asimd); + if (cpu_have_named_feature(PMULL)) + static_branch_enable(&have_pmull); + } + return 0; +} +arch_initcall(crc_t10dif_arm64_init); + +static void __exit crc_t10dif_arm64_exit(void) +{ +} +module_exit(crc_t10dif_arm64_exit); + +bool crc_t10dif_is_optimized(void) +{ + return static_key_enabled(&have_asimd); +} +EXPORT_SYMBOL(crc_t10dif_is_optimized); + +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_DESCRIPTION("CRC-T10DIF using arm64 NEON and Crypto Extensions"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/arm64/lib/crc32-glue.c b/arch/arm64/lib/crc32-glue.c index 295ae3e6b997..15c4c9db573e 100644 --- a/arch/arm64/lib/crc32-glue.c +++ b/arch/arm64/lib/crc32-glue.c @@ -2,6 +2,7 @@ #include <linux/crc32.h> #include <linux/linkage.h> +#include <linux/module.h> #include <asm/alternative.h> #include <asm/cpufeature.h> @@ -21,7 +22,7 @@ asmlinkage u32 crc32_le_arm64_4way(u32 crc, unsigned char const *p, size_t len); asmlinkage u32 crc32c_le_arm64_4way(u32 crc, unsigned char const *p, size_t len); asmlinkage u32 crc32_be_arm64_4way(u32 crc, unsigned char const *p, size_t len); -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len) { if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) return crc32_le_base(crc, p, len); @@ -40,11 +41,12 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) return crc32_le_arm64(crc, p, len); } +EXPORT_SYMBOL(crc32_le_arch); -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len) { if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) - return __crc32c_le_base(crc, p, len); + return crc32c_le_base(crc, p, len); if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) { kernel_neon_begin(); @@ -60,8 +62,9 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) return crc32c_le_arm64(crc, p, len); } +EXPORT_SYMBOL(crc32c_le_arch); -u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len) { if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) return crc32_be_base(crc, p, len); @@ -80,3 +83,17 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) return crc32_be_arm64(crc, p, len); } +EXPORT_SYMBOL(crc32_be_arch); + +u32 crc32_optimizations(void) +{ + if (alternative_has_cap_likely(ARM64_HAS_CRC32)) + return CRC32_LE_OPTIMIZATION | + CRC32_BE_OPTIMIZATION | + CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("arm64-optimized CRC32 functions"); diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 28f0221e22fb..22b531f45ffa 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -15,6 +15,7 @@ config LOONGARCH select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_CPU_FINALIZE_INIT + select ARCH_HAS_CRC32 select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_FAST_MULTIPLIER diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 4dffc90192f7..2052452e241a 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -1040,7 +1040,6 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m -CONFIG_CRYPTO_CRC32_LOONGARCH=m CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_DMA_CMA=y CONFIG_DMA_NUMA_CMA=y diff --git a/arch/loongarch/crypto/Kconfig b/arch/loongarch/crypto/Kconfig index 200a6e8b43b1..a0270b3e5b30 100644 --- a/arch/loongarch/crypto/Kconfig +++ b/arch/loongarch/crypto/Kconfig @@ -2,13 +2,4 @@ menu "Accelerated Cryptographic Algorithms for CPU (loongarch)" -config CRYPTO_CRC32_LOONGARCH - tristate "CRC32c and CRC32" - select CRC32 - select CRYPTO_HASH - help - CRC32c and CRC32 CRC algorithms - - Architecture: LoongArch with CRC32 instructions - endmenu diff --git a/arch/loongarch/crypto/Makefile b/arch/loongarch/crypto/Makefile index d22613d27ce9..ba83755dde2b 100644 --- a/arch/loongarch/crypto/Makefile +++ b/arch/loongarch/crypto/Makefile @@ -2,5 +2,3 @@ # # Makefile for LoongArch crypto files.. # - -obj-$(CONFIG_CRYPTO_CRC32_LOONGARCH) += crc32-loongarch.o diff --git a/arch/loongarch/crypto/crc32-loongarch.c b/arch/loongarch/crypto/crc32-loongarch.c deleted file mode 100644 index b7d9782827f5..000000000000 --- a/arch/loongarch/crypto/crc32-loongarch.c +++ /dev/null @@ -1,300 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * crc32.c - CRC32 and CRC32C using LoongArch crc* instructions - * - * Module based on mips/crypto/crc32-mips.c - * - * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org> - * Copyright (C) 2018 MIPS Tech, LLC - * Copyright (C) 2020-2023 Loongson Technology Corporation Limited - */ - -#include <linux/module.h> -#include <crypto/internal/hash.h> - -#include <asm/cpu-features.h> -#include <linux/unaligned.h> - -#define _CRC32(crc, value, size, type) \ -do { \ - __asm__ __volatile__( \ - #type ".w." #size ".w" " %0, %1, %0\n\t"\ - : "+r" (crc) \ - : "r" (value) \ - : "memory"); \ -} while (0) - -#define CRC32(crc, value, size) _CRC32(crc, value, size, crc) -#define CRC32C(crc, value, size) _CRC32(crc, value, size, crcc) - -static u32 crc32_loongarch_hw(u32 crc_, const u8 *p, unsigned int len) -{ - u32 crc = crc_; - - while (len >= sizeof(u64)) { - u64 value = get_unaligned_le64(p); - - CRC32(crc, value, d); - p += sizeof(u64); - len -= sizeof(u64); - } - - if (len & sizeof(u32)) { - u32 value = get_unaligned_le32(p); - - CRC32(crc, value, w); - p += sizeof(u32); - } - - if (len & sizeof(u16)) { - u16 value = get_unaligned_le16(p); - - CRC32(crc, value, h); - p += sizeof(u16); - } - - if (len & sizeof(u8)) { - u8 value = *p++; - - CRC32(crc, value, b); - } - - return crc; -} - -static u32 crc32c_loongarch_hw(u32 crc_, const u8 *p, unsigned int len) -{ - u32 crc = crc_; - - while (len >= sizeof(u64)) { - u64 value = get_unaligned_le64(p); - - CRC32C(crc, value, d); - p += sizeof(u64); - len -= sizeof(u64); - } - - if (len & sizeof(u32)) { - u32 value = get_unaligned_le32(p); - - CRC32C(crc, value, w); - p += sizeof(u32); - } - - if (len & sizeof(u16)) { - u16 value = get_unaligned_le16(p); - - CRC32C(crc, value, h); - p += sizeof(u16); - } - - if (len & sizeof(u8)) { - u8 value = *p++; - - CRC32C(crc, value, b); - } - - return crc; -} - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -struct chksum_ctx { - u32 key; -}; - -struct chksum_desc_ctx { - u32 crc; -}; - -static int chksum_init(struct shash_desc *desc) -{ - struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = mctx->key; - - return 0; -} - -/* - * Setting the seed allows arbitrary accumulators and flexible XOR policy - * If your algorithm starts with ~0, then XOR with ~0 before you set the seed. - */ -static int chksum_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) -{ - struct chksum_ctx *mctx = crypto_shash_ctx(tfm); - - if (keylen != sizeof(mctx->key)) - return -EINVAL; - - mctx->key = get_unaligned_le32(key); - - return 0; -} - -static int chksum_update(struct shash_desc *desc, const u8 *data, unsigned int length) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = crc32_loongarch_hw(ctx->crc, data, length); - return 0; -} - -static int chksumc_update(struct shash_desc *desc, const u8 *data, unsigned int length) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = crc32c_loongarch_hw(ctx->crc, data, length); - return 0; -} - -static int chksum_final(struct shash_desc *desc, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - put_unaligned_le32(ctx->crc, out); - return 0; -} - -static int chksumc_final(struct shash_desc *desc, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - put_unaligned_le32(~ctx->crc, out); - return 0; -} - -static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out) -{ - put_unaligned_le32(crc32_loongarch_hw(crc, data, len), out); - return 0; -} - -static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out) -{ - put_unaligned_le32(~crc32c_loongarch_hw(crc, data, len), out); - return 0; -} - -static int chksum_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup(ctx->crc, data, len, out); -} - -static int chksumc_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksumc_finup(ctx->crc, data, len, out); -} - -static int chksum_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out) -{ - struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); - - return __chksum_finup(mctx->key, data, length, out); -} - -static int chksumc_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out) -{ - struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); - - return __chksumc_finup(mctx->key, data, length, out); -} - -static int chksum_cra_init(struct crypto_tfm *tfm) -{ - struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); - - mctx->key = 0; - return 0; -} - -static int chksumc_cra_init(struct crypto_tfm *tfm) -{ - struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); - - mctx->key = ~0; - return 0; -} - -static struct shash_alg crc32_alg = { - .digestsize = CHKSUM_DIGEST_SIZE, - .setkey = chksum_setkey, - .init = chksum_init, - .update = chksum_update, - .final = chksum_final, - .finup = chksum_finup, - .digest = chksum_digest, - .descsize = sizeof(struct chksum_desc_ctx), - .base = { - .cra_name = "crc32", - .cra_driver_name = "crc32-loongarch", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize = CHKSUM_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct chksum_ctx), - .cra_module = THIS_MODULE, - .cra_init = chksum_cra_init, - } -}; - -static struct shash_alg crc32c_alg = { - .digestsize = CHKSUM_DIGEST_SIZE, - .setkey = chksum_setkey, - .init = chksum_init, - .update = chksumc_update, - .final = chksumc_final, - .finup = chksumc_finup, - .digest = chksumc_digest, - .descsize = sizeof(struct chksum_desc_ctx), - .base = { - .cra_name = "crc32c", - .cra_driver_name = "crc32c-loongarch", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize = CHKSUM_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct chksum_ctx), - .cra_module = THIS_MODULE, - .cra_init = chksumc_cra_init, - } -}; - -static int __init crc32_mod_init(void) -{ - int err; - - if (!cpu_has(CPU_FEATURE_CRC32)) - return 0; - - err = crypto_register_shash(&crc32_alg); - if (err) - return err; - - err = crypto_register_shash(&crc32c_alg); - if (err) - return err; - - return 0; -} - -static void __exit crc32_mod_exit(void) -{ - if (!cpu_has(CPU_FEATURE_CRC32)) - return; - - crypto_unregister_shash(&crc32_alg); - crypto_unregister_shash(&crc32c_alg); -} - -module_init(crc32_mod_init); -module_exit(crc32_mod_exit); - -MODULE_AUTHOR("Min Zhou <zhoumin@loongson.cn>"); -MODULE_AUTHOR("Huacai Chen <chenhuacai@loongson.cn>"); -MODULE_DESCRIPTION("CRC32 and CRC32C using LoongArch crc* instructions"); -MODULE_LICENSE("GPL v2"); diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile index ccea3bbd4353..fae77809048b 100644 --- a/arch/loongarch/lib/Makefile +++ b/arch/loongarch/lib/Makefile @@ -11,3 +11,5 @@ obj-$(CONFIG_ARCH_SUPPORTS_INT128) += tishift.o obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o + +obj-$(CONFIG_CRC32_ARCH) += crc32-loongarch.o diff --git a/arch/loongarch/lib/crc32-loongarch.c b/arch/loongarch/lib/crc32-loongarch.c new file mode 100644 index 000000000000..8af8113ecd9d --- /dev/null +++ b/arch/loongarch/lib/crc32-loongarch.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * CRC32 and CRC32C using LoongArch crc* instructions + * + * Module based on mips/crypto/crc32-mips.c + * + * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org> + * Copyright (C) 2018 MIPS Tech, LLC + * Copyright (C) 2020-2023 Loongson Technology Corporation Limited + */ + +#include <asm/cpu-features.h> +#include <linux/crc32.h> +#include <linux/module.h> +#include <linux/unaligned.h> + +#define _CRC32(crc, value, size, type) \ +do { \ + __asm__ __volatile__( \ + #type ".w." #size ".w" " %0, %1, %0\n\t"\ + : "+r" (crc) \ + : "r" (value) \ + : "memory"); \ +} while (0) + +#define CRC32(crc, value, size) _CRC32(crc, value, size, crc) +#define CRC32C(crc, value, size) _CRC32(crc, value, size, crcc) + +static DEFINE_STATIC_KEY_FALSE(have_crc32); + +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) +{ + if (!static_branch_likely(&have_crc32)) + return crc32_le_base(crc, p, len); + + while (len >= sizeof(u64)) { + u64 value = get_unaligned_le64(p); + + CRC32(crc, value, d); + p += sizeof(u64); + len -= sizeof(u64); + } + + if (len & sizeof(u32)) { + u32 value = get_unaligned_le32(p); + + CRC32(crc, value, w); + p += sizeof(u32); + } + + if (len & sizeof(u16)) { + u16 value = get_unaligned_le16(p); + + CRC32(crc, value, h); + p += sizeof(u16); + } + + if (len & sizeof(u8)) { + u8 value = *p++; + + CRC32(crc, value, b); + } + + return crc; +} +EXPORT_SYMBOL(crc32_le_arch); + +u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) +{ + if (!static_branch_likely(&have_crc32)) + return crc32c_le_base(crc, p, len); + + while (len >= sizeof(u64)) { + u64 value = get_unaligned_le64(p); + + CRC32C(crc, value, d); + p += sizeof(u64); + len -= sizeof(u64); + } + + if (len & sizeof(u32)) { + u32 value = get_unaligned_le32(p); + + CRC32C(crc, value, w); + p += sizeof(u32); + } + + if (len & sizeof(u16)) { + u16 value = get_unaligned_le16(p); + + CRC32C(crc, value, h); + p += sizeof(u16); + } + + if (len & sizeof(u8)) { + u8 value = *p++; + + CRC32C(crc, value, b); + } + + return crc; +} +EXPORT_SYMBOL(crc32c_le_arch); + +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_be_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +static int __init crc32_loongarch_init(void) +{ + if (cpu_has_crc32) + static_branch_enable(&have_crc32); + return 0; +} +arch_initcall(crc32_loongarch_init); + +static void __exit crc32_loongarch_exit(void) +{ +} +module_exit(crc32_loongarch_exit); + +u32 crc32_optimizations(void) +{ + if (static_key_enabled(&have_crc32)) + return CRC32_LE_OPTIMIZATION | CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_AUTHOR("Min Zhou <zhoumin@loongson.cn>"); +MODULE_AUTHOR("Huacai Chen <chenhuacai@loongson.cn>"); +MODULE_DESCRIPTION("CRC32 and CRC32C using LoongArch crc* instructions"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index c705247e7b5b..e5acb7b52550 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -606,7 +606,6 @@ CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_PRIME_NUMBERS=m -CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 6d62b9187a58..89fc22636bb5 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -563,7 +563,6 @@ CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_PRIME_NUMBERS=m -CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index c3c644df852d..9dfba6488b72 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -583,7 +583,6 @@ CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_PRIME_NUMBERS=m -CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 20261f819691..5250447e249b 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -555,7 +555,6 @@ CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_PRIME_NUMBERS=m -CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index ce4fe93a0f70..0f99307f9caf 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -565,7 +565,6 @@ CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_PRIME_NUMBERS=m -CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 040ae75f47c3..36e5f493654a 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -582,7 +582,6 @@ CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_PRIME_NUMBERS=m -CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 20d877cb4e30..382b0e2e20dc 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -669,7 +669,6 @@ CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_PRIME_NUMBERS=m -CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 5e1c8d0d3da5..d91ec0cc23c8 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -555,7 +555,6 @@ CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_PRIME_NUMBERS=m -CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 5d1409e6a137..4cde4ee78a85 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -556,7 +556,6 @@ CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_PRIME_NUMBERS=m -CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index e4c30e2b9bbb..d49c01e591e1 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -572,7 +572,6 @@ CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_PRIME_NUMBERS=m -CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 980843a9ea1e..468f2545b93e 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -553,7 +553,6 @@ CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_PRIME_NUMBERS=m -CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 38681cc6b598..ea38b1f2453d 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -553,7 +553,6 @@ CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_PRIME_NUMBERS=m -CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 467b10f4361a..a33f05e1ad6d 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1995,11 +1995,11 @@ config CPU_MIPSR5 config CPU_MIPSR6 bool default y if CPU_MIPS32_R6 || CPU_MIPS64_R6 + select ARCH_HAS_CRC32 select CPU_HAS_RIXI select CPU_HAS_DIEI if !CPU_DIEI_BROKEN select HAVE_ARCH_BITREVERSE select MIPS_ASID_BITS_VARIABLE - select MIPS_CRC_SUPPORT select MIPS_SPRAM config TARGET_ISA_REV @@ -2475,9 +2475,6 @@ config MIPS_ASID_BITS config MIPS_ASID_BITS_VARIABLE bool -config MIPS_CRC_SUPPORT - bool - # R4600 erratum. Due to the lack of errata information the exact # technical details aren't known. I've experimentally found that disabling # interrupts during indexed I-cache flushes seems to be sufficient to deal diff --git a/arch/mips/configs/eyeq5_defconfig b/arch/mips/configs/eyeq5_defconfig index ae9a09b16e40..ff7af5dc6d9d 100644 --- a/arch/mips/configs/eyeq5_defconfig +++ b/arch/mips/configs/eyeq5_defconfig @@ -99,7 +99,6 @@ CONFIG_NFS_V4=y CONFIG_NFS_V4_1=y CONFIG_NFS_V4_2=y CONFIG_ROOT_NFS=y -CONFIG_CRYPTO_CRC32_MIPS=y CONFIG_FRAME_WARN=1024 CONFIG_DEBUG_FS=y # CONFIG_RCU_TRACE is not set diff --git a/arch/mips/configs/eyeq6_defconfig b/arch/mips/configs/eyeq6_defconfig index 6597d5e88b33..0afbb45a78e8 100644 --- a/arch/mips/configs/eyeq6_defconfig +++ b/arch/mips/configs/eyeq6_defconfig @@ -102,7 +102,6 @@ CONFIG_NFS_V4=y CONFIG_NFS_V4_1=y CONFIG_NFS_V4_2=y CONFIG_ROOT_NFS=y -CONFIG_CRYPTO_CRC32_MIPS=y CONFIG_FRAME_WARN=1024 CONFIG_DEBUG_FS=y # CONFIG_RCU_TRACE is not set diff --git a/arch/mips/configs/generic/32r6.config b/arch/mips/configs/generic/32r6.config index 1a5d5ea4ab2b..ca606e71f4d0 100644 --- a/arch/mips/configs/generic/32r6.config +++ b/arch/mips/configs/generic/32r6.config @@ -1,4 +1,2 @@ CONFIG_CPU_MIPS32_R6=y CONFIG_HIGHMEM=y - -CONFIG_CRYPTO_CRC32_MIPS=y diff --git a/arch/mips/configs/generic/64r6.config b/arch/mips/configs/generic/64r6.config index 63b4e95f303d..23a300914957 100644 --- a/arch/mips/configs/generic/64r6.config +++ b/arch/mips/configs/generic/64r6.config @@ -4,5 +4,4 @@ CONFIG_MIPS32_O32=y CONFIG_MIPS32_N32=y CONFIG_CPU_HAS_MSA=y -CONFIG_CRYPTO_CRC32_MIPS=y CONFIG_VIRTUALIZATION=y diff --git a/arch/mips/crypto/Kconfig b/arch/mips/crypto/Kconfig index 9003a5c1e879..7decd40c4e20 100644 --- a/arch/mips/crypto/Kconfig +++ b/arch/mips/crypto/Kconfig @@ -2,15 +2,6 @@ menu "Accelerated Cryptographic Algorithms for CPU (mips)" -config CRYPTO_CRC32_MIPS - tristate "CRC32c and CRC32" - depends on MIPS_CRC_SUPPORT - select CRYPTO_HASH - help - CRC32c and CRC32 CRC algorithms - - Architecture: mips - config CRYPTO_POLY1305_MIPS tristate "Hash functions: Poly1305" depends on MIPS diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile index 5e4105cccf9f..fddc88281412 100644 --- a/arch/mips/crypto/Makefile +++ b/arch/mips/crypto/Makefile @@ -3,8 +3,6 @@ # Makefile for MIPS crypto files.. # -obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o - obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o chacha-mips-y := chacha-core.o chacha-glue.o AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots diff --git a/arch/mips/crypto/crc32-mips.c b/arch/mips/crypto/crc32-mips.c deleted file mode 100644 index 90eacf00cfc3..000000000000 --- a/arch/mips/crypto/crc32-mips.c +++ /dev/null @@ -1,354 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * crc32-mips.c - CRC32 and CRC32C using optional MIPSr6 instructions - * - * Module based on arm64/crypto/crc32-arm.c - * - * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org> - * Copyright (C) 2018 MIPS Tech, LLC - */ - -#include <linux/cpufeature.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/string.h> -#include <asm/mipsregs.h> -#include <linux/unaligned.h> - -#include <crypto/internal/hash.h> - -enum crc_op_size { - b, h, w, d, -}; - -enum crc_type { - crc32, - crc32c, -}; - -#ifndef TOOLCHAIN_SUPPORTS_CRC -#define _ASM_SET_CRC(OP, SZ, TYPE) \ -_ASM_MACRO_3R(OP, rt, rs, rt2, \ - ".ifnc \\rt, \\rt2\n\t" \ - ".error \"invalid operands \\\"" #OP " \\rt,\\rs,\\rt2\\\"\"\n\t" \ - ".endif\n\t" \ - _ASM_INSN_IF_MIPS(0x7c00000f | (__rt << 16) | (__rs << 21) | \ - ((SZ) << 6) | ((TYPE) << 8)) \ - _ASM_INSN32_IF_MM(0x00000030 | (__rs << 16) | (__rt << 21) | \ - ((SZ) << 14) | ((TYPE) << 3))) -#define _ASM_UNSET_CRC(op, SZ, TYPE) ".purgem " #op "\n\t" -#else /* !TOOLCHAIN_SUPPORTS_CRC */ -#define _ASM_SET_CRC(op, SZ, TYPE) ".set\tcrc\n\t" -#define _ASM_UNSET_CRC(op, SZ, TYPE) -#endif - -#define __CRC32(crc, value, op, SZ, TYPE) \ -do { \ - __asm__ __volatile__( \ - ".set push\n\t" \ - _ASM_SET_CRC(op, SZ, TYPE) \ - #op " %0, %1, %0\n\t" \ - _ASM_UNSET_CRC(op, SZ, TYPE) \ - ".set pop" \ - : "+r" (crc) \ - : "r" (value)); \ -} while (0) - -#define _CRC32_crc32b(crc, value) __CRC32(crc, value, crc32b, 0, 0) -#define _CRC32_crc32h(crc, value) __CRC32(crc, value, crc32h, 1, 0) -#define _CRC32_crc32w(crc, value) __CRC32(crc, value, crc32w, 2, 0) -#define _CRC32_crc32d(crc, value) __CRC32(crc, value, crc32d, 3, 0) -#define _CRC32_crc32cb(crc, value) __CRC32(crc, value, crc32cb, 0, 1) -#define _CRC32_crc32ch(crc, value) __CRC32(crc, value, crc32ch, 1, 1) -#define _CRC32_crc32cw(crc, value) __CRC32(crc, value, crc32cw, 2, 1) -#define _CRC32_crc32cd(crc, value) __CRC32(crc, value, crc32cd, 3, 1) - -#define _CRC32(crc, value, size, op) \ - _CRC32_##op##size(crc, value) - -#define CRC32(crc, value, size) \ - _CRC32(crc, value, size, crc32) - -#define CRC32C(crc, value, size) \ - _CRC32(crc, value, size, crc32c) - -static u32 crc32_mips_le_hw(u32 crc_, const u8 *p, unsigned int len) -{ - u32 crc = crc_; - - if (IS_ENABLED(CONFIG_64BIT)) { - for (; len >= sizeof(u64); p += sizeof(u64), len -= sizeof(u64)) { - u64 value = get_unaligned_le64(p); - - CRC32(crc, value, d); - } - - if (len & sizeof(u32)) { - u32 value = get_unaligned_le32(p); - - CRC32(crc, value, w); - p += sizeof(u32); - } - } else { - for (; len >= sizeof(u32); len -= sizeof(u32)) { - u32 value = get_unaligned_le32(p); - - CRC32(crc, value, w); - p += sizeof(u32); - } - } - - if (len & sizeof(u16)) { - u16 value = get_unaligned_le16(p); - - CRC32(crc, value, h); - p += sizeof(u16); - } - - if (len & sizeof(u8)) { - u8 value = *p++; - - CRC32(crc, value, b); - } - - return crc; -} - -static u32 crc32c_mips_le_hw(u32 crc_, const u8 *p, unsigned int len) -{ - u32 crc = crc_; - - if (IS_ENABLED(CONFIG_64BIT)) { - for (; len >= sizeof(u64); p += sizeof(u64), len -= sizeof(u64)) { - u64 value = get_unaligned_le64(p); - - CRC32C(crc, value, d); - } - - if (len & sizeof(u32)) { - u32 value = get_unaligned_le32(p); - - CRC32C(crc, value, w); - p += sizeof(u32); - } - } else { - for (; len >= sizeof(u32); len -= sizeof(u32)) { - u32 value = get_unaligned_le32(p); - - CRC32C(crc, value, w); - p += sizeof(u32); - } - } - - if (len & sizeof(u16)) { - u16 value = get_unaligned_le16(p); - - CRC32C(crc, value, h); - p += sizeof(u16); - } - - if (len & sizeof(u8)) { - u8 value = *p++; - - CRC32C(crc, value, b); - } - return crc; -} - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -struct chksum_ctx { - u32 key; -}; - -struct chksum_desc_ctx { - u32 crc; -}; - -static int chksum_init(struct shash_desc *desc) -{ - struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = mctx->key; - - return 0; -} - -/* - * Setting the seed allows arbitrary accumulators and flexible XOR policy - * If your algorithm starts with ~0, then XOR with ~0 before you set - * the seed. - */ -static int chksum_setkey(struct crypto_shash *tfm, const u8 *key, - unsigned int keylen) -{ - struct chksum_ctx *mctx = crypto_shash_ctx(tfm); - - if (keylen != sizeof(mctx->key)) - return -EINVAL; - mctx->key = get_unaligned_le32(key); - return 0; -} - -static int chksum_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = crc32_mips_le_hw(ctx->crc, data, length); - return 0; -} - -static int chksumc_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = crc32c_mips_le_hw(ctx->crc, data, length); - return 0; -} - -static int chksum_final(struct shash_desc *desc, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - put_unaligned_le32(ctx->crc, out); - return 0; -} - -static int chksumc_final(struct shash_desc *desc, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - put_unaligned_le32(~ctx->crc, out); - return 0; -} - -static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out) -{ - put_unaligned_le32(crc32_mips_le_hw(crc, data, len), out); - return 0; -} - -static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out) -{ - put_unaligned_le32(~crc32c_mips_le_hw(crc, data, len), out); - return 0; -} - -static int chksum_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup(ctx->crc, data, len, out); -} - -static int chksumc_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksumc_finup(ctx->crc, data, len, out); -} - -static int chksum_digest(struct shash_desc *desc, const u8 *data, - unsigned int length, u8 *out) -{ - struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); - - return __chksum_finup(mctx->key, data, length, out); -} - -static int chksumc_digest(struct shash_desc *desc, const u8 *data, - unsigned int length, u8 *out) -{ - struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); - - return __chksumc_finup(mctx->key, data, length, out); -} - -static int chksum_cra_init(struct crypto_tfm *tfm) -{ - struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); - - mctx->key = ~0; - return 0; -} - -static struct shash_alg crc32_alg = { - .digestsize = CHKSUM_DIGEST_SIZE, - .setkey = chksum_setkey, - .init = chksum_init, - .update = chksum_update, - .final = chksum_final, - .finup = chksum_finup, - .digest = chksum_digest, - .descsize = sizeof(struct chksum_desc_ctx), - .base = { - .cra_name = "crc32", - .cra_driver_name = "crc32-mips-hw", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize = CHKSUM_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct chksum_ctx), - .cra_module = THIS_MODULE, - .cra_init = chksum_cra_init, - } -}; - -static struct shash_alg crc32c_alg = { - .digestsize = CHKSUM_DIGEST_SIZE, - .setkey = chksum_setkey, - .init = chksum_init, - .update = chksumc_update, - .final = chksumc_final, - .finup = chksumc_finup, - .digest = chksumc_digest, - .descsize = sizeof(struct chksum_desc_ctx), - .base = { - .cra_name = "crc32c", - .cra_driver_name = "crc32c-mips-hw", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize = CHKSUM_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct chksum_ctx), - .cra_module = THIS_MODULE, - .cra_init = chksum_cra_init, - } -}; - -static int __init crc32_mod_init(void) -{ - int err; - - err = crypto_register_shash(&crc32_alg); - - if (err) - return err; - - err = crypto_register_shash(&crc32c_alg); - - if (err) { - crypto_unregister_shash(&crc32_alg); - return err; - } - - return 0; -} - -static void __exit crc32_mod_exit(void) -{ - crypto_unregister_shash(&crc32_alg); - crypto_unregister_shash(&crc32c_alg); -} - -MODULE_AUTHOR("Marcin Nowakowski <marcin.nowakowski@mips.com"); -MODULE_DESCRIPTION("CRC32 and CRC32C using optional MIPS instructions"); -MODULE_LICENSE("GPL v2"); - -module_cpu_feature_match(MIPS_CRC32, crc32_mod_init); -module_exit(crc32_mod_exit); diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile index 5d5b993cbc2b..9c024e6d5e54 100644 --- a/arch/mips/lib/Makefile +++ b/arch/mips/lib/Makefile @@ -14,5 +14,7 @@ lib-$(CONFIG_GENERIC_CSUM) := $(filter-out csum_partial.o, $(lib-y)) obj-$(CONFIG_CPU_GENERIC_DUMP_TLB) += dump_tlb.o obj-$(CONFIG_CPU_R3000) += r3k_dump_tlb.o +obj-$(CONFIG_CRC32_ARCH) += crc32-mips.o + # libgcc-style stuff needed in the kernel obj-y += bswapsi.o bswapdi.o multi3.o diff --git a/arch/mips/lib/crc32-mips.c b/arch/mips/lib/crc32-mips.c new file mode 100644 index 000000000000..083e5d693a16 --- /dev/null +++ b/arch/mips/lib/crc32-mips.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * crc32-mips.c - CRC32 and CRC32C using optional MIPSr6 instructions + * + * Module based on arm64/crypto/crc32-arm.c + * + * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org> + * Copyright (C) 2018 MIPS Tech, LLC + */ + +#include <linux/cpufeature.h> +#include <linux/crc32.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <asm/mipsregs.h> +#include <linux/unaligned.h> + +enum crc_op_size { + b, h, w, d, +}; + +enum crc_type { + crc32, + crc32c, +}; + +#ifndef TOOLCHAIN_SUPPORTS_CRC +#define _ASM_SET_CRC(OP, SZ, TYPE) \ +_ASM_MACRO_3R(OP, rt, rs, rt2, \ + ".ifnc \\rt, \\rt2\n\t" \ + ".error \"invalid operands \\\"" #OP " \\rt,\\rs,\\rt2\\\"\"\n\t" \ + ".endif\n\t" \ + _ASM_INSN_IF_MIPS(0x7c00000f | (__rt << 16) | (__rs << 21) | \ + ((SZ) << 6) | ((TYPE) << 8)) \ + _ASM_INSN32_IF_MM(0x00000030 | (__rs << 16) | (__rt << 21) | \ + ((SZ) << 14) | ((TYPE) << 3))) +#define _ASM_UNSET_CRC(op, SZ, TYPE) ".purgem " #op "\n\t" +#else /* !TOOLCHAIN_SUPPORTS_CRC */ +#define _ASM_SET_CRC(op, SZ, TYPE) ".set\tcrc\n\t" +#define _ASM_UNSET_CRC(op, SZ, TYPE) +#endif + +#define __CRC32(crc, value, op, SZ, TYPE) \ +do { \ + __asm__ __volatile__( \ + ".set push\n\t" \ + _ASM_SET_CRC(op, SZ, TYPE) \ + #op " %0, %1, %0\n\t" \ + _ASM_UNSET_CRC(op, SZ, TYPE) \ + ".set pop" \ + : "+r" (crc) \ + : "r" (value)); \ +} while (0) + +#define _CRC32_crc32b(crc, value) __CRC32(crc, value, crc32b, 0, 0) +#define _CRC32_crc32h(crc, value) __CRC32(crc, value, crc32h, 1, 0) +#define _CRC32_crc32w(crc, value) __CRC32(crc, value, crc32w, 2, 0) +#define _CRC32_crc32d(crc, value) __CRC32(crc, value, crc32d, 3, 0) +#define _CRC32_crc32cb(crc, value) __CRC32(crc, value, crc32cb, 0, 1) +#define _CRC32_crc32ch(crc, value) __CRC32(crc, value, crc32ch, 1, 1) +#define _CRC32_crc32cw(crc, value) __CRC32(crc, value, crc32cw, 2, 1) +#define _CRC32_crc32cd(crc, value) __CRC32(crc, value, crc32cd, 3, 1) + +#define _CRC32(crc, value, size, op) \ + _CRC32_##op##size(crc, value) + +#define CRC32(crc, value, size) \ + _CRC32(crc, value, size, crc32) + +#define CRC32C(crc, value, size) \ + _CRC32(crc, value, size, crc32c) + +static DEFINE_STATIC_KEY_FALSE(have_crc32); + +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) +{ + if (!static_branch_likely(&have_crc32)) + return crc32_le_base(crc, p, len); + + if (IS_ENABLED(CONFIG_64BIT)) { + for (; len >= sizeof(u64); p += sizeof(u64), len -= sizeof(u64)) { + u64 value = get_unaligned_le64(p); + + CRC32(crc, value, d); + } + + if (len & sizeof(u32)) { + u32 value = get_unaligned_le32(p); + + CRC32(crc, value, w); + p += sizeof(u32); + } + } else { + for (; len >= sizeof(u32); len -= sizeof(u32)) { + u32 value = get_unaligned_le32(p); + + CRC32(crc, value, w); + p += sizeof(u32); + } + } + + if (len & sizeof(u16)) { + u16 value = get_unaligned_le16(p); + + CRC32(crc, value, h); + p += sizeof(u16); + } + + if (len & sizeof(u8)) { + u8 value = *p++; + + CRC32(crc, value, b); + } + + return crc; +} +EXPORT_SYMBOL(crc32_le_arch); + +u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) +{ + if (!static_branch_likely(&have_crc32)) + return crc32c_le_base(crc, p, len); + + if (IS_ENABLED(CONFIG_64BIT)) { + for (; len >= sizeof(u64); p += sizeof(u64), len -= sizeof(u64)) { + u64 value = get_unaligned_le64(p); + + CRC32C(crc, value, d); + } + + if (len & sizeof(u32)) { + u32 value = get_unaligned_le32(p); + + CRC32C(crc, value, w); + p += sizeof(u32); + } + } else { + for (; len >= sizeof(u32); len -= sizeof(u32)) { + u32 value = get_unaligned_le32(p); + + CRC32C(crc, value, w); + p += sizeof(u32); + } + } + + if (len & sizeof(u16)) { + u16 value = get_unaligned_le16(p); + + CRC32C(crc, value, h); + p += sizeof(u16); + } + + if (len & sizeof(u8)) { + u8 value = *p++; + + CRC32C(crc, value, b); + } + return crc; +} +EXPORT_SYMBOL(crc32c_le_arch); + +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_be_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +static int __init crc32_mips_init(void) +{ + if (cpu_have_feature(cpu_feature(MIPS_CRC32))) + static_branch_enable(&have_crc32); + return 0; +} +arch_initcall(crc32_mips_init); + +static void __exit crc32_mips_exit(void) +{ +} +module_exit(crc32_mips_exit); + +u32 crc32_optimizations(void) +{ + if (static_key_enabled(&have_crc32)) + return CRC32_LE_OPTIMIZATION | CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_AUTHOR("Marcin Nowakowski <marcin.nowakowski@mips.com"); +MODULE_DESCRIPTION("CRC32 and CRC32C using optional MIPS instructions"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 34b965fa45ac..424f188e62d9 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -127,6 +127,8 @@ config PPC select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_HAS_COPY_MC if PPC64 + select ARCH_HAS_CRC32 if PPC64 && ALTIVEC + select ARCH_HAS_CRC_T10DIF if PPC64 && ALTIVEC select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index ee84ade7a033..6b6d7467fecf 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -320,8 +320,6 @@ CONFIG_XMON=y CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_CRC32C_VPMSUM=m -CONFIG_CRYPTO_CRCT10DIF_VPMSUM=m CONFIG_CRYPTO_MD5_PPC=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA1_PPC=m diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index f39c0d000c43..465eb96c755e 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -389,9 +389,6 @@ CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_LZO=m -CONFIG_CRYPTO_CRC32C_VPMSUM=m -CONFIG_CRYPTO_CRCT10DIF_VPMSUM=m -CONFIG_CRYPTO_VPMSUM_TESTER=m CONFIG_CRYPTO_MD5_PPC=m CONFIG_CRYPTO_SHA1_PPC=m CONFIG_CRYPTO_AES_GCM_P10=m diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig index 951a43726461..5b315e9756b3 100644 --- a/arch/powerpc/crypto/Kconfig +++ b/arch/powerpc/crypto/Kconfig @@ -13,39 +13,6 @@ config CRYPTO_CURVE25519_PPC64 Architecture: PowerPC64 - Little-endian -config CRYPTO_CRC32C_VPMSUM - tristate "CRC32c" - depends on PPC64 && ALTIVEC - select CRYPTO_HASH - select CRC32 - help - CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720) - - Architecture: powerpc64 using - - AltiVec extensions - - Enable on POWER8 and newer processors for improved performance. - -config CRYPTO_CRCT10DIF_VPMSUM - tristate "CRC32T10DIF" - depends on PPC64 && ALTIVEC && CRC_T10DIF - select CRYPTO_HASH - help - CRC16 CRC algorithm used for the T10 (SCSI) Data Integrity Field (DIF) - - Architecture: powerpc64 using - - AltiVec extensions - - Enable on POWER8 and newer processors for improved performance. - -config CRYPTO_VPMSUM_TESTER - tristate "CRC32c and CRC32T10DIF hardware acceleration tester" - depends on CRYPTO_CRCT10DIF_VPMSUM && CRYPTO_CRC32C_VPMSUM - help - Stress test for CRC32c and CRCT10DIF algorithms implemented with - powerpc64 AltiVec extensions (POWER8 vpmsum instructions). - Unless you are testing these algorithms, you don't need this. - config CRYPTO_MD5_PPC tristate "Digests: MD5" depends on PPC diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile index 59808592f0a1..9b38f4a7bc15 100644 --- a/arch/powerpc/crypto/Makefile +++ b/arch/powerpc/crypto/Makefile @@ -10,9 +10,6 @@ obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.o obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o -obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) += crc32c-vpmsum.o -obj-$(CONFIG_CRYPTO_CRCT10DIF_VPMSUM) += crct10dif-vpmsum.o -obj-$(CONFIG_CRYPTO_VPMSUM_TESTER) += crc-vpmsum_test.o obj-$(CONFIG_CRYPTO_AES_GCM_P10) += aes-gcm-p10-crypto.o obj-$(CONFIG_CRYPTO_CHACHA20_P10) += chacha-p10-crypto.o obj-$(CONFIG_CRYPTO_POLY1305_P10) += poly1305-p10-crypto.o @@ -24,8 +21,6 @@ md5-ppc-y := md5-asm.o md5-glue.o sha1-powerpc-y := sha1-powerpc-asm.o sha1.o sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o -crc32c-vpmsum-y := crc32c-vpmsum_asm.o crc32c-vpmsum_glue.o -crct10dif-vpmsum-y := crct10dif-vpmsum_asm.o crct10dif-vpmsum_glue.o aes-gcm-p10-crypto-y := aes-gcm-p10-glue.o aes-gcm-p10.o ghashp10-ppc.o aesp10-ppc.o chacha-p10-crypto-y := chacha-p10-glue.o chacha-p10le-8x.o poly1305-p10-crypto-y := poly1305-p10-glue.o poly1305-p10le_64.o diff --git a/arch/powerpc/crypto/crc-vpmsum_test.c b/arch/powerpc/crypto/crc-vpmsum_test.c deleted file mode 100644 index c61a874a3a5c..000000000000 --- a/arch/powerpc/crypto/crc-vpmsum_test.c +++ /dev/null @@ -1,133 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * CRC vpmsum tester - * Copyright 2017 Daniel Axtens, IBM Corporation. - */ - -#include <linux/crc-t10dif.h> -#include <linux/crc32.h> -#include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/random.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/cpufeature.h> -#include <asm/switch_to.h> - -static unsigned long iterations = 10000; - -#define MAX_CRC_LENGTH 65535 - - -static int __init crc_test_init(void) -{ - u16 crc16 = 0, verify16 = 0; - __le32 verify32le = 0; - unsigned char *data; - u32 verify32 = 0; - unsigned long i; - __le32 crc32; - int ret; - - struct crypto_shash *crct10dif_tfm; - struct crypto_shash *crc32c_tfm; - - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) - return -ENODEV; - - data = kmalloc(MAX_CRC_LENGTH, GFP_KERNEL); - if (!data) - return -ENOMEM; - - crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0); - - if (IS_ERR(crct10dif_tfm)) { - pr_err("Error allocating crc-t10dif\n"); - goto free_buf; - } - - crc32c_tfm = crypto_alloc_shash("crc32c", 0, 0); - - if (IS_ERR(crc32c_tfm)) { - pr_err("Error allocating crc32c\n"); - goto free_16; - } - - do { - SHASH_DESC_ON_STACK(crct10dif_shash, crct10dif_tfm); - SHASH_DESC_ON_STACK(crc32c_shash, crc32c_tfm); - - crct10dif_shash->tfm = crct10dif_tfm; - ret = crypto_shash_init(crct10dif_shash); - - if (ret) { - pr_err("Error initing crc-t10dif\n"); - goto free_32; - } - - - crc32c_shash->tfm = crc32c_tfm; - ret = crypto_shash_init(crc32c_shash); - - if (ret) { - pr_err("Error initing crc32c\n"); - goto free_32; - } - - pr_info("crc-vpmsum_test begins, %lu iterations\n", iterations); - for (i=0; i<iterations; i++) { - size_t offset = get_random_u32_below(16); - size_t len = get_random_u32_below(MAX_CRC_LENGTH); - - if (len <= offset) - continue; - get_random_bytes(data, len); - len -= offset; - - crypto_shash_update(crct10dif_shash, data+offset, len); - crypto_shash_final(crct10dif_shash, (u8 *)(&crc16)); - verify16 = crc_t10dif_generic(verify16, data+offset, len); - - - if (crc16 != verify16) { - pr_err("FAILURE in CRC16: got 0x%04x expected 0x%04x (len %lu)\n", - crc16, verify16, len); - break; - } - - crypto_shash_update(crc32c_shash, data+offset, len); - crypto_shash_final(crc32c_shash, (u8 *)(&crc32)); - verify32 = le32_to_cpu(verify32le); - verify32le = ~cpu_to_le32(__crc32c_le(~verify32, data+offset, len)); - if (crc32 != verify32le) { - pr_err("FAILURE in CRC32: got 0x%08x expected 0x%08x (len %lu)\n", - crc32, verify32, len); - break; - } - cond_resched(); - } - pr_info("crc-vpmsum_test done, completed %lu iterations\n", i); - } while (0); - -free_32: - crypto_free_shash(crc32c_tfm); - -free_16: - crypto_free_shash(crct10dif_tfm); - -free_buf: - kfree(data); - - return 0; -} - -static void __exit crc_test_exit(void) {} - -module_init(crc_test_init); -module_exit(crc_test_exit); -module_param(iterations, long, 0400); - -MODULE_AUTHOR("Daniel Axtens <dja@axtens.net>"); -MODULE_DESCRIPTION("Vector polynomial multiply-sum CRC tester"); -MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c deleted file mode 100644 index 63760b7dbb76..000000000000 --- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c +++ /dev/null @@ -1,173 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include <linux/crc32.h> -#include <crypto/internal/hash.h> -#include <crypto/internal/simd.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/cpufeature.h> -#include <asm/simd.h> -#include <asm/switch_to.h> - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -#define VMX_ALIGN 16 -#define VMX_ALIGN_MASK (VMX_ALIGN-1) - -#define VECTOR_BREAKPOINT 512 - -u32 __crc32c_vpmsum(u32 crc, unsigned char const *p, size_t len); - -static u32 crc32c_vpmsum(u32 crc, unsigned char const *p, size_t len) -{ - unsigned int prealign; - unsigned int tail; - - if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || !crypto_simd_usable()) - return __crc32c_le(crc, p, len); - - if ((unsigned long)p & VMX_ALIGN_MASK) { - prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK); - crc = __crc32c_le(crc, p, prealign); - len -= prealign; - p += prealign; - } - - if (len & ~VMX_ALIGN_MASK) { - preempt_disable(); - pagefault_disable(); - enable_kernel_altivec(); - crc = __crc32c_vpmsum(crc, p, len & ~VMX_ALIGN_MASK); - disable_kernel_altivec(); - pagefault_enable(); - preempt_enable(); - } - - tail = len & VMX_ALIGN_MASK; - if (tail) { - p += len & ~VMX_ALIGN_MASK; - crc = __crc32c_le(crc, p, tail); - } - - return crc; -} - -static int crc32c_vpmsum_cra_init(struct crypto_tfm *tfm) -{ - u32 *key = crypto_tfm_ctx(tfm); - - *key = ~0; - - return 0; -} - -/* - * Setting the seed allows arbitrary accumulators and flexible XOR policy - * If your algorithm starts with ~0, then XOR with ~0 before you set - * the seed. - */ -static int crc32c_vpmsum_setkey(struct crypto_shash *hash, const u8 *key, - unsigned int keylen) -{ - u32 *mctx = crypto_shash_ctx(hash); - - if (keylen != sizeof(u32)) - return -EINVAL; - *mctx = le32_to_cpup((__le32 *)key); - return 0; -} - -static int crc32c_vpmsum_init(struct shash_desc *desc) -{ - u32 *mctx = crypto_shash_ctx(desc->tfm); - u32 *crcp = shash_desc_ctx(desc); - - *crcp = *mctx; - - return 0; -} - -static int crc32c_vpmsum_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - u32 *crcp = shash_desc_ctx(desc); - - *crcp = crc32c_vpmsum(*crcp, data, len); - - return 0; -} - -static int __crc32c_vpmsum_finup(u32 *crcp, const u8 *data, unsigned int len, - u8 *out) -{ - *(__le32 *)out = ~cpu_to_le32(crc32c_vpmsum(*crcp, data, len)); - - return 0; -} - -static int crc32c_vpmsum_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_vpmsum_finup(shash_desc_ctx(desc), data, len, out); -} - -static int crc32c_vpmsum_final(struct shash_desc *desc, u8 *out) -{ - u32 *crcp = shash_desc_ctx(desc); - - *(__le32 *)out = ~cpu_to_le32p(crcp); - - return 0; -} - -static int crc32c_vpmsum_digest(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_vpmsum_finup(crypto_shash_ctx(desc->tfm), data, len, - out); -} - -static struct shash_alg alg = { - .setkey = crc32c_vpmsum_setkey, - .init = crc32c_vpmsum_init, - .update = crc32c_vpmsum_update, - .final = crc32c_vpmsum_final, - .finup = crc32c_vpmsum_finup, - .digest = crc32c_vpmsum_digest, - .descsize = sizeof(u32), - .digestsize = CHKSUM_DIGEST_SIZE, - .base = { - .cra_name = "crc32c", - .cra_driver_name = "crc32c-vpmsum", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize = CHKSUM_BLOCK_SIZE, - .cra_ctxsize = sizeof(u32), - .cra_module = THIS_MODULE, - .cra_init = crc32c_vpmsum_cra_init, - } -}; - -static int __init crc32c_vpmsum_mod_init(void) -{ - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) - return -ENODEV; - - return crypto_register_shash(&alg); -} - -static void __exit crc32c_vpmsum_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crc32c_vpmsum_mod_init); -module_exit(crc32c_vpmsum_mod_fini); - -MODULE_AUTHOR("Anton Blanchard <anton@samba.org>"); -MODULE_DESCRIPTION("CRC32C using vector polynomial multiply-sum instructions"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_CRYPTO("crc32c"); -MODULE_ALIAS_CRYPTO("crc32c-vpmsum"); diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index f14ecab674a3..dd8a4b52a0cc 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -78,4 +78,10 @@ CFLAGS_xor_vmx.o += -mhard-float -maltivec $(call cc-option,-mabi=altivec) # Enable <altivec.h> CFLAGS_xor_vmx.o += -isystem $(shell $(CC) -print-file-name=include) +obj-$(CONFIG_CRC32_ARCH) += crc32-powerpc.o +crc32-powerpc-y := crc32-glue.o crc32c-vpmsum_asm.o + +obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-powerpc.o +crc-t10dif-powerpc-y := crc-t10dif-glue.o crct10dif-vpmsum_asm.o + obj-$(CONFIG_PPC64) += $(obj64-y) diff --git a/arch/powerpc/crypto/crct10dif-vpmsum_glue.c b/arch/powerpc/lib/crc-t10dif-glue.c index 1dc8b6915178..730850dbc51d 100644 --- a/arch/powerpc/crypto/crct10dif-vpmsum_glue.c +++ b/arch/powerpc/lib/crc-t10dif-glue.c @@ -7,7 +7,6 @@ */ #include <linux/crc-t10dif.h> -#include <crypto/internal/hash.h> #include <crypto/internal/simd.h> #include <linux/init.h> #include <linux/module.h> @@ -22,15 +21,18 @@ #define VECTOR_BREAKPOINT 64 +static DEFINE_STATIC_KEY_FALSE(have_vec_crypto); + u32 __crct10dif_vpmsum(u32 crc, unsigned char const *p, size_t len); -static u16 crct10dif_vpmsum(u16 crci, unsigned char const *p, size_t len) +u16 crc_t10dif_arch(u16 crci, const u8 *p, size_t len) { unsigned int prealign; unsigned int tail; u32 crc = crci; - if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || !crypto_simd_usable()) + if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || + !static_branch_likely(&have_vec_crypto) || !crypto_simd_usable()) return crc_t10dif_generic(crc, p, len); if ((unsigned long)p & VMX_ALIGN_MASK) { @@ -60,67 +62,28 @@ static u16 crct10dif_vpmsum(u16 crci, unsigned char const *p, size_t len) return crc & 0xffff; } +EXPORT_SYMBOL(crc_t10dif_arch); -static int crct10dif_vpmsum_init(struct shash_desc *desc) -{ - u16 *crc = shash_desc_ctx(desc); - - *crc = 0; - return 0; -} - -static int crct10dif_vpmsum_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - u16 *crc = shash_desc_ctx(desc); - - *crc = crct10dif_vpmsum(*crc, data, length); - - return 0; -} - - -static int crct10dif_vpmsum_final(struct shash_desc *desc, u8 *out) +static int __init crc_t10dif_powerpc_init(void) { - u16 *crcp = shash_desc_ctx(desc); - - *(u16 *)out = *crcp; + if (cpu_has_feature(CPU_FTR_ARCH_207S) && + (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO)) + static_branch_enable(&have_vec_crypto); return 0; } +arch_initcall(crc_t10dif_powerpc_init); -static struct shash_alg alg = { - .init = crct10dif_vpmsum_init, - .update = crct10dif_vpmsum_update, - .final = crct10dif_vpmsum_final, - .descsize = CRC_T10DIF_DIGEST_SIZE, - .digestsize = CRC_T10DIF_DIGEST_SIZE, - .base = { - .cra_name = "crct10dif", - .cra_driver_name = "crct10dif-vpmsum", - .cra_priority = 200, - .cra_blocksize = CRC_T10DIF_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static int __init crct10dif_vpmsum_mod_init(void) +static void __exit crc_t10dif_powerpc_exit(void) { - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) - return -ENODEV; - - return crypto_register_shash(&alg); } +module_exit(crc_t10dif_powerpc_exit); -static void __exit crct10dif_vpmsum_mod_fini(void) +bool crc_t10dif_is_optimized(void) { - crypto_unregister_shash(&alg); + return static_key_enabled(&have_vec_crypto); } - -module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crct10dif_vpmsum_mod_init); -module_exit(crct10dif_vpmsum_mod_fini); +EXPORT_SYMBOL(crc_t10dif_is_optimized); MODULE_AUTHOR("Daniel Axtens <dja@axtens.net>"); MODULE_DESCRIPTION("CRCT10DIF using vector polynomial multiply-sum instructions"); MODULE_LICENSE("GPL"); -MODULE_ALIAS_CRYPTO("crct10dif"); -MODULE_ALIAS_CRYPTO("crct10dif-vpmsum"); diff --git a/arch/powerpc/lib/crc32-glue.c b/arch/powerpc/lib/crc32-glue.c new file mode 100644 index 000000000000..79cc954f499f --- /dev/null +++ b/arch/powerpc/lib/crc32-glue.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/crc32.h> +#include <crypto/internal/simd.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/cpufeature.h> +#include <asm/simd.h> +#include <asm/switch_to.h> + +#define VMX_ALIGN 16 +#define VMX_ALIGN_MASK (VMX_ALIGN-1) + +#define VECTOR_BREAKPOINT 512 + +static DEFINE_STATIC_KEY_FALSE(have_vec_crypto); + +u32 __crc32c_vpmsum(u32 crc, const u8 *p, size_t len); + +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_le_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_le_arch); + +u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) +{ + unsigned int prealign; + unsigned int tail; + + if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || + !static_branch_likely(&have_vec_crypto) || !crypto_simd_usable()) + return crc32c_le_base(crc, p, len); + + if ((unsigned long)p & VMX_ALIGN_MASK) { + prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK); + crc = crc32c_le_base(crc, p, prealign); + len -= prealign; + p += prealign; + } + + if (len & ~VMX_ALIGN_MASK) { + preempt_disable(); + pagefault_disable(); + enable_kernel_altivec(); + crc = __crc32c_vpmsum(crc, p, len & ~VMX_ALIGN_MASK); + disable_kernel_altivec(); + pagefault_enable(); + preempt_enable(); + } + + tail = len & VMX_ALIGN_MASK; + if (tail) { + p += len & ~VMX_ALIGN_MASK; + crc = crc32c_le_base(crc, p, tail); + } + + return crc; +} +EXPORT_SYMBOL(crc32c_le_arch); + +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_be_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +static int __init crc32_powerpc_init(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_207S) && + (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO)) + static_branch_enable(&have_vec_crypto); + return 0; +} +arch_initcall(crc32_powerpc_init); + +static void __exit crc32_powerpc_exit(void) +{ +} +module_exit(crc32_powerpc_exit); + +u32 crc32_optimizations(void) +{ + if (static_key_enabled(&have_vec_crypto)) + return CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_AUTHOR("Anton Blanchard <anton@samba.org>"); +MODULE_DESCRIPTION("CRC32C using vector polynomial multiply-sum instructions"); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/crypto/crc32-vpmsum_core.S b/arch/powerpc/lib/crc32-vpmsum_core.S index b0f87f595b26..b0f87f595b26 100644 --- a/arch/powerpc/crypto/crc32-vpmsum_core.S +++ b/arch/powerpc/lib/crc32-vpmsum_core.S diff --git a/arch/powerpc/crypto/crc32c-vpmsum_asm.S b/arch/powerpc/lib/crc32c-vpmsum_asm.S index bf442004ea1f..bf442004ea1f 100644 --- a/arch/powerpc/crypto/crc32c-vpmsum_asm.S +++ b/arch/powerpc/lib/crc32c-vpmsum_asm.S diff --git a/arch/powerpc/crypto/crct10dif-vpmsum_asm.S b/arch/powerpc/lib/crct10dif-vpmsum_asm.S index f0b93a0fe168..f0b93a0fe168 100644 --- a/arch/powerpc/crypto/crct10dif-vpmsum_asm.S +++ b/arch/powerpc/lib/crct10dif-vpmsum_asm.S diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index c736e349f222..7612c52e9b1e 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -24,6 +24,7 @@ config RISCV select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_BINFMT_FLAT + select ARCH_HAS_CRC32 if RISCV_ISA_ZBC select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEBUG_VM_PGTABLE diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 8eec6b69a875..79368a895fee 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -15,8 +15,7 @@ endif lib-$(CONFIG_MMU) += uaccess.o lib-$(CONFIG_64BIT) += tishift.o lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o -lib-$(CONFIG_RISCV_ISA_ZBC) += crc32.o - +obj-$(CONFIG_CRC32_ARCH) += crc32-riscv.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o lib-$(CONFIG_RISCV_ISA_V) += xor.o lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o diff --git a/arch/riscv/lib/crc32.c b/arch/riscv/lib/crc32-riscv.c index d7dc599af3ef..53d56ab422c7 100644 --- a/arch/riscv/lib/crc32.c +++ b/arch/riscv/lib/crc32-riscv.c @@ -14,6 +14,7 @@ #include <linux/crc32poly.h> #include <linux/crc32.h> #include <linux/byteorder/generic.h> +#include <linux/module.h> /* * Refer to https://www.corsix.org/content/barrett-reduction-polynomials for @@ -217,17 +218,19 @@ legacy: return crc_fb(crc, p, len); } -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len) { return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE, crc32_le_base); } +EXPORT_SYMBOL(crc32_le_arch); -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len) { return crc32_le_generic(crc, p, len, CRC32C_POLY_LE, - CRC32C_POLY_QT_LE, __crc32c_le_base); + CRC32C_POLY_QT_LE, crc32c_le_base); } +EXPORT_SYMBOL(crc32c_le_arch); static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p, size_t len) @@ -253,7 +256,7 @@ static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p, return crc; } -u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len) { size_t offset, head_len, tail_len; unsigned long const *p_ul; @@ -292,3 +295,17 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) legacy: return crc32_be_base(crc, p, len); } +EXPORT_SYMBOL(crc32_be_arch); + +u32 crc32_optimizations(void) +{ + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return CRC32_LE_OPTIMIZATION | + CRC32_BE_OPTIMIZATION | + CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Accelerated CRC32 implementation with Zbc extension"); diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 5cbbb7daed44..570558a99359 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -72,6 +72,7 @@ config S390 select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 + select ARCH_HAS_CRC32 select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index d8d227ab82de..931c0a974ed8 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -795,7 +795,6 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m -CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_SHA1_S390=m CONFIG_CRYPTO_SHA256_S390=m @@ -818,7 +817,6 @@ CONFIG_SYSTEM_BLACKLIST_KEYRING=y CONFIG_CORDIC=m CONFIG_CRYPTO_LIB_CURVE25519=m CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m -CONFIG_CRC32_SELFTEST=y CONFIG_CRC4=m CONFIG_CRC7=m CONFIG_CRC8=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 6c2f2bb4fbf8..d521aabc31d7 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -782,7 +782,6 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m -CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_SHA1_S390=m CONFIG_CRYPTO_SHA256_S390=m diff --git a/arch/s390/crypto/Kconfig b/arch/s390/crypto/Kconfig index d3eb3a233693..b760232537f1 100644 --- a/arch/s390/crypto/Kconfig +++ b/arch/s390/crypto/Kconfig @@ -2,18 +2,6 @@ menu "Accelerated Cryptographic Algorithms for CPU (s390)" -config CRYPTO_CRC32_S390 - tristate "CRC32c and CRC32" - depends on S390 - select CRYPTO_HASH - select CRC32 - help - CRC32c and CRC32 CRC algorithms - - Architecture: s390 - - It is available with IBM z13 or later. - config CRYPTO_SHA512_S390 tristate "Hash functions: SHA-384 and SHA-512" depends on S390 diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile index a0cb96937c3d..14dafadbcbed 100644 --- a/arch/s390/crypto/Makefile +++ b/arch/s390/crypto/Makefile @@ -14,9 +14,7 @@ obj-$(CONFIG_CRYPTO_PAES_S390) += paes_s390.o obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o obj-$(CONFIG_S390_PRNG) += prng.o obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o -obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o obj-$(CONFIG_CRYPTO_HMAC_S390) += hmac_s390.o obj-y += arch_random.o -crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o chacha_s390-y := chacha-glue.o chacha-s390.o diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c deleted file mode 100644 index 89a10337e6ea..000000000000 --- a/arch/s390/crypto/crc32-vx.c +++ /dev/null @@ -1,306 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Crypto-API module for CRC-32 algorithms implemented with the - * z/Architecture Vector Extension Facility. - * - * Copyright IBM Corp. 2015 - * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> - */ -#define KMSG_COMPONENT "crc32-vx" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include <linux/module.h> -#include <linux/cpufeature.h> -#include <linux/crc32.h> -#include <crypto/internal/hash.h> -#include <asm/fpu.h> -#include "crc32-vx.h" - -#define CRC32_BLOCK_SIZE 1 -#define CRC32_DIGEST_SIZE 4 - -#define VX_MIN_LEN 64 -#define VX_ALIGNMENT 16L -#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) - -struct crc_ctx { - u32 key; -}; - -struct crc_desc_ctx { - u32 crc; -}; - -/* - * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension - * - * Creates a function to perform a particular CRC-32 computation. Depending - * on the message buffer, the hardware-accelerated or software implementation - * is used. Note that the message buffer is aligned to improve fetch - * operations of VECTOR LOAD MULTIPLE instructions. - * - */ -#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \ - static u32 __pure ___fname(u32 crc, \ - unsigned char const *data, size_t datalen) \ - { \ - unsigned long prealign, aligned, remaining; \ - DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \ - \ - if (datalen < VX_MIN_LEN + VX_ALIGN_MASK) \ - return ___crc32_sw(crc, data, datalen); \ - \ - if ((unsigned long)data & VX_ALIGN_MASK) { \ - prealign = VX_ALIGNMENT - \ - ((unsigned long)data & VX_ALIGN_MASK); \ - datalen -= prealign; \ - crc = ___crc32_sw(crc, data, prealign); \ - data = (void *)((unsigned long)data + prealign); \ - } \ - \ - aligned = datalen & ~VX_ALIGN_MASK; \ - remaining = datalen & VX_ALIGN_MASK; \ - \ - kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \ - crc = ___crc32_vx(crc, data, aligned); \ - kernel_fpu_end(&vxstate, KERNEL_VXR_LOW); \ - \ - if (remaining) \ - crc = ___crc32_sw(crc, data + aligned, remaining); \ - \ - return crc; \ - } - -DEFINE_CRC32_VX(crc32_le_vx, crc32_le_vgfm_16, crc32_le) -DEFINE_CRC32_VX(crc32_be_vx, crc32_be_vgfm_16, crc32_be) -DEFINE_CRC32_VX(crc32c_le_vx, crc32c_le_vgfm_16, __crc32c_le) - - -static int crc32_vx_cra_init_zero(struct crypto_tfm *tfm) -{ - struct crc_ctx *mctx = crypto_tfm_ctx(tfm); - - mctx->key = 0; - return 0; -} - -static int crc32_vx_cra_init_invert(struct crypto_tfm *tfm) -{ - struct crc_ctx *mctx = crypto_tfm_ctx(tfm); - - mctx->key = ~0; - return 0; -} - -static int crc32_vx_init(struct shash_desc *desc) -{ - struct crc_ctx *mctx = crypto_shash_ctx(desc->tfm); - struct crc_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = mctx->key; - return 0; -} - -static int crc32_vx_setkey(struct crypto_shash *tfm, const u8 *newkey, - unsigned int newkeylen) -{ - struct crc_ctx *mctx = crypto_shash_ctx(tfm); - - if (newkeylen != sizeof(mctx->key)) - return -EINVAL; - mctx->key = le32_to_cpu(*(__le32 *)newkey); - return 0; -} - -static int crc32be_vx_setkey(struct crypto_shash *tfm, const u8 *newkey, - unsigned int newkeylen) -{ - struct crc_ctx *mctx = crypto_shash_ctx(tfm); - - if (newkeylen != sizeof(mctx->key)) - return -EINVAL; - mctx->key = be32_to_cpu(*(__be32 *)newkey); - return 0; -} - -static int crc32le_vx_final(struct shash_desc *desc, u8 *out) -{ - struct crc_desc_ctx *ctx = shash_desc_ctx(desc); - - *(__le32 *)out = cpu_to_le32p(&ctx->crc); - return 0; -} - -static int crc32be_vx_final(struct shash_desc *desc, u8 *out) -{ - struct crc_desc_ctx *ctx = shash_desc_ctx(desc); - - *(__be32 *)out = cpu_to_be32p(&ctx->crc); - return 0; -} - -static int crc32c_vx_final(struct shash_desc *desc, u8 *out) -{ - struct crc_desc_ctx *ctx = shash_desc_ctx(desc); - - /* - * Perform a final XOR with 0xFFFFFFFF to be in sync - * with the generic crc32c shash implementation. - */ - *(__le32 *)out = ~cpu_to_le32p(&ctx->crc); - return 0; -} - -static int __crc32le_vx_finup(u32 *crc, const u8 *data, unsigned int len, - u8 *out) -{ - *(__le32 *)out = cpu_to_le32(crc32_le_vx(*crc, data, len)); - return 0; -} - -static int __crc32be_vx_finup(u32 *crc, const u8 *data, unsigned int len, - u8 *out) -{ - *(__be32 *)out = cpu_to_be32(crc32_be_vx(*crc, data, len)); - return 0; -} - -static int __crc32c_vx_finup(u32 *crc, const u8 *data, unsigned int len, - u8 *out) -{ - /* - * Perform a final XOR with 0xFFFFFFFF to be in sync - * with the generic crc32c shash implementation. - */ - *(__le32 *)out = ~cpu_to_le32(crc32c_le_vx(*crc, data, len)); - return 0; -} - - -#define CRC32_VX_FINUP(alg, func) \ - static int alg ## _vx_finup(struct shash_desc *desc, const u8 *data, \ - unsigned int datalen, u8 *out) \ - { \ - return __ ## alg ## _vx_finup(shash_desc_ctx(desc), \ - data, datalen, out); \ - } - -CRC32_VX_FINUP(crc32le, crc32_le_vx) -CRC32_VX_FINUP(crc32be, crc32_be_vx) -CRC32_VX_FINUP(crc32c, crc32c_le_vx) - -#define CRC32_VX_DIGEST(alg, func) \ - static int alg ## _vx_digest(struct shash_desc *desc, const u8 *data, \ - unsigned int len, u8 *out) \ - { \ - return __ ## alg ## _vx_finup(crypto_shash_ctx(desc->tfm), \ - data, len, out); \ - } - -CRC32_VX_DIGEST(crc32le, crc32_le_vx) -CRC32_VX_DIGEST(crc32be, crc32_be_vx) -CRC32_VX_DIGEST(crc32c, crc32c_le_vx) - -#define CRC32_VX_UPDATE(alg, func) \ - static int alg ## _vx_update(struct shash_desc *desc, const u8 *data, \ - unsigned int datalen) \ - { \ - struct crc_desc_ctx *ctx = shash_desc_ctx(desc); \ - ctx->crc = func(ctx->crc, data, datalen); \ - return 0; \ - } - -CRC32_VX_UPDATE(crc32le, crc32_le_vx) -CRC32_VX_UPDATE(crc32be, crc32_be_vx) -CRC32_VX_UPDATE(crc32c, crc32c_le_vx) - - -static struct shash_alg crc32_vx_algs[] = { - /* CRC-32 LE */ - { - .init = crc32_vx_init, - .setkey = crc32_vx_setkey, - .update = crc32le_vx_update, - .final = crc32le_vx_final, - .finup = crc32le_vx_finup, - .digest = crc32le_vx_digest, - .descsize = sizeof(struct crc_desc_ctx), - .digestsize = CRC32_DIGEST_SIZE, - .base = { - .cra_name = "crc32", - .cra_driver_name = "crc32-vx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize = CRC32_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crc_ctx), - .cra_module = THIS_MODULE, - .cra_init = crc32_vx_cra_init_zero, - }, - }, - /* CRC-32 BE */ - { - .init = crc32_vx_init, - .setkey = crc32be_vx_setkey, - .update = crc32be_vx_update, - .final = crc32be_vx_final, - .finup = crc32be_vx_finup, - .digest = crc32be_vx_digest, - .descsize = sizeof(struct crc_desc_ctx), - .digestsize = CRC32_DIGEST_SIZE, - .base = { - .cra_name = "crc32be", - .cra_driver_name = "crc32be-vx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize = CRC32_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crc_ctx), - .cra_module = THIS_MODULE, - .cra_init = crc32_vx_cra_init_zero, - }, - }, - /* CRC-32C LE */ - { - .init = crc32_vx_init, - .setkey = crc32_vx_setkey, - .update = crc32c_vx_update, - .final = crc32c_vx_final, - .finup = crc32c_vx_finup, - .digest = crc32c_vx_digest, - .descsize = sizeof(struct crc_desc_ctx), - .digestsize = CRC32_DIGEST_SIZE, - .base = { - .cra_name = "crc32c", - .cra_driver_name = "crc32c-vx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize = CRC32_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crc_ctx), - .cra_module = THIS_MODULE, - .cra_init = crc32_vx_cra_init_invert, - }, - }, -}; - - -static int __init crc_vx_mod_init(void) -{ - return crypto_register_shashes(crc32_vx_algs, - ARRAY_SIZE(crc32_vx_algs)); -} - -static void __exit crc_vx_mod_exit(void) -{ - crypto_unregister_shashes(crc32_vx_algs, ARRAY_SIZE(crc32_vx_algs)); -} - -module_cpu_feature_match(S390_CPU_FEATURE_VXRS, crc_vx_mod_init); -module_exit(crc_vx_mod_exit); - -MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>"); -MODULE_DESCRIPTION("CRC-32 algorithms using z/Architecture Vector Extension Facility"); -MODULE_LICENSE("GPL"); - -MODULE_ALIAS_CRYPTO("crc32"); -MODULE_ALIAS_CRYPTO("crc32-vx"); -MODULE_ALIAS_CRYPTO("crc32c"); -MODULE_ALIAS_CRYPTO("crc32c-vx"); diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index f43f897d3fc0..14bbfe50033c 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -24,3 +24,6 @@ obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o + +obj-$(CONFIG_CRC32_ARCH) += crc32-s390.o +crc32-s390-y := crc32-glue.o crc32le-vx.o crc32be-vx.o diff --git a/arch/s390/lib/crc32-glue.c b/arch/s390/lib/crc32-glue.c new file mode 100644 index 000000000000..137080e61f90 --- /dev/null +++ b/arch/s390/lib/crc32-glue.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * CRC-32 implemented with the z/Architecture Vector Extension Facility. + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + */ +#define KMSG_COMPONENT "crc32-vx" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/module.h> +#include <linux/cpufeature.h> +#include <linux/crc32.h> +#include <asm/fpu.h> +#include "crc32-vx.h" + +#define VX_MIN_LEN 64 +#define VX_ALIGNMENT 16L +#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) + +static DEFINE_STATIC_KEY_FALSE(have_vxrs); + +/* + * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension + * + * Creates a function to perform a particular CRC-32 computation. Depending + * on the message buffer, the hardware-accelerated or software implementation + * is used. Note that the message buffer is aligned to improve fetch + * operations of VECTOR LOAD MULTIPLE instructions. + */ +#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \ + u32 ___fname(u32 crc, const u8 *data, size_t datalen) \ + { \ + unsigned long prealign, aligned, remaining; \ + DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \ + \ + if (datalen < VX_MIN_LEN + VX_ALIGN_MASK || \ + !static_branch_likely(&have_vxrs)) \ + return ___crc32_sw(crc, data, datalen); \ + \ + if ((unsigned long)data & VX_ALIGN_MASK) { \ + prealign = VX_ALIGNMENT - \ + ((unsigned long)data & VX_ALIGN_MASK); \ + datalen -= prealign; \ + crc = ___crc32_sw(crc, data, prealign); \ + data = (void *)((unsigned long)data + prealign); \ + } \ + \ + aligned = datalen & ~VX_ALIGN_MASK; \ + remaining = datalen & VX_ALIGN_MASK; \ + \ + kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \ + crc = ___crc32_vx(crc, data, aligned); \ + kernel_fpu_end(&vxstate, KERNEL_VXR_LOW); \ + \ + if (remaining) \ + crc = ___crc32_sw(crc, data + aligned, remaining); \ + \ + return crc; \ + } \ + EXPORT_SYMBOL(___fname); + +DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base) +DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base) +DEFINE_CRC32_VX(crc32c_le_arch, crc32c_le_vgfm_16, crc32c_le_base) + +static int __init crc32_s390_init(void) +{ + if (cpu_have_feature(S390_CPU_FEATURE_VXRS)) + static_branch_enable(&have_vxrs); + return 0; +} +arch_initcall(crc32_s390_init); + +static void __exit crc32_s390_exit(void) +{ +} +module_exit(crc32_s390_exit); + +u32 crc32_optimizations(void) +{ + if (static_key_enabled(&have_vxrs)) + return CRC32_LE_OPTIMIZATION | + CRC32_BE_OPTIMIZATION | + CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>"); +MODULE_DESCRIPTION("CRC-32 algorithms using z/Architecture Vector Extension Facility"); +MODULE_LICENSE("GPL"); diff --git a/arch/s390/crypto/crc32-vx.h b/arch/s390/lib/crc32-vx.h index 652c96e1a822..652c96e1a822 100644 --- a/arch/s390/crypto/crc32-vx.h +++ b/arch/s390/lib/crc32-vx.h diff --git a/arch/s390/crypto/crc32be-vx.c b/arch/s390/lib/crc32be-vx.c index fed7c9c70d05..fed7c9c70d05 100644 --- a/arch/s390/crypto/crc32be-vx.c +++ b/arch/s390/lib/crc32be-vx.c diff --git a/arch/s390/crypto/crc32le-vx.c b/arch/s390/lib/crc32le-vx.c index 2f629f394df7..2f629f394df7 100644 --- a/arch/s390/crypto/crc32le-vx.c +++ b/arch/s390/lib/crc32le-vx.c diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index dcfdb7f1dae9..0f88123925a4 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -110,6 +110,7 @@ config SPARC64 select HAVE_SETUP_PER_CPU_AREA select NEED_PER_CPU_EMBED_FIRST_CHUNK select NEED_PER_CPU_PAGE_FIRST_CHUNK + select ARCH_HAS_CRC32 config ARCH_PROC_KCORE_TEXT def_bool y diff --git a/arch/sparc/crypto/Kconfig b/arch/sparc/crypto/Kconfig index cfe5102b1c68..e858597de89d 100644 --- a/arch/sparc/crypto/Kconfig +++ b/arch/sparc/crypto/Kconfig @@ -16,16 +16,6 @@ config CRYPTO_DES_SPARC64 Architecture: sparc64 -config CRYPTO_CRC32C_SPARC64 - tristate "CRC32c" - depends on SPARC64 - select CRYPTO_HASH - select CRC32 - help - CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720) - - Architecture: sparc64 - config CRYPTO_MD5_SPARC64 tristate "Digests: MD5" depends on SPARC64 diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile index d257186c27d1..a2d7fca40cb4 100644 --- a/arch/sparc/crypto/Makefile +++ b/arch/sparc/crypto/Makefile @@ -12,8 +12,6 @@ obj-$(CONFIG_CRYPTO_AES_SPARC64) += aes-sparc64.o obj-$(CONFIG_CRYPTO_DES_SPARC64) += des-sparc64.o obj-$(CONFIG_CRYPTO_CAMELLIA_SPARC64) += camellia-sparc64.o -obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) += crc32c-sparc64.o - sha1-sparc64-y := sha1_asm.o sha1_glue.o sha256-sparc64-y := sha256_asm.o sha256_glue.o sha512-sparc64-y := sha512_asm.o sha512_glue.o @@ -22,5 +20,3 @@ md5-sparc64-y := md5_asm.o md5_glue.o aes-sparc64-y := aes_asm.o aes_glue.o des-sparc64-y := des_asm.o des_glue.o camellia-sparc64-y := camellia_asm.o camellia_glue.o - -crc32c-sparc64-y := crc32c_asm.o crc32c_glue.o diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c deleted file mode 100644 index 913b9a09e885..000000000000 --- a/arch/sparc/crypto/crc32c_glue.c +++ /dev/null @@ -1,184 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Glue code for CRC32C optimized for sparc64 crypto opcodes. - * - * This is based largely upon arch/x86/crypto/crc32c-intel.c - * - * Copyright (C) 2008 Intel Corporation - * Authors: Austin Zhang <austin_zhang@linux.intel.com> - * Kent Liu <kent.liu@intel.com> - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/crc32.h> - -#include <crypto/internal/hash.h> - -#include <asm/pstate.h> -#include <asm/elf.h> -#include <linux/unaligned.h> - -#include "opcodes.h" - -/* - * Setting the seed allows arbitrary accumulators and flexible XOR policy - * If your algorithm starts with ~0, then XOR with ~0 before you set - * the seed. - */ -static int crc32c_sparc64_setkey(struct crypto_shash *hash, const u8 *key, - unsigned int keylen) -{ - u32 *mctx = crypto_shash_ctx(hash); - - if (keylen != sizeof(u32)) - return -EINVAL; - *mctx = get_unaligned_le32(key); - return 0; -} - -static int crc32c_sparc64_init(struct shash_desc *desc) -{ - u32 *mctx = crypto_shash_ctx(desc->tfm); - u32 *crcp = shash_desc_ctx(desc); - - *crcp = *mctx; - - return 0; -} - -extern void crc32c_sparc64(u32 *crcp, const u64 *data, unsigned int len); - -static u32 crc32c_compute(u32 crc, const u8 *data, unsigned int len) -{ - unsigned int n = -(uintptr_t)data & 7; - - if (n) { - /* Data isn't 8-byte aligned. Align it. */ - n = min(n, len); - crc = __crc32c_le(crc, data, n); - data += n; - len -= n; - } - n = len & ~7U; - if (n) { - crc32c_sparc64(&crc, (const u64 *)data, n); - data += n; - len -= n; - } - if (len) - crc = __crc32c_le(crc, data, len); - return crc; -} - -static int crc32c_sparc64_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - u32 *crcp = shash_desc_ctx(desc); - - *crcp = crc32c_compute(*crcp, data, len); - return 0; -} - -static int __crc32c_sparc64_finup(const u32 *crcp, const u8 *data, - unsigned int len, u8 *out) -{ - put_unaligned_le32(~crc32c_compute(*crcp, data, len), out); - return 0; -} - -static int crc32c_sparc64_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_sparc64_finup(shash_desc_ctx(desc), data, len, out); -} - -static int crc32c_sparc64_final(struct shash_desc *desc, u8 *out) -{ - u32 *crcp = shash_desc_ctx(desc); - - put_unaligned_le32(~*crcp, out); - return 0; -} - -static int crc32c_sparc64_digest(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_sparc64_finup(crypto_shash_ctx(desc->tfm), data, len, - out); -} - -static int crc32c_sparc64_cra_init(struct crypto_tfm *tfm) -{ - u32 *key = crypto_tfm_ctx(tfm); - - *key = ~0; - - return 0; -} - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -static struct shash_alg alg = { - .setkey = crc32c_sparc64_setkey, - .init = crc32c_sparc64_init, - .update = crc32c_sparc64_update, - .final = crc32c_sparc64_final, - .finup = crc32c_sparc64_finup, - .digest = crc32c_sparc64_digest, - .descsize = sizeof(u32), - .digestsize = CHKSUM_DIGEST_SIZE, - .base = { - .cra_name = "crc32c", - .cra_driver_name = "crc32c-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize = CHKSUM_BLOCK_SIZE, - .cra_ctxsize = sizeof(u32), - .cra_module = THIS_MODULE, - .cra_init = crc32c_sparc64_cra_init, - } -}; - -static bool __init sparc64_has_crc32c_opcode(void) -{ - unsigned long cfr; - - if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) - return false; - - __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); - if (!(cfr & CFR_CRC32C)) - return false; - - return true; -} - -static int __init crc32c_sparc64_mod_init(void) -{ - if (sparc64_has_crc32c_opcode()) { - pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n"); - return crypto_register_shash(&alg); - } - pr_info("sparc64 crc32c opcode not available.\n"); - return -ENODEV; -} - -static void __exit crc32c_sparc64_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_init(crc32c_sparc64_mod_init); -module_exit(crc32c_sparc64_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated"); - -MODULE_ALIAS_CRYPTO("crc32c"); - -#include "crop_devid.c" diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index ee5091dd67ed..5724d0f356eb 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -53,3 +53,5 @@ lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o obj-$(CONFIG_SPARC64) += iomap.o obj-$(CONFIG_SPARC32) += atomic32.o obj-$(CONFIG_SPARC64) += PeeCeeI.o +obj-$(CONFIG_CRC32_ARCH) += crc32-sparc.o +crc32-sparc-y := crc32_glue.o crc32c_asm.o diff --git a/arch/sparc/lib/crc32_glue.c b/arch/sparc/lib/crc32_glue.c new file mode 100644 index 000000000000..41076d2b1fd2 --- /dev/null +++ b/arch/sparc/lib/crc32_glue.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Glue code for CRC32C optimized for sparc64 crypto opcodes. + * + * This is based largely upon arch/x86/crypto/crc32c-intel.c + * + * Copyright (C) 2008 Intel Corporation + * Authors: Austin Zhang <austin_zhang@linux.intel.com> + * Kent Liu <kent.liu@intel.com> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/crc32.h> +#include <asm/pstate.h> +#include <asm/elf.h> + +static DEFINE_STATIC_KEY_FALSE(have_crc32c_opcode); + +u32 crc32_le_arch(u32 crc, const u8 *data, size_t len) +{ + return crc32_le_base(crc, data, len); +} +EXPORT_SYMBOL(crc32_le_arch); + +void crc32c_sparc64(u32 *crcp, const u64 *data, size_t len); + +u32 crc32c_le_arch(u32 crc, const u8 *data, size_t len) +{ + size_t n = -(uintptr_t)data & 7; + + if (!static_branch_likely(&have_crc32c_opcode)) + return crc32c_le_base(crc, data, len); + + if (n) { + /* Data isn't 8-byte aligned. Align it. */ + n = min(n, len); + crc = crc32c_le_base(crc, data, n); + data += n; + len -= n; + } + n = len & ~7U; + if (n) { + crc32c_sparc64(&crc, (const u64 *)data, n); + data += n; + len -= n; + } + if (len) + crc = crc32c_le_base(crc, data, len); + return crc; +} +EXPORT_SYMBOL(crc32c_le_arch); + +u32 crc32_be_arch(u32 crc, const u8 *data, size_t len) +{ + return crc32_be_base(crc, data, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +static int __init crc32_sparc_init(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return 0; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_CRC32C)) + return 0; + + static_branch_enable(&have_crc32c_opcode); + pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n"); + return 0; +} +arch_initcall(crc32_sparc_init); + +static void __exit crc32_sparc_exit(void) +{ +} +module_exit(crc32_sparc_exit); + +u32 crc32_optimizations(void) +{ + if (static_key_enabled(&have_crc32c_opcode)) + return CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated"); diff --git a/arch/sparc/crypto/crc32c_asm.S b/arch/sparc/lib/crc32c_asm.S index b8659a479242..ee454fa6aed6 100644 --- a/arch/sparc/crypto/crc32c_asm.S +++ b/arch/sparc/lib/crc32c_asm.S @@ -3,7 +3,7 @@ #include <asm/visasm.h> #include <asm/asi.h> -#include "opcodes.h" +#include "../crypto/opcodes.h" ENTRY(crc32c_sparc64) /* %o0=crc32p, %o1=data_ptr, %o2=len */ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 27c21c9b6a70..9bf9620d8f22 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -76,6 +76,8 @@ config X86 select ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_CPU_PASID if IOMMU_SVA + select ARCH_HAS_CRC32 + select ARCH_HAS_CRC_T10DIF if X86_64 select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig index 3d2e38ba5240..4757bf922075 100644 --- a/arch/x86/crypto/Kconfig +++ b/arch/x86/crypto/Kconfig @@ -492,36 +492,4 @@ config CRYPTO_GHASH_CLMUL_NI_INTEL Architecture: x86_64 using: - CLMUL-NI (carry-less multiplication new instructions) -config CRYPTO_CRC32C_INTEL - tristate "CRC32c (SSE4.2/PCLMULQDQ)" - depends on X86 - select CRYPTO_HASH - help - CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720) - - Architecture: x86 (32-bit and 64-bit) using: - - SSE4.2 (Streaming SIMD Extensions 4.2) CRC32 instruction - - PCLMULQDQ (carry-less multiplication) - -config CRYPTO_CRC32_PCLMUL - tristate "CRC32 (PCLMULQDQ)" - depends on X86 - select CRYPTO_HASH - select CRC32 - help - CRC32 CRC algorithm (IEEE 802.3) - - Architecture: x86 (32-bit and 64-bit) using: - - PCLMULQDQ (carry-less multiplication) - -config CRYPTO_CRCT10DIF_PCLMUL - tristate "CRCT10DIF (PCLMULQDQ)" - depends on X86 && 64BIT && CRC_T10DIF - select CRYPTO_HASH - help - CRC16 CRC algorithm used for the T10 (SCSI) Data Integrity Field (DIF) - - Architecture: x86_64 using: - - PCLMULQDQ (carry-less multiplication) - endmenu diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 53b4a277809e..07b00bfca64b 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -75,16 +75,6 @@ ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o obj-$(CONFIG_CRYPTO_POLYVAL_CLMUL_NI) += polyval-clmulni.o polyval-clmulni-y := polyval-clmulni_asm.o polyval-clmulni_glue.o -obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o -crc32c-intel-y := crc32c-intel_glue.o -crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o - -obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o -crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o - -obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o -crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o - obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o poly1305-x86_64-y := poly1305-x86_64-cryptogams.o poly1305_glue.o targets += poly1305-x86_64-cryptogams.S diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c deleted file mode 100644 index 9f5e342b9845..000000000000 --- a/arch/x86/crypto/crc32-pclmul_glue.c +++ /dev/null @@ -1,202 +0,0 @@ -/* GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see http://www.gnu.org/licenses - * - * Please visit http://www.xyratex.com/contact if you need additional - * information or have any questions. - * - * GPL HEADER END - */ - -/* - * Copyright 2012 Xyratex Technology Limited - * - * Wrappers for kernel crypto shash api to pclmulqdq crc32 implementation. - */ -#include <linux/init.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/crc32.h> -#include <crypto/internal/hash.h> -#include <crypto/internal/simd.h> - -#include <asm/cpufeatures.h> -#include <asm/cpu_device_id.h> -#include <asm/simd.h> - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -#define PCLMUL_MIN_LEN 64L /* minimum size of buffer - * for crc32_pclmul_le_16 */ -#define SCALE_F 16L /* size of xmm register */ -#define SCALE_F_MASK (SCALE_F - 1) - -u32 crc32_pclmul_le_16(unsigned char const *buffer, size_t len, u32 crc32); - -static u32 __attribute__((pure)) - crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len) -{ - unsigned int iquotient; - unsigned int iremainder; - unsigned int prealign; - - if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !crypto_simd_usable()) - return crc32_le(crc, p, len); - - if ((long)p & SCALE_F_MASK) { - /* align p to 16 byte */ - prealign = SCALE_F - ((long)p & SCALE_F_MASK); - - crc = crc32_le(crc, p, prealign); - len -= prealign; - p = (unsigned char *)(((unsigned long)p + SCALE_F_MASK) & - ~SCALE_F_MASK); - } - iquotient = len & (~SCALE_F_MASK); - iremainder = len & SCALE_F_MASK; - - kernel_fpu_begin(); - crc = crc32_pclmul_le_16(p, iquotient, crc); - kernel_fpu_end(); - - if (iremainder) - crc = crc32_le(crc, p + iquotient, iremainder); - - return crc; -} - -static int crc32_pclmul_cra_init(struct crypto_tfm *tfm) -{ - u32 *key = crypto_tfm_ctx(tfm); - - *key = 0; - - return 0; -} - -static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key, - unsigned int keylen) -{ - u32 *mctx = crypto_shash_ctx(hash); - - if (keylen != sizeof(u32)) - return -EINVAL; - *mctx = le32_to_cpup((__le32 *)key); - return 0; -} - -static int crc32_pclmul_init(struct shash_desc *desc) -{ - u32 *mctx = crypto_shash_ctx(desc->tfm); - u32 *crcp = shash_desc_ctx(desc); - - *crcp = *mctx; - - return 0; -} - -static int crc32_pclmul_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - u32 *crcp = shash_desc_ctx(desc); - - *crcp = crc32_pclmul_le(*crcp, data, len); - return 0; -} - -/* No final XOR 0xFFFFFFFF, like crc32_le */ -static int __crc32_pclmul_finup(u32 *crcp, const u8 *data, unsigned int len, - u8 *out) -{ - *(__le32 *)out = cpu_to_le32(crc32_pclmul_le(*crcp, data, len)); - return 0; -} - -static int crc32_pclmul_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32_pclmul_finup(shash_desc_ctx(desc), data, len, out); -} - -static int crc32_pclmul_final(struct shash_desc *desc, u8 *out) -{ - u32 *crcp = shash_desc_ctx(desc); - - *(__le32 *)out = cpu_to_le32p(crcp); - return 0; -} - -static int crc32_pclmul_digest(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32_pclmul_finup(crypto_shash_ctx(desc->tfm), data, len, - out); -} - -static struct shash_alg alg = { - .setkey = crc32_pclmul_setkey, - .init = crc32_pclmul_init, - .update = crc32_pclmul_update, - .final = crc32_pclmul_final, - .finup = crc32_pclmul_finup, - .digest = crc32_pclmul_digest, - .descsize = sizeof(u32), - .digestsize = CHKSUM_DIGEST_SIZE, - .base = { - .cra_name = "crc32", - .cra_driver_name = "crc32-pclmul", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize = CHKSUM_BLOCK_SIZE, - .cra_ctxsize = sizeof(u32), - .cra_module = THIS_MODULE, - .cra_init = crc32_pclmul_cra_init, - } -}; - -static const struct x86_cpu_id crc32pclmul_cpu_id[] = { - X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), - {} -}; -MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id); - - -static int __init crc32_pclmul_mod_init(void) -{ - - if (!x86_match_cpu(crc32pclmul_cpu_id)) { - pr_info("PCLMULQDQ-NI instructions are not detected.\n"); - return -ENODEV; - } - return crypto_register_shash(&alg); -} - -static void __exit crc32_pclmul_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_init(crc32_pclmul_mod_init); -module_exit(crc32_pclmul_mod_fini); - -MODULE_AUTHOR("Alexander Boyko <alexander_boyko@xyratex.com>"); -MODULE_DESCRIPTION("CRC32 algorithm (IEEE 802.3) accelerated with PCLMULQDQ"); -MODULE_LICENSE("GPL"); - -MODULE_ALIAS_CRYPTO("crc32"); -MODULE_ALIAS_CRYPTO("crc32-pclmul"); diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c deleted file mode 100644 index 52c5d47ef5a1..000000000000 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ /dev/null @@ -1,250 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Using hardware provided CRC32 instruction to accelerate the CRC32 disposal. - * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE) - * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at: - * http://www.intel.com/products/processor/manuals/ - * Intel(R) 64 and IA-32 Architectures Software Developer's Manual - * Volume 2A: Instruction Set Reference, A-M - * - * Copyright (C) 2008 Intel Corporation - * Authors: Austin Zhang <austin_zhang@linux.intel.com> - * Kent Liu <kent.liu@intel.com> - */ -#include <linux/init.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <crypto/internal/hash.h> -#include <crypto/internal/simd.h> - -#include <asm/cpufeatures.h> -#include <asm/cpu_device_id.h> -#include <asm/simd.h> - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -#define SCALE_F sizeof(unsigned long) - -#ifdef CONFIG_X86_64 -#define CRC32_INST "crc32q %1, %q0" -#else -#define CRC32_INST "crc32l %1, %0" -#endif - -#ifdef CONFIG_X86_64 -/* - * use carryless multiply version of crc32c when buffer - * size is >= 512 to account - * for fpu state save/restore overhead. - */ -#define CRC32C_PCL_BREAKEVEN 512 - -asmlinkage unsigned int crc_pcl(const u8 *buffer, unsigned int len, - unsigned int crc_init); -#endif /* CONFIG_X86_64 */ - -static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length) -{ - while (length--) { - asm("crc32b %1, %0" - : "+r" (crc) : "rm" (*data)); - data++; - } - - return crc; -} - -static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len) -{ - unsigned int iquotient = len / SCALE_F; - unsigned int iremainder = len % SCALE_F; - unsigned long *ptmp = (unsigned long *)p; - - while (iquotient--) { - asm(CRC32_INST - : "+r" (crc) : "rm" (*ptmp)); - ptmp++; - } - - if (iremainder) - crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp, - iremainder); - - return crc; -} - -/* - * Setting the seed allows arbitrary accumulators and flexible XOR policy - * If your algorithm starts with ~0, then XOR with ~0 before you set - * the seed. - */ -static int crc32c_intel_setkey(struct crypto_shash *hash, const u8 *key, - unsigned int keylen) -{ - u32 *mctx = crypto_shash_ctx(hash); - - if (keylen != sizeof(u32)) - return -EINVAL; - *mctx = le32_to_cpup((__le32 *)key); - return 0; -} - -static int crc32c_intel_init(struct shash_desc *desc) -{ - u32 *mctx = crypto_shash_ctx(desc->tfm); - u32 *crcp = shash_desc_ctx(desc); - - *crcp = *mctx; - - return 0; -} - -static int crc32c_intel_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - u32 *crcp = shash_desc_ctx(desc); - - *crcp = crc32c_intel_le_hw(*crcp, data, len); - return 0; -} - -static int __crc32c_intel_finup(u32 *crcp, const u8 *data, unsigned int len, - u8 *out) -{ - *(__le32 *)out = ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len)); - return 0; -} - -static int crc32c_intel_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_intel_finup(shash_desc_ctx(desc), data, len, out); -} - -static int crc32c_intel_final(struct shash_desc *desc, u8 *out) -{ - u32 *crcp = shash_desc_ctx(desc); - - *(__le32 *)out = ~cpu_to_le32p(crcp); - return 0; -} - -static int crc32c_intel_digest(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_intel_finup(crypto_shash_ctx(desc->tfm), data, len, - out); -} - -static int crc32c_intel_cra_init(struct crypto_tfm *tfm) -{ - u32 *key = crypto_tfm_ctx(tfm); - - *key = ~0; - - return 0; -} - -#ifdef CONFIG_X86_64 -static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - u32 *crcp = shash_desc_ctx(desc); - - /* - * use faster PCL version if datasize is large enough to - * overcome kernel fpu state save/restore overhead - */ - if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) { - kernel_fpu_begin(); - *crcp = crc_pcl(data, len, *crcp); - kernel_fpu_end(); - } else - *crcp = crc32c_intel_le_hw(*crcp, data, len); - return 0; -} - -static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len, - u8 *out) -{ - if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) { - kernel_fpu_begin(); - *(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp)); - kernel_fpu_end(); - } else - *(__le32 *)out = - ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len)); - return 0; -} - -static int crc32c_pcl_intel_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_pcl_intel_finup(shash_desc_ctx(desc), data, len, out); -} - -static int crc32c_pcl_intel_digest(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_pcl_intel_finup(crypto_shash_ctx(desc->tfm), data, len, - out); -} -#endif /* CONFIG_X86_64 */ - -static struct shash_alg alg = { - .setkey = crc32c_intel_setkey, - .init = crc32c_intel_init, - .update = crc32c_intel_update, - .final = crc32c_intel_final, - .finup = crc32c_intel_finup, - .digest = crc32c_intel_digest, - .descsize = sizeof(u32), - .digestsize = CHKSUM_DIGEST_SIZE, - .base = { - .cra_name = "crc32c", - .cra_driver_name = "crc32c-intel", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize = CHKSUM_BLOCK_SIZE, - .cra_ctxsize = sizeof(u32), - .cra_module = THIS_MODULE, - .cra_init = crc32c_intel_cra_init, - } -}; - -static const struct x86_cpu_id crc32c_cpu_id[] = { - X86_MATCH_FEATURE(X86_FEATURE_XMM4_2, NULL), - {} -}; -MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id); - -static int __init crc32c_intel_mod_init(void) -{ - if (!x86_match_cpu(crc32c_cpu_id)) - return -ENODEV; -#ifdef CONFIG_X86_64 - if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { - alg.update = crc32c_pcl_intel_update; - alg.finup = crc32c_pcl_intel_finup; - alg.digest = crc32c_pcl_intel_digest; - } -#endif - return crypto_register_shash(&alg); -} - -static void __exit crc32c_intel_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_init(crc32c_intel_mod_init); -module_exit(crc32c_intel_mod_fini); - -MODULE_AUTHOR("Austin Zhang <austin.zhang@intel.com>, Kent Liu <kent.liu@intel.com>"); -MODULE_DESCRIPTION("CRC32c (Castagnoli) optimization using Intel Hardware."); -MODULE_LICENSE("GPL"); - -MODULE_ALIAS_CRYPTO("crc32c"); -MODULE_ALIAS_CRYPTO("crc32c-intel"); diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c deleted file mode 100644 index 71291d5af9f4..000000000000 --- a/arch/x86/crypto/crct10dif-pclmul_glue.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Cryptographic API. - * - * T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions - * - * Copyright (C) 2013 Intel Corporation - * Author: Tim Chen <tim.c.chen@linux.intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include <linux/types.h> -#include <linux/module.h> -#include <linux/crc-t10dif.h> -#include <crypto/internal/hash.h> -#include <crypto/internal/simd.h> -#include <linux/init.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <asm/cpufeatures.h> -#include <asm/cpu_device_id.h> -#include <asm/simd.h> - -asmlinkage u16 crc_t10dif_pcl(u16 init_crc, const u8 *buf, size_t len); - -struct chksum_desc_ctx { - __u16 crc; -}; - -static int chksum_init(struct shash_desc *desc) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = 0; - - return 0; -} - -static int chksum_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - if (length >= 16 && crypto_simd_usable()) { - kernel_fpu_begin(); - ctx->crc = crc_t10dif_pcl(ctx->crc, data, length); - kernel_fpu_end(); - } else - ctx->crc = crc_t10dif_generic(ctx->crc, data, length); - return 0; -} - -static int chksum_final(struct shash_desc *desc, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - *(__u16 *)out = ctx->crc; - return 0; -} - -static int __chksum_finup(__u16 crc, const u8 *data, unsigned int len, u8 *out) -{ - if (len >= 16 && crypto_simd_usable()) { - kernel_fpu_begin(); - *(__u16 *)out = crc_t10dif_pcl(crc, data, len); - kernel_fpu_end(); - } else - *(__u16 *)out = crc_t10dif_generic(crc, data, len); - return 0; -} - -static int chksum_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup(ctx->crc, data, len, out); -} - -static int chksum_digest(struct shash_desc *desc, const u8 *data, - unsigned int length, u8 *out) -{ - return __chksum_finup(0, data, length, out); -} - -static struct shash_alg alg = { - .digestsize = CRC_T10DIF_DIGEST_SIZE, - .init = chksum_init, - .update = chksum_update, - .final = chksum_final, - .finup = chksum_finup, - .digest = chksum_digest, - .descsize = sizeof(struct chksum_desc_ctx), - .base = { - .cra_name = "crct10dif", - .cra_driver_name = "crct10dif-pclmul", - .cra_priority = 200, - .cra_blocksize = CRC_T10DIF_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static const struct x86_cpu_id crct10dif_cpu_id[] = { - X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), - {} -}; -MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id); - -static int __init crct10dif_intel_mod_init(void) -{ - if (!x86_match_cpu(crct10dif_cpu_id)) - return -ENODEV; - - return crypto_register_shash(&alg); -} - -static void __exit crct10dif_intel_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_init(crct10dif_intel_mod_init); -module_exit(crct10dif_intel_mod_fini); - -MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>"); -MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ."); -MODULE_LICENSE("GPL"); - -MODULE_ALIAS_CRYPTO("crct10dif"); -MODULE_ALIAS_CRYPTO("crct10dif-pclmul"); diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 98583a9dbab3..8a59c61624c2 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -38,6 +38,13 @@ lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o lib-$(CONFIG_MITIGATION_RETPOLINE) += retpoline.o +obj-$(CONFIG_CRC32_ARCH) += crc32-x86.o +crc32-x86-y := crc32-glue.o crc32-pclmul.o +crc32-x86-$(CONFIG_64BIT) += crc32c-3way.o + +obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-x86.o +crc-t10dif-x86-y := crc-t10dif-glue.o crct10dif-pcl-asm_64.o + obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o obj-y += iomem.o diff --git a/arch/x86/lib/crc-t10dif-glue.c b/arch/x86/lib/crc-t10dif-glue.c new file mode 100644 index 000000000000..13f07ddc9122 --- /dev/null +++ b/arch/x86/lib/crc-t10dif-glue.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * CRC-T10DIF using PCLMULQDQ instructions + * + * Copyright 2024 Google LLC + */ + +#include <asm/cpufeatures.h> +#include <asm/simd.h> +#include <crypto/internal/simd.h> +#include <linux/crc-t10dif.h> +#include <linux/module.h> + +static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq); + +asmlinkage u16 crc_t10dif_pcl(u16 init_crc, const u8 *buf, size_t len); + +u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len) +{ + if (len >= 16 && + static_key_enabled(&have_pclmulqdq) && crypto_simd_usable()) { + kernel_fpu_begin(); + crc = crc_t10dif_pcl(crc, p, len); + kernel_fpu_end(); + return crc; + } + return crc_t10dif_generic(crc, p, len); +} +EXPORT_SYMBOL(crc_t10dif_arch); + +static int __init crc_t10dif_x86_init(void) +{ + if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) + static_branch_enable(&have_pclmulqdq); + return 0; +} +arch_initcall(crc_t10dif_x86_init); + +static void __exit crc_t10dif_x86_exit(void) +{ +} +module_exit(crc_t10dif_x86_exit); + +bool crc_t10dif_is_optimized(void) +{ + return static_key_enabled(&have_pclmulqdq); +} +EXPORT_SYMBOL(crc_t10dif_is_optimized); + +MODULE_DESCRIPTION("CRC-T10DIF using PCLMULQDQ instructions"); +MODULE_LICENSE("GPL"); diff --git a/arch/x86/lib/crc32-glue.c b/arch/x86/lib/crc32-glue.c new file mode 100644 index 000000000000..2dd18a886ded --- /dev/null +++ b/arch/x86/lib/crc32-glue.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * x86-optimized CRC32 functions + * + * Copyright (C) 2008 Intel Corporation + * Copyright 2012 Xyratex Technology Limited + * Copyright 2024 Google LLC + */ + +#include <asm/cpufeatures.h> +#include <asm/simd.h> +#include <crypto/internal/simd.h> +#include <linux/crc32.h> +#include <linux/linkage.h> +#include <linux/module.h> + +/* minimum size of buffer for crc32_pclmul_le_16 */ +#define CRC32_PCLMUL_MIN_LEN 64 + +static DEFINE_STATIC_KEY_FALSE(have_crc32); +static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq); + +u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len); + +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) +{ + if (len >= CRC32_PCLMUL_MIN_LEN + 15 && + static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) { + size_t n = -(uintptr_t)p & 15; + + /* align p to 16-byte boundary */ + if (n) { + crc = crc32_le_base(crc, p, n); + p += n; + len -= n; + } + n = round_down(len, 16); + kernel_fpu_begin(); + crc = crc32_pclmul_le_16(crc, p, n); + kernel_fpu_end(); + p += n; + len -= n; + } + if (len) + crc = crc32_le_base(crc, p, len); + return crc; +} +EXPORT_SYMBOL(crc32_le_arch); + +#ifdef CONFIG_X86_64 +#define CRC32_INST "crc32q %1, %q0" +#else +#define CRC32_INST "crc32l %1, %0" +#endif + +/* + * Use carryless multiply version of crc32c when buffer size is >= 512 to + * account for FPU state save/restore overhead. + */ +#define CRC32C_PCLMUL_BREAKEVEN 512 + +asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len); + +u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) +{ + size_t num_longs; + + if (!static_branch_likely(&have_crc32)) + return crc32c_le_base(crc, p, len); + + if (IS_ENABLED(CONFIG_X86_64) && len >= CRC32C_PCLMUL_BREAKEVEN && + static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) { + kernel_fpu_begin(); + crc = crc32c_x86_3way(crc, p, len); + kernel_fpu_end(); + return crc; + } + + for (num_longs = len / sizeof(unsigned long); + num_longs != 0; num_longs--, p += sizeof(unsigned long)) + asm(CRC32_INST : "+r" (crc) : "rm" (*(unsigned long *)p)); + + for (len %= sizeof(unsigned long); len; len--, p++) + asm("crc32b %1, %0" : "+r" (crc) : "rm" (*p)); + + return crc; +} +EXPORT_SYMBOL(crc32c_le_arch); + +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_be_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +static int __init crc32_x86_init(void) +{ + if (boot_cpu_has(X86_FEATURE_XMM4_2)) + static_branch_enable(&have_crc32); + if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) + static_branch_enable(&have_pclmulqdq); + return 0; +} +arch_initcall(crc32_x86_init); + +static void __exit crc32_x86_exit(void) +{ +} +module_exit(crc32_x86_exit); + +u32 crc32_optimizations(void) +{ + u32 optimizations = 0; + + if (static_key_enabled(&have_crc32)) + optimizations |= CRC32C_OPTIMIZATION; + if (static_key_enabled(&have_pclmulqdq)) + optimizations |= CRC32_LE_OPTIMIZATION; + return optimizations; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_DESCRIPTION("x86-optimized CRC32 functions"); +MODULE_LICENSE("GPL"); diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/lib/crc32-pclmul.S index 5d31137e2c7d..f9637789cac1 100644 --- a/arch/x86/crypto/crc32-pclmul_asm.S +++ b/arch/x86/lib/crc32-pclmul.S @@ -58,13 +58,13 @@ #define CONSTANT %xmm0 #ifdef __x86_64__ -#define BUF %rdi -#define LEN %rsi -#define CRC %edx +#define CRC %edi +#define BUF %rsi +#define LEN %rdx #else -#define BUF %eax -#define LEN %edx -#define CRC %ecx +#define CRC %eax +#define BUF %edx +#define LEN %ecx #endif @@ -72,12 +72,11 @@ .text /** * Calculate crc32 - * BUF - buffer (16 bytes aligned) - * LEN - sizeof buffer (16 bytes aligned), LEN should be grater than 63 * CRC - initial crc32 + * BUF - buffer (16 bytes aligned) + * LEN - sizeof buffer (16 bytes aligned), LEN should be greater than 63 * return %eax crc32 - * uint crc32_pclmul_le_16(unsigned char const *buffer, - * size_t len, uint crc32) + * u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len); */ SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */ diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/lib/crc32c-3way.S index 752812bc4991..9b8770503bbc 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/lib/crc32c-3way.S @@ -52,15 +52,16 @@ # regular CRC code that does not interleave the CRC instructions. #define SMALL_SIZE 200 -# unsigned int crc_pcl(const u8 *buffer, unsigned int len, unsigned int crc_init); +# u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len); .text -SYM_FUNC_START(crc_pcl) -#define bufp %rdi -#define bufp_d %edi -#define len %esi -#define crc_init %edx -#define crc_init_q %rdx +SYM_FUNC_START(crc32c_x86_3way) +#define crc0 %edi +#define crc0_q %rdi +#define bufp %rsi +#define bufp_d %esi +#define len %rdx +#define len_dw %edx #define n_misaligned %ecx /* overlaps chunk_bytes! */ #define n_misaligned_q %rcx #define chunk_bytes %ecx /* overlaps n_misaligned! */ @@ -85,9 +86,9 @@ SYM_FUNC_START(crc_pcl) .Ldo_align: movq (bufp), %rax add n_misaligned_q, bufp - sub n_misaligned, len + sub n_misaligned_q, len .Lalign_loop: - crc32b %al, crc_init # compute crc32 of 1-byte + crc32b %al, crc0 # compute crc32 of 1-byte shr $8, %rax # get next byte dec n_misaligned jne .Lalign_loop @@ -102,7 +103,7 @@ SYM_FUNC_START(crc_pcl) .Lpartial_block: # Compute floor(len / 24) to get num qwords to process from each lane. - imul $2731, len, %eax # 2731 = ceil(2^16 / 24) + imul $2731, len_dw, %eax # 2731 = ceil(2^16 / 24) shr $16, %eax jmp .Lcrc_3lanes @@ -125,16 +126,16 @@ SYM_FUNC_START(crc_pcl) # Unroll the loop by a factor of 4 to reduce the overhead of the loop # bookkeeping instructions, which can compete with crc32q for the ALUs. .Lcrc_3lanes_4x_loop: - crc32q (bufp), crc_init_q + crc32q (bufp), crc0_q crc32q (bufp,chunk_bytes_q), crc1 crc32q (bufp,chunk_bytes_q,2), crc2 - crc32q 8(bufp), crc_init_q + crc32q 8(bufp), crc0_q crc32q 8(bufp,chunk_bytes_q), crc1 crc32q 8(bufp,chunk_bytes_q,2), crc2 - crc32q 16(bufp), crc_init_q + crc32q 16(bufp), crc0_q crc32q 16(bufp,chunk_bytes_q), crc1 crc32q 16(bufp,chunk_bytes_q,2), crc2 - crc32q 24(bufp), crc_init_q + crc32q 24(bufp), crc0_q crc32q 24(bufp,chunk_bytes_q), crc1 crc32q 24(bufp,chunk_bytes_q,2), crc2 add $32, bufp @@ -146,7 +147,7 @@ SYM_FUNC_START(crc_pcl) jz .Lcrc_3lanes_last_qword .Lcrc_3lanes_1x_loop: - crc32q (bufp), crc_init_q + crc32q (bufp), crc0_q crc32q (bufp,chunk_bytes_q), crc1 crc32q (bufp,chunk_bytes_q,2), crc2 add $8, bufp @@ -154,7 +155,7 @@ SYM_FUNC_START(crc_pcl) jnz .Lcrc_3lanes_1x_loop .Lcrc_3lanes_last_qword: - crc32q (bufp), crc_init_q + crc32q (bufp), crc0_q crc32q (bufp,chunk_bytes_q), crc1 # SKIP crc32q (bufp,chunk_bytes_q,2), crc2 ; Don't do this one yet @@ -165,9 +166,9 @@ SYM_FUNC_START(crc_pcl) lea (K_table-8)(%rip), %rax # first entry is for idx 1 pmovzxdq (%rax,chunk_bytes_q), %xmm0 # 2 consts: K1:K2 lea (chunk_bytes,chunk_bytes,2), %eax # chunk_bytes * 3 - sub %eax, len # len -= chunk_bytes * 3 + sub %rax, len # len -= chunk_bytes * 3 - movq crc_init_q, %xmm1 # CRC for block 1 + movq crc0_q, %xmm1 # CRC for block 1 pclmulqdq $0x00, %xmm0, %xmm1 # Multiply by K2 movq crc1, %xmm2 # CRC for block 2 @@ -176,8 +177,8 @@ SYM_FUNC_START(crc_pcl) pxor %xmm2,%xmm1 movq %xmm1, %rax xor (bufp,chunk_bytes_q,2), %rax - mov crc2, crc_init_q - crc32 %rax, crc_init_q + mov crc2, crc0_q + crc32 %rax, crc0_q lea 8(bufp,chunk_bytes_q,2), bufp ################################################################ @@ -193,34 +194,34 @@ SYM_FUNC_START(crc_pcl) ## 6) Process any remainder without interleaving: ####################################################################### .Lsmall: - test len, len + test len_dw, len_dw jz .Ldone - mov len, %eax + mov len_dw, %eax shr $3, %eax jz .Ldo_dword .Ldo_qwords: - crc32q (bufp), crc_init_q + crc32q (bufp), crc0_q add $8, bufp dec %eax jnz .Ldo_qwords .Ldo_dword: - test $4, len + test $4, len_dw jz .Ldo_word - crc32l (bufp), crc_init + crc32l (bufp), crc0 add $4, bufp .Ldo_word: - test $2, len + test $2, len_dw jz .Ldo_byte - crc32w (bufp), crc_init + crc32w (bufp), crc0 add $2, bufp .Ldo_byte: - test $1, len + test $1, len_dw jz .Ldone - crc32b (bufp), crc_init + crc32b (bufp), crc0 .Ldone: - mov crc_init, %eax + mov crc0, %eax RET -SYM_FUNC_END(crc_pcl) +SYM_FUNC_END(crc32c_x86_3way) .section .rodata, "a", @progbits ################################################################ diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/lib/crct10dif-pcl-asm_64.S index 5286db5b8165..5286db5b8165 100644 --- a/arch/x86/crypto/crct10dif-pcl-asm_64.S +++ b/arch/x86/lib/crct10dif-pcl-asm_64.S |