diff options
Diffstat (limited to 'arch/mips')
29 files changed, 34 insertions, 2492 deletions
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 1e48184ecf1e..934eb961bd0d 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2024,7 +2024,6 @@ config CPU_MIPSR5 config CPU_MIPSR6 bool default y if CPU_MIPS32_R6 || CPU_MIPS64_R6 - select ARCH_HAS_CRC32 select CPU_HAS_RIXI select CPU_HAS_DIEI if !CPU_DIEI_BROKEN select HAVE_ARCH_BITREVERSE diff --git a/arch/mips/alchemy/common/gpiolib.c b/arch/mips/alchemy/common/gpiolib.c index 411f70ceb762..194034eba75f 100644 --- a/arch/mips/alchemy/common/gpiolib.c +++ b/arch/mips/alchemy/common/gpiolib.c @@ -40,9 +40,11 @@ static int gpio2_get(struct gpio_chip *chip, unsigned offset) return !!alchemy_gpio2_get_value(offset + ALCHEMY_GPIO2_BASE); } -static void gpio2_set(struct gpio_chip *chip, unsigned offset, int value) +static int gpio2_set(struct gpio_chip *chip, unsigned offset, int value) { alchemy_gpio2_set_value(offset + ALCHEMY_GPIO2_BASE, value); + + return 0; } static int gpio2_direction_input(struct gpio_chip *chip, unsigned offset) @@ -68,10 +70,12 @@ static int gpio1_get(struct gpio_chip *chip, unsigned offset) return !!alchemy_gpio1_get_value(offset + ALCHEMY_GPIO1_BASE); } -static void gpio1_set(struct gpio_chip *chip, +static int gpio1_set(struct gpio_chip *chip, unsigned offset, int value) { alchemy_gpio1_set_value(offset + ALCHEMY_GPIO1_BASE, value); + + return 0; } static int gpio1_direction_input(struct gpio_chip *chip, unsigned offset) @@ -97,7 +101,7 @@ struct gpio_chip alchemy_gpio_chip[] = { .direction_input = gpio1_direction_input, .direction_output = gpio1_direction_output, .get = gpio1_get, - .set = gpio1_set, + .set_rv = gpio1_set, .to_irq = gpio1_to_irq, .base = ALCHEMY_GPIO1_BASE, .ngpio = ALCHEMY_GPIO1_NUM, @@ -107,7 +111,7 @@ struct gpio_chip alchemy_gpio_chip[] = { .direction_input = gpio2_direction_input, .direction_output = gpio2_direction_output, .get = gpio2_get, - .set = gpio2_set, + .set_rv = gpio2_set, .to_irq = gpio2_to_irq, .base = ALCHEMY_GPIO2_BASE, .ngpio = ALCHEMY_GPIO2_NUM, diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig index 11f4aa6e80e9..450e979ef5d9 100644 --- a/arch/mips/cavium-octeon/Kconfig +++ b/arch/mips/cavium-octeon/Kconfig @@ -23,12 +23,6 @@ config CAVIUM_OCTEON_CVMSEG_SIZE legally range is from zero to 54 cache blocks (i.e. CVMSEG LM is between zero and 6192 bytes). -config CRYPTO_SHA256_OCTEON - tristate - default CRYPTO_LIB_SHA256 - select CRYPTO_ARCH_HAVE_LIB_SHA256 - select CRYPTO_LIB_SHA256_GENERIC - endif # CPU_CAVIUM_OCTEON if CAVIUM_OCTEON_SOC diff --git a/arch/mips/cavium-octeon/crypto/Makefile b/arch/mips/cavium-octeon/crypto/Makefile index db26c73fa0ed..83f2f5dd93cc 100644 --- a/arch/mips/cavium-octeon/crypto/Makefile +++ b/arch/mips/cavium-octeon/crypto/Makefile @@ -6,6 +6,3 @@ obj-y += octeon-crypto.o obj-$(CONFIG_CRYPTO_MD5_OCTEON) += octeon-md5.o -obj-$(CONFIG_CRYPTO_SHA1_OCTEON) += octeon-sha1.o -obj-$(CONFIG_CRYPTO_SHA256_OCTEON) += octeon-sha256.o -obj-$(CONFIG_CRYPTO_SHA512_OCTEON) += octeon-sha512.o diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.c b/arch/mips/cavium-octeon/crypto/octeon-crypto.c index cfb4a146cf17..0ff8559391f5 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-crypto.c +++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.c @@ -7,12 +7,11 @@ */ #include <asm/cop2.h> +#include <asm/octeon/crypto.h> #include <linux/export.h> #include <linux/interrupt.h> #include <linux/sched/task_stack.h> -#include "octeon-crypto.h" - /** * Enable access to Octeon's COP2 crypto hardware for kernel use. Wrap any * crypto operations in calls to octeon_crypto_enable/disable in order to make diff --git a/arch/mips/cavium-octeon/crypto/octeon-md5.c b/arch/mips/cavium-octeon/crypto/octeon-md5.c index fbc84eb7fedf..a8ce831e2ceb 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-md5.c +++ b/arch/mips/cavium-octeon/crypto/octeon-md5.c @@ -19,6 +19,7 @@ * any later version. */ +#include <asm/octeon/crypto.h> #include <asm/octeon/octeon.h> #include <crypto/internal/hash.h> #include <crypto/md5.h> @@ -27,8 +28,6 @@ #include <linux/string.h> #include <linux/unaligned.h> -#include "octeon-crypto.h" - struct octeon_md5_state { __le32 hash[MD5_HASH_WORDS]; u64 byte_count; diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha1.c b/arch/mips/cavium-octeon/crypto/octeon-sha1.c deleted file mode 100644 index e70f21a473da..000000000000 --- a/arch/mips/cavium-octeon/crypto/octeon-sha1.c +++ /dev/null @@ -1,147 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Cryptographic API. - * - * SHA1 Secure Hash Algorithm. - * - * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>. - * - * Based on crypto/sha1_generic.c, which is: - * - * Copyright (c) Alan Smithee. - * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> - * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> - */ - -#include <asm/octeon/octeon.h> -#include <crypto/internal/hash.h> -#include <crypto/sha1.h> -#include <crypto/sha1_base.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/module.h> - -#include "octeon-crypto.h" - -/* - * We pass everything as 64-bit. OCTEON can handle misaligned data. - */ - -static void octeon_sha1_store_hash(struct sha1_state *sctx) -{ - u64 *hash = (u64 *)sctx->state; - union { - u32 word[2]; - u64 dword; - } hash_tail = { { sctx->state[4], } }; - - write_octeon_64bit_hash_dword(hash[0], 0); - write_octeon_64bit_hash_dword(hash[1], 1); - write_octeon_64bit_hash_dword(hash_tail.dword, 2); - memzero_explicit(&hash_tail.word[0], sizeof(hash_tail.word[0])); -} - -static void octeon_sha1_read_hash(struct sha1_state *sctx) -{ - u64 *hash = (u64 *)sctx->state; - union { - u32 word[2]; - u64 dword; - } hash_tail; - - hash[0] = read_octeon_64bit_hash_dword(0); - hash[1] = read_octeon_64bit_hash_dword(1); - hash_tail.dword = read_octeon_64bit_hash_dword(2); - sctx->state[4] = hash_tail.word[0]; - memzero_explicit(&hash_tail.dword, sizeof(hash_tail.dword)); -} - -static void octeon_sha1_transform(struct sha1_state *sctx, const u8 *src, - int blocks) -{ - do { - const u64 *block = (const u64 *)src; - - write_octeon_64bit_block_dword(block[0], 0); - write_octeon_64bit_block_dword(block[1], 1); - write_octeon_64bit_block_dword(block[2], 2); - write_octeon_64bit_block_dword(block[3], 3); - write_octeon_64bit_block_dword(block[4], 4); - write_octeon_64bit_block_dword(block[5], 5); - write_octeon_64bit_block_dword(block[6], 6); - octeon_sha1_start(block[7]); - - src += SHA1_BLOCK_SIZE; - } while (--blocks); -} - -static int octeon_sha1_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - struct octeon_cop2_state state; - unsigned long flags; - int remain; - - flags = octeon_crypto_enable(&state); - octeon_sha1_store_hash(sctx); - - remain = sha1_base_do_update_blocks(desc, data, len, - octeon_sha1_transform); - - octeon_sha1_read_hash(sctx); - octeon_crypto_disable(&state, flags); - return remain; -} - -static int octeon_sha1_finup(struct shash_desc *desc, const u8 *src, - unsigned int len, u8 *out) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - struct octeon_cop2_state state; - unsigned long flags; - - flags = octeon_crypto_enable(&state); - octeon_sha1_store_hash(sctx); - - sha1_base_do_finup(desc, src, len, octeon_sha1_transform); - - octeon_sha1_read_hash(sctx); - octeon_crypto_disable(&state, flags); - return sha1_base_finish(desc, out); -} - -static struct shash_alg octeon_sha1_alg = { - .digestsize = SHA1_DIGEST_SIZE, - .init = sha1_base_init, - .update = octeon_sha1_update, - .finup = octeon_sha1_finup, - .descsize = SHA1_STATE_SIZE, - .base = { - .cra_name = "sha1", - .cra_driver_name= "octeon-sha1", - .cra_priority = OCTEON_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .cra_blocksize = SHA1_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static int __init octeon_sha1_mod_init(void) -{ - if (!octeon_has_crypto()) - return -ENOTSUPP; - return crypto_register_shash(&octeon_sha1_alg); -} - -static void __exit octeon_sha1_mod_fini(void) -{ - crypto_unregister_shash(&octeon_sha1_alg); -} - -module_init(octeon_sha1_mod_init); -module_exit(octeon_sha1_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm (OCTEON)"); -MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>"); diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha256.c b/arch/mips/cavium-octeon/crypto/octeon-sha256.c deleted file mode 100644 index f93faaf1f4af..000000000000 --- a/arch/mips/cavium-octeon/crypto/octeon-sha256.c +++ /dev/null @@ -1,73 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * SHA-256 Secure Hash Algorithm. - * - * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>. - * - * Based on crypto/sha256_generic.c, which is: - * - * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com> - * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> - * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> - * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com> - */ - -#include <asm/octeon/octeon.h> -#include <crypto/internal/sha2.h> -#include <linux/kernel.h> -#include <linux/module.h> - -#include "octeon-crypto.h" - -/* - * We pass everything as 64-bit. OCTEON can handle misaligned data. - */ - -void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS], - const u8 *data, size_t nblocks) -{ - struct octeon_cop2_state cop2_state; - u64 *state64 = (u64 *)state; - unsigned long flags; - - if (!octeon_has_crypto()) - return sha256_blocks_generic(state, data, nblocks); - - flags = octeon_crypto_enable(&cop2_state); - write_octeon_64bit_hash_dword(state64[0], 0); - write_octeon_64bit_hash_dword(state64[1], 1); - write_octeon_64bit_hash_dword(state64[2], 2); - write_octeon_64bit_hash_dword(state64[3], 3); - - do { - const u64 *block = (const u64 *)data; - - write_octeon_64bit_block_dword(block[0], 0); - write_octeon_64bit_block_dword(block[1], 1); - write_octeon_64bit_block_dword(block[2], 2); - write_octeon_64bit_block_dword(block[3], 3); - write_octeon_64bit_block_dword(block[4], 4); - write_octeon_64bit_block_dword(block[5], 5); - write_octeon_64bit_block_dword(block[6], 6); - octeon_sha256_start(block[7]); - - data += SHA256_BLOCK_SIZE; - } while (--nblocks); - - state64[0] = read_octeon_64bit_hash_dword(0); - state64[1] = read_octeon_64bit_hash_dword(1); - state64[2] = read_octeon_64bit_hash_dword(2); - state64[3] = read_octeon_64bit_hash_dword(3); - octeon_crypto_disable(&cop2_state, flags); -} -EXPORT_SYMBOL_GPL(sha256_blocks_arch); - -bool sha256_is_arch_optimized(void) -{ - return octeon_has_crypto(); -} -EXPORT_SYMBOL_GPL(sha256_is_arch_optimized); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA-256 Secure Hash Algorithm (OCTEON)"); -MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>"); diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha512.c b/arch/mips/cavium-octeon/crypto/octeon-sha512.c deleted file mode 100644 index 215311053db3..000000000000 --- a/arch/mips/cavium-octeon/crypto/octeon-sha512.c +++ /dev/null @@ -1,167 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Cryptographic API. - * - * SHA-512 and SHA-384 Secure Hash Algorithm. - * - * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>. - * - * Based on crypto/sha512_generic.c, which is: - * - * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com> - * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> - * Copyright (c) 2003 Kyle McMartin <kyle@debian.org> - */ - -#include <asm/octeon/octeon.h> -#include <crypto/internal/hash.h> -#include <crypto/sha2.h> -#include <crypto/sha512_base.h> -#include <linux/kernel.h> -#include <linux/module.h> - -#include "octeon-crypto.h" - -/* - * We pass everything as 64-bit. OCTEON can handle misaligned data. - */ - -static void octeon_sha512_store_hash(struct sha512_state *sctx) -{ - write_octeon_64bit_hash_sha512(sctx->state[0], 0); - write_octeon_64bit_hash_sha512(sctx->state[1], 1); - write_octeon_64bit_hash_sha512(sctx->state[2], 2); - write_octeon_64bit_hash_sha512(sctx->state[3], 3); - write_octeon_64bit_hash_sha512(sctx->state[4], 4); - write_octeon_64bit_hash_sha512(sctx->state[5], 5); - write_octeon_64bit_hash_sha512(sctx->state[6], 6); - write_octeon_64bit_hash_sha512(sctx->state[7], 7); -} - -static void octeon_sha512_read_hash(struct sha512_state *sctx) -{ - sctx->state[0] = read_octeon_64bit_hash_sha512(0); - sctx->state[1] = read_octeon_64bit_hash_sha512(1); - sctx->state[2] = read_octeon_64bit_hash_sha512(2); - sctx->state[3] = read_octeon_64bit_hash_sha512(3); - sctx->state[4] = read_octeon_64bit_hash_sha512(4); - sctx->state[5] = read_octeon_64bit_hash_sha512(5); - sctx->state[6] = read_octeon_64bit_hash_sha512(6); - sctx->state[7] = read_octeon_64bit_hash_sha512(7); -} - -static void octeon_sha512_transform(struct sha512_state *sctx, - const u8 *src, int blocks) -{ - do { - const u64 *block = (const u64 *)src; - - write_octeon_64bit_block_sha512(block[0], 0); - write_octeon_64bit_block_sha512(block[1], 1); - write_octeon_64bit_block_sha512(block[2], 2); - write_octeon_64bit_block_sha512(block[3], 3); - write_octeon_64bit_block_sha512(block[4], 4); - write_octeon_64bit_block_sha512(block[5], 5); - write_octeon_64bit_block_sha512(block[6], 6); - write_octeon_64bit_block_sha512(block[7], 7); - write_octeon_64bit_block_sha512(block[8], 8); - write_octeon_64bit_block_sha512(block[9], 9); - write_octeon_64bit_block_sha512(block[10], 10); - write_octeon_64bit_block_sha512(block[11], 11); - write_octeon_64bit_block_sha512(block[12], 12); - write_octeon_64bit_block_sha512(block[13], 13); - write_octeon_64bit_block_sha512(block[14], 14); - octeon_sha512_start(block[15]); - - src += SHA512_BLOCK_SIZE; - } while (--blocks); -} - -static int octeon_sha512_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - struct octeon_cop2_state state; - unsigned long flags; - int remain; - - flags = octeon_crypto_enable(&state); - octeon_sha512_store_hash(sctx); - - remain = sha512_base_do_update_blocks(desc, data, len, - octeon_sha512_transform); - - octeon_sha512_read_hash(sctx); - octeon_crypto_disable(&state, flags); - return remain; -} - -static int octeon_sha512_finup(struct shash_desc *desc, const u8 *src, - unsigned int len, u8 *hash) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - struct octeon_cop2_state state; - unsigned long flags; - - flags = octeon_crypto_enable(&state); - octeon_sha512_store_hash(sctx); - - sha512_base_do_finup(desc, src, len, octeon_sha512_transform); - - octeon_sha512_read_hash(sctx); - octeon_crypto_disable(&state, flags); - return sha512_base_finish(desc, hash); -} - -static struct shash_alg octeon_sha512_algs[2] = { { - .digestsize = SHA512_DIGEST_SIZE, - .init = sha512_base_init, - .update = octeon_sha512_update, - .finup = octeon_sha512_finup, - .descsize = SHA512_STATE_SIZE, - .base = { - .cra_name = "sha512", - .cra_driver_name= "octeon-sha512", - .cra_priority = OCTEON_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA512_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}, { - .digestsize = SHA384_DIGEST_SIZE, - .init = sha384_base_init, - .update = octeon_sha512_update, - .finup = octeon_sha512_finup, - .descsize = SHA512_STATE_SIZE, - .base = { - .cra_name = "sha384", - .cra_driver_name= "octeon-sha384", - .cra_priority = OCTEON_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA384_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -} }; - -static int __init octeon_sha512_mod_init(void) -{ - if (!octeon_has_crypto()) - return -ENOTSUPP; - return crypto_register_shashes(octeon_sha512_algs, - ARRAY_SIZE(octeon_sha512_algs)); -} - -static void __exit octeon_sha512_mod_fini(void) -{ - crypto_unregister_shashes(octeon_sha512_algs, - ARRAY_SIZE(octeon_sha512_algs)); -} - -module_init(octeon_sha512_mod_init); -module_exit(octeon_sha512_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA-512 and SHA-384 Secure Hash Algorithms (OCTEON)"); -MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>"); diff --git a/arch/mips/configs/cavium_octeon_defconfig b/arch/mips/configs/cavium_octeon_defconfig index 88ae0aa85364..3f50e1d78894 100644 --- a/arch/mips/configs/cavium_octeon_defconfig +++ b/arch/mips/configs/cavium_octeon_defconfig @@ -156,8 +156,6 @@ CONFIG_SECURITY_NETWORK=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_MD5_OCTEON=y -CONFIG_CRYPTO_SHA1_OCTEON=m -CONFIG_CRYPTO_SHA512_OCTEON=m CONFIG_CRYPTO_DES=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_FS=y diff --git a/arch/mips/configs/generic/board-marduk.config b/arch/mips/configs/generic/board-marduk.config index 05ca34cd5a73..65433c5c4fde 100644 --- a/arch/mips/configs/generic/board-marduk.config +++ b/arch/mips/configs/generic/board-marduk.config @@ -50,4 +50,3 @@ CONFIG_CRYPTO_DEV_IMGTEC_HASH=y CONFIG_IMGPDC_WDT=y CONFIG_IR_IMG=y CONFIG_CC10001_ADC=y -CONFIG_SND_SOC_IMG=y diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig index 98844b457b7f..5ff0c1554168 100644 --- a/arch/mips/configs/loongson3_defconfig +++ b/arch/mips/configs/loongson3_defconfig @@ -292,7 +292,9 @@ CONFIG_SND_SEQ_DUMMY=m # CONFIG_SND_ISA is not set CONFIG_SND_HDA_INTEL=m CONFIG_SND_HDA_PATCH_LOADER=y -CONFIG_SND_HDA_CODEC_REALTEK=m +CONFIG_SND_HDA_CODEC_REALTEK=y +CONFIG_SND_HDA_CODEC_REALTEK_LIB=m +CONFIG_SND_HDA_CODEC_ALC269=m CONFIG_SND_HDA_CODEC_SIGMATEL=m CONFIG_SND_HDA_CODEC_HDMI=m CONFIG_SND_HDA_CODEC_CONEXANT=m diff --git a/arch/mips/crypto/Kconfig b/arch/mips/crypto/Kconfig index 6bf073ae7613..7b91f4ec65bf 100644 --- a/arch/mips/crypto/Kconfig +++ b/arch/mips/crypto/Kconfig @@ -12,24 +12,4 @@ config CRYPTO_MD5_OCTEON Architecture: mips OCTEON using crypto instructions, when available -config CRYPTO_SHA1_OCTEON - tristate "Hash functions: SHA-1 (OCTEON)" - depends on CPU_CAVIUM_OCTEON - select CRYPTO_SHA1 - select CRYPTO_HASH - help - SHA-1 secure hash algorithm (FIPS 180) - - Architecture: mips OCTEON - -config CRYPTO_SHA512_OCTEON - tristate "Hash functions: SHA-384 and SHA-512 (OCTEON)" - depends on CPU_CAVIUM_OCTEON - select CRYPTO_SHA512 - select CRYPTO_HASH - help - SHA-384 and SHA-512 secure hash algorithms (FIPS 180) - - Architecture: mips OCTEON using crypto instructions, when available - endmenu diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.h b/arch/mips/include/asm/octeon/crypto.h index cb68f9e284bb..cb68f9e284bb 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-crypto.h +++ b/arch/mips/include/asm/octeon/crypto.h diff --git a/arch/mips/include/asm/time.h b/arch/mips/include/asm/time.h index e855a3611d92..5e7193b759f3 100644 --- a/arch/mips/include/asm/time.h +++ b/arch/mips/include/asm/time.h @@ -55,7 +55,7 @@ static inline int mips_clockevent_init(void) */ extern int init_r4k_clocksource(void); -static inline int init_mips_clocksource(void) +static inline __init int init_mips_clocksource(void) { #ifdef CONFIG_CSRC_R4K return init_r4k_clocksource(); diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index b890d64d352c..3f4c94c88124 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -935,7 +935,7 @@ int regs_query_register_offset(const char *name) static const struct user_regset mips_regsets[] = { [REGSET_GPR] = { - .core_note_type = NT_PRSTATUS, + USER_REGSET_NOTE_TYPE(PRSTATUS), .n = ELF_NGREG, .size = sizeof(unsigned int), .align = sizeof(unsigned int), @@ -943,7 +943,7 @@ static const struct user_regset mips_regsets[] = { .set = gpr32_set, }, [REGSET_DSP] = { - .core_note_type = NT_MIPS_DSP, + USER_REGSET_NOTE_TYPE(MIPS_DSP), .n = NUM_DSP_REGS + 1, .size = sizeof(u32), .align = sizeof(u32), @@ -953,7 +953,7 @@ static const struct user_regset mips_regsets[] = { }, #ifdef CONFIG_MIPS_FP_SUPPORT [REGSET_FPR] = { - .core_note_type = NT_PRFPREG, + USER_REGSET_NOTE_TYPE(PRFPREG), .n = ELF_NFPREG, .size = sizeof(elf_fpreg_t), .align = sizeof(elf_fpreg_t), @@ -961,7 +961,7 @@ static const struct user_regset mips_regsets[] = { .set = fpr_set, }, [REGSET_FP_MODE] = { - .core_note_type = NT_MIPS_FP_MODE, + USER_REGSET_NOTE_TYPE(MIPS_FP_MODE), .n = 1, .size = sizeof(int), .align = sizeof(int), @@ -971,7 +971,7 @@ static const struct user_regset mips_regsets[] = { #endif #ifdef CONFIG_CPU_HAS_MSA [REGSET_MSA] = { - .core_note_type = NT_MIPS_MSA, + USER_REGSET_NOTE_TYPE(MIPS_MSA), .n = NUM_FPU_REGS + 1, .size = 16, .align = 16, @@ -995,7 +995,7 @@ static const struct user_regset_view user_mips_view = { static const struct user_regset mips64_regsets[] = { [REGSET_GPR] = { - .core_note_type = NT_PRSTATUS, + USER_REGSET_NOTE_TYPE(PRSTATUS), .n = ELF_NGREG, .size = sizeof(unsigned long), .align = sizeof(unsigned long), @@ -1003,7 +1003,7 @@ static const struct user_regset mips64_regsets[] = { .set = gpr64_set, }, [REGSET_DSP] = { - .core_note_type = NT_MIPS_DSP, + USER_REGSET_NOTE_TYPE(MIPS_DSP), .n = NUM_DSP_REGS + 1, .size = sizeof(u64), .align = sizeof(u64), @@ -1013,7 +1013,7 @@ static const struct user_regset mips64_regsets[] = { }, #ifdef CONFIG_MIPS_FP_SUPPORT [REGSET_FP_MODE] = { - .core_note_type = NT_MIPS_FP_MODE, + USER_REGSET_NOTE_TYPE(MIPS_FP_MODE), .n = 1, .size = sizeof(int), .align = sizeof(int), @@ -1021,7 +1021,7 @@ static const struct user_regset mips64_regsets[] = { .set = fp_mode_set, }, [REGSET_FPR] = { - .core_note_type = NT_PRFPREG, + USER_REGSET_NOTE_TYPE(PRFPREG), .n = ELF_NFPREG, .size = sizeof(elf_fpreg_t), .align = sizeof(elf_fpreg_t), @@ -1031,7 +1031,7 @@ static const struct user_regset mips64_regsets[] = { #endif #ifdef CONFIG_CPU_HAS_MSA [REGSET_MSA] = { - .core_note_type = NT_MIPS_MSA, + USER_REGSET_NOTE_TYPE(MIPS_MSA), .n = NUM_FPU_REGS + 1, .size = 16, .align = 16, diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index aa70e371bb54..d824ffe9a014 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -406,3 +406,5 @@ 465 n32 listxattrat sys_listxattrat 466 n32 removexattrat sys_removexattrat 467 n32 open_tree_attr sys_open_tree_attr +468 n32 file_getattr sys_file_getattr +469 n32 file_setattr sys_file_setattr diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 1e8c44c7b614..7a7049c2c307 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -382,3 +382,5 @@ 465 n64 listxattrat sys_listxattrat 466 n64 removexattrat sys_removexattrat 467 n64 open_tree_attr sys_open_tree_attr +468 n64 file_getattr sys_file_getattr +469 n64 file_setattr sys_file_setattr diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 114a5a1a6230..d330274f0601 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -455,3 +455,5 @@ 465 o32 listxattrat sys_listxattrat 466 o32 removexattrat sys_removexattrat 467 o32 open_tree_attr sys_open_tree_attr +468 o32 file_getattr sys_file_getattr +469 o32 file_setattr sys_file_setattr diff --git a/arch/mips/lib/.gitignore b/arch/mips/lib/.gitignore new file mode 100644 index 000000000000..647d7a922e68 --- /dev/null +++ b/arch/mips/lib/.gitignore @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only + +# This now-removed directory used to contain generated files. +/crypto/ diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile index 9d75845ef78e..5d5b993cbc2b 100644 --- a/arch/mips/lib/Makefile +++ b/arch/mips/lib/Makefile @@ -3,8 +3,6 @@ # Makefile for MIPS-specific library files.. # -obj-y += crypto/ - lib-y += bitops.o csum_partial.o delay.o memcpy.o memset.o \ mips-atomic.o strncpy_user.o \ strnlen_user.o uncached.o @@ -16,7 +14,5 @@ lib-$(CONFIG_GENERIC_CSUM) := $(filter-out csum_partial.o, $(lib-y)) obj-$(CONFIG_CPU_GENERIC_DUMP_TLB) += dump_tlb.o obj-$(CONFIG_CPU_R3000) += r3k_dump_tlb.o -obj-$(CONFIG_CRC32_ARCH) += crc32-mips.o - # libgcc-style stuff needed in the kernel obj-y += bswapsi.o bswapdi.o multi3.o diff --git a/arch/mips/lib/crc32-mips.c b/arch/mips/lib/crc32-mips.c deleted file mode 100644 index 45e4d2c9fbf5..000000000000 --- a/arch/mips/lib/crc32-mips.c +++ /dev/null @@ -1,183 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * crc32-mips.c - CRC32 and CRC32C using optional MIPSr6 instructions - * - * Module based on arm64/crypto/crc32-arm.c - * - * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org> - * Copyright (C) 2018 MIPS Tech, LLC - */ - -#include <linux/cpufeature.h> -#include <linux/crc32.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <asm/mipsregs.h> -#include <linux/unaligned.h> - -#ifndef TOOLCHAIN_SUPPORTS_CRC -#define _ASM_SET_CRC(OP, SZ, TYPE) \ -_ASM_MACRO_3R(OP, rt, rs, rt2, \ - ".ifnc \\rt, \\rt2\n\t" \ - ".error \"invalid operands \\\"" #OP " \\rt,\\rs,\\rt2\\\"\"\n\t" \ - ".endif\n\t" \ - _ASM_INSN_IF_MIPS(0x7c00000f | (__rt << 16) | (__rs << 21) | \ - ((SZ) << 6) | ((TYPE) << 8)) \ - _ASM_INSN32_IF_MM(0x00000030 | (__rs << 16) | (__rt << 21) | \ - ((SZ) << 14) | ((TYPE) << 3))) -#define _ASM_UNSET_CRC(op, SZ, TYPE) ".purgem " #op "\n\t" -#else /* !TOOLCHAIN_SUPPORTS_CRC */ -#define _ASM_SET_CRC(op, SZ, TYPE) ".set\tcrc\n\t" -#define _ASM_UNSET_CRC(op, SZ, TYPE) -#endif - -#define __CRC32(crc, value, op, SZ, TYPE) \ -do { \ - __asm__ __volatile__( \ - ".set push\n\t" \ - _ASM_SET_CRC(op, SZ, TYPE) \ - #op " %0, %1, %0\n\t" \ - _ASM_UNSET_CRC(op, SZ, TYPE) \ - ".set pop" \ - : "+r" (crc) \ - : "r" (value)); \ -} while (0) - -#define _CRC32_crc32b(crc, value) __CRC32(crc, value, crc32b, 0, 0) -#define _CRC32_crc32h(crc, value) __CRC32(crc, value, crc32h, 1, 0) -#define _CRC32_crc32w(crc, value) __CRC32(crc, value, crc32w, 2, 0) -#define _CRC32_crc32d(crc, value) __CRC32(crc, value, crc32d, 3, 0) -#define _CRC32_crc32cb(crc, value) __CRC32(crc, value, crc32cb, 0, 1) -#define _CRC32_crc32ch(crc, value) __CRC32(crc, value, crc32ch, 1, 1) -#define _CRC32_crc32cw(crc, value) __CRC32(crc, value, crc32cw, 2, 1) -#define _CRC32_crc32cd(crc, value) __CRC32(crc, value, crc32cd, 3, 1) - -#define _CRC32(crc, value, size, op) \ - _CRC32_##op##size(crc, value) - -#define CRC32(crc, value, size) \ - _CRC32(crc, value, size, crc32) - -#define CRC32C(crc, value, size) \ - _CRC32(crc, value, size, crc32c) - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32); - -u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) -{ - if (!static_branch_likely(&have_crc32)) - return crc32_le_base(crc, p, len); - - if (IS_ENABLED(CONFIG_64BIT)) { - for (; len >= sizeof(u64); p += sizeof(u64), len -= sizeof(u64)) { - u64 value = get_unaligned_le64(p); - - CRC32(crc, value, d); - } - - if (len & sizeof(u32)) { - u32 value = get_unaligned_le32(p); - - CRC32(crc, value, w); - p += sizeof(u32); - } - } else { - for (; len >= sizeof(u32); len -= sizeof(u32)) { - u32 value = get_unaligned_le32(p); - - CRC32(crc, value, w); - p += sizeof(u32); - } - } - - if (len & sizeof(u16)) { - u16 value = get_unaligned_le16(p); - - CRC32(crc, value, h); - p += sizeof(u16); - } - - if (len & sizeof(u8)) { - u8 value = *p++; - - CRC32(crc, value, b); - } - - return crc; -} -EXPORT_SYMBOL(crc32_le_arch); - -u32 crc32c_arch(u32 crc, const u8 *p, size_t len) -{ - if (!static_branch_likely(&have_crc32)) - return crc32c_base(crc, p, len); - - if (IS_ENABLED(CONFIG_64BIT)) { - for (; len >= sizeof(u64); p += sizeof(u64), len -= sizeof(u64)) { - u64 value = get_unaligned_le64(p); - - CRC32C(crc, value, d); - } - - if (len & sizeof(u32)) { - u32 value = get_unaligned_le32(p); - - CRC32C(crc, value, w); - p += sizeof(u32); - } - } else { - for (; len >= sizeof(u32); len -= sizeof(u32)) { - u32 value = get_unaligned_le32(p); - - CRC32C(crc, value, w); - p += sizeof(u32); - } - } - - if (len & sizeof(u16)) { - u16 value = get_unaligned_le16(p); - - CRC32C(crc, value, h); - p += sizeof(u16); - } - - if (len & sizeof(u8)) { - u8 value = *p++; - - CRC32C(crc, value, b); - } - return crc; -} -EXPORT_SYMBOL(crc32c_arch); - -u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) -{ - return crc32_be_base(crc, p, len); -} -EXPORT_SYMBOL(crc32_be_arch); - -static int __init crc32_mips_init(void) -{ - if (cpu_have_feature(cpu_feature(MIPS_CRC32))) - static_branch_enable(&have_crc32); - return 0; -} -subsys_initcall(crc32_mips_init); - -static void __exit crc32_mips_exit(void) -{ -} -module_exit(crc32_mips_exit); - -u32 crc32_optimizations(void) -{ - if (static_key_enabled(&have_crc32)) - return CRC32_LE_OPTIMIZATION | CRC32C_OPTIMIZATION; - return 0; -} -EXPORT_SYMBOL(crc32_optimizations); - -MODULE_AUTHOR("Marcin Nowakowski <marcin.nowakowski@mips.com"); -MODULE_DESCRIPTION("CRC32 and CRC32C using optional MIPS instructions"); -MODULE_LICENSE("GPL v2"); diff --git a/arch/mips/lib/crypto/.gitignore b/arch/mips/lib/crypto/.gitignore deleted file mode 100644 index 0d47d4f21c6d..000000000000 --- a/arch/mips/lib/crypto/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -poly1305-core.S diff --git a/arch/mips/lib/crypto/Kconfig b/arch/mips/lib/crypto/Kconfig deleted file mode 100644 index 0670a170c1be..000000000000 --- a/arch/mips/lib/crypto/Kconfig +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -config CRYPTO_CHACHA_MIPS - tristate - depends on CPU_MIPS32_R2 - default CRYPTO_LIB_CHACHA - select CRYPTO_ARCH_HAVE_LIB_CHACHA - -config CRYPTO_POLY1305_MIPS - tristate - default CRYPTO_LIB_POLY1305 - select CRYPTO_ARCH_HAVE_LIB_POLY1305 diff --git a/arch/mips/lib/crypto/Makefile b/arch/mips/lib/crypto/Makefile deleted file mode 100644 index 804488c7aded..000000000000 --- a/arch/mips/lib/crypto/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o -chacha-mips-y := chacha-core.o chacha-glue.o -AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots - -obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o -poly1305-mips-y := poly1305-core.o poly1305-glue.o - -perlasm-flavour-$(CONFIG_32BIT) := o32 -perlasm-flavour-$(CONFIG_64BIT) := 64 - -quiet_cmd_perlasm = PERLASM $@ - cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@) - -$(obj)/poly1305-core.S: $(src)/poly1305-mips.pl FORCE - $(call if_changed,perlasm) - -targets += poly1305-core.S diff --git a/arch/mips/lib/crypto/chacha-core.S b/arch/mips/lib/crypto/chacha-core.S deleted file mode 100644 index 5755f69cfe00..000000000000 --- a/arch/mips/lib/crypto/chacha-core.S +++ /dev/null @@ -1,497 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT */ -/* - * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com>. All Rights Reserved. - * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. - */ - -#define MASK_U32 0x3c -#define CHACHA20_BLOCK_SIZE 64 -#define STACK_SIZE 32 - -#define X0 $t0 -#define X1 $t1 -#define X2 $t2 -#define X3 $t3 -#define X4 $t4 -#define X5 $t5 -#define X6 $t6 -#define X7 $t7 -#define X8 $t8 -#define X9 $t9 -#define X10 $v1 -#define X11 $s6 -#define X12 $s5 -#define X13 $s4 -#define X14 $s3 -#define X15 $s2 -/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */ -#define T0 $s1 -#define T1 $s0 -#define T(n) T ## n -#define X(n) X ## n - -/* Input arguments */ -#define STATE $a0 -#define OUT $a1 -#define IN $a2 -#define BYTES $a3 - -/* Output argument */ -/* NONCE[0] is kept in a register and not in memory. - * We don't want to touch original value in memory. - * Must be incremented every loop iteration. - */ -#define NONCE_0 $v0 - -/* SAVED_X and SAVED_CA are set in the jump table. - * Use regs which are overwritten on exit else we don't leak clear data. - * They are used to handling the last bytes which are not multiple of 4. - */ -#define SAVED_X X15 -#define SAVED_CA $s7 - -#define IS_UNALIGNED $s7 - -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#define MSB 0 -#define LSB 3 -#define ROTx rotl -#define ROTR(n) rotr n, 24 -#define CPU_TO_LE32(n) \ - wsbh n; \ - rotr n, 16; -#else -#define MSB 3 -#define LSB 0 -#define ROTx rotr -#define CPU_TO_LE32(n) -#define ROTR(n) -#endif - -#define FOR_EACH_WORD(x) \ - x( 0); \ - x( 1); \ - x( 2); \ - x( 3); \ - x( 4); \ - x( 5); \ - x( 6); \ - x( 7); \ - x( 8); \ - x( 9); \ - x(10); \ - x(11); \ - x(12); \ - x(13); \ - x(14); \ - x(15); - -#define FOR_EACH_WORD_REV(x) \ - x(15); \ - x(14); \ - x(13); \ - x(12); \ - x(11); \ - x(10); \ - x( 9); \ - x( 8); \ - x( 7); \ - x( 6); \ - x( 5); \ - x( 4); \ - x( 3); \ - x( 2); \ - x( 1); \ - x( 0); - -#define PLUS_ONE_0 1 -#define PLUS_ONE_1 2 -#define PLUS_ONE_2 3 -#define PLUS_ONE_3 4 -#define PLUS_ONE_4 5 -#define PLUS_ONE_5 6 -#define PLUS_ONE_6 7 -#define PLUS_ONE_7 8 -#define PLUS_ONE_8 9 -#define PLUS_ONE_9 10 -#define PLUS_ONE_10 11 -#define PLUS_ONE_11 12 -#define PLUS_ONE_12 13 -#define PLUS_ONE_13 14 -#define PLUS_ONE_14 15 -#define PLUS_ONE_15 16 -#define PLUS_ONE(x) PLUS_ONE_ ## x -#define _CONCAT3(a,b,c) a ## b ## c -#define CONCAT3(a,b,c) _CONCAT3(a,b,c) - -#define STORE_UNALIGNED(x) \ -CONCAT3(.Lchacha_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \ - .if (x != 12); \ - lw T0, (x*4)(STATE); \ - .endif; \ - lwl T1, (x*4)+MSB ## (IN); \ - lwr T1, (x*4)+LSB ## (IN); \ - .if (x == 12); \ - addu X ## x, NONCE_0; \ - .else; \ - addu X ## x, T0; \ - .endif; \ - CPU_TO_LE32(X ## x); \ - xor X ## x, T1; \ - swl X ## x, (x*4)+MSB ## (OUT); \ - swr X ## x, (x*4)+LSB ## (OUT); - -#define STORE_ALIGNED(x) \ -CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \ - .if (x != 12); \ - lw T0, (x*4)(STATE); \ - .endif; \ - lw T1, (x*4) ## (IN); \ - .if (x == 12); \ - addu X ## x, NONCE_0; \ - .else; \ - addu X ## x, T0; \ - .endif; \ - CPU_TO_LE32(X ## x); \ - xor X ## x, T1; \ - sw X ## x, (x*4) ## (OUT); - -/* Jump table macro. - * Used for setup and handling the last bytes, which are not multiple of 4. - * X15 is free to store Xn - * Every jumptable entry must be equal in size. - */ -#define JMPTBL_ALIGNED(x) \ -.Lchacha_mips_jmptbl_aligned_ ## x: ; \ - .set noreorder; \ - b .Lchacha_mips_xor_aligned_ ## x ## _b; \ - .if (x == 12); \ - addu SAVED_X, X ## x, NONCE_0; \ - .else; \ - addu SAVED_X, X ## x, SAVED_CA; \ - .endif; \ - .set reorder - -#define JMPTBL_UNALIGNED(x) \ -.Lchacha_mips_jmptbl_unaligned_ ## x: ; \ - .set noreorder; \ - b .Lchacha_mips_xor_unaligned_ ## x ## _b; \ - .if (x == 12); \ - addu SAVED_X, X ## x, NONCE_0; \ - .else; \ - addu SAVED_X, X ## x, SAVED_CA; \ - .endif; \ - .set reorder - -#define AXR(A, B, C, D, K, L, M, N, V, W, Y, Z, S) \ - addu X(A), X(K); \ - addu X(B), X(L); \ - addu X(C), X(M); \ - addu X(D), X(N); \ - xor X(V), X(A); \ - xor X(W), X(B); \ - xor X(Y), X(C); \ - xor X(Z), X(D); \ - rotl X(V), S; \ - rotl X(W), S; \ - rotl X(Y), S; \ - rotl X(Z), S; - -.text -.set reorder -.set noat -.globl chacha_crypt_arch -.ent chacha_crypt_arch -chacha_crypt_arch: - .frame $sp, STACK_SIZE, $ra - - /* Load number of rounds */ - lw $at, 16($sp) - - addiu $sp, -STACK_SIZE - - /* Return bytes = 0. */ - beqz BYTES, .Lchacha_mips_end - - lw NONCE_0, 48(STATE) - - /* Save s0-s7 */ - sw $s0, 0($sp) - sw $s1, 4($sp) - sw $s2, 8($sp) - sw $s3, 12($sp) - sw $s4, 16($sp) - sw $s5, 20($sp) - sw $s6, 24($sp) - sw $s7, 28($sp) - - /* Test IN or OUT is unaligned. - * IS_UNALIGNED = ( IN | OUT ) & 0x00000003 - */ - or IS_UNALIGNED, IN, OUT - andi IS_UNALIGNED, 0x3 - - b .Lchacha_rounds_start - -.align 4 -.Loop_chacha_rounds: - addiu IN, CHACHA20_BLOCK_SIZE - addiu OUT, CHACHA20_BLOCK_SIZE - addiu NONCE_0, 1 - -.Lchacha_rounds_start: - lw X0, 0(STATE) - lw X1, 4(STATE) - lw X2, 8(STATE) - lw X3, 12(STATE) - - lw X4, 16(STATE) - lw X5, 20(STATE) - lw X6, 24(STATE) - lw X7, 28(STATE) - lw X8, 32(STATE) - lw X9, 36(STATE) - lw X10, 40(STATE) - lw X11, 44(STATE) - - move X12, NONCE_0 - lw X13, 52(STATE) - lw X14, 56(STATE) - lw X15, 60(STATE) - -.Loop_chacha_xor_rounds: - addiu $at, -2 - AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16); - AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12); - AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8); - AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7); - AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16); - AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12); - AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8); - AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7); - bnez $at, .Loop_chacha_xor_rounds - - addiu BYTES, -(CHACHA20_BLOCK_SIZE) - - /* Is data src/dst unaligned? Jump */ - bnez IS_UNALIGNED, .Loop_chacha_unaligned - - /* Set number rounds here to fill delayslot. */ - lw $at, (STACK_SIZE+16)($sp) - - /* BYTES < 0, it has no full block. */ - bltz BYTES, .Lchacha_mips_no_full_block_aligned - - FOR_EACH_WORD_REV(STORE_ALIGNED) - - /* BYTES > 0? Loop again. */ - bgtz BYTES, .Loop_chacha_rounds - - /* Place this here to fill delay slot */ - addiu NONCE_0, 1 - - /* BYTES < 0? Handle last bytes */ - bltz BYTES, .Lchacha_mips_xor_bytes - -.Lchacha_mips_xor_done: - /* Restore used registers */ - lw $s0, 0($sp) - lw $s1, 4($sp) - lw $s2, 8($sp) - lw $s3, 12($sp) - lw $s4, 16($sp) - lw $s5, 20($sp) - lw $s6, 24($sp) - lw $s7, 28($sp) - - /* Write NONCE_0 back to right location in state */ - sw NONCE_0, 48(STATE) - -.Lchacha_mips_end: - addiu $sp, STACK_SIZE - jr $ra - -.Lchacha_mips_no_full_block_aligned: - /* Restore the offset on BYTES */ - addiu BYTES, CHACHA20_BLOCK_SIZE - - /* Get number of full WORDS */ - andi $at, BYTES, MASK_U32 - - /* Load upper half of jump table addr */ - lui T0, %hi(.Lchacha_mips_jmptbl_aligned_0) - - /* Calculate lower half jump table offset */ - ins T0, $at, 1, 6 - - /* Add offset to STATE */ - addu T1, STATE, $at - - /* Add lower half jump table addr */ - addiu T0, %lo(.Lchacha_mips_jmptbl_aligned_0) - - /* Read value from STATE */ - lw SAVED_CA, 0(T1) - - /* Store remaining bytecounter as negative value */ - subu BYTES, $at, BYTES - - jr T0 - - /* Jump table */ - FOR_EACH_WORD(JMPTBL_ALIGNED) - - -.Loop_chacha_unaligned: - /* Set number rounds here to fill delayslot. */ - lw $at, (STACK_SIZE+16)($sp) - - /* BYTES > 0, it has no full block. */ - bltz BYTES, .Lchacha_mips_no_full_block_unaligned - - FOR_EACH_WORD_REV(STORE_UNALIGNED) - - /* BYTES > 0? Loop again. */ - bgtz BYTES, .Loop_chacha_rounds - - /* Write NONCE_0 back to right location in state */ - sw NONCE_0, 48(STATE) - - .set noreorder - /* Fall through to byte handling */ - bgez BYTES, .Lchacha_mips_xor_done -.Lchacha_mips_xor_unaligned_0_b: -.Lchacha_mips_xor_aligned_0_b: - /* Place this here to fill delay slot */ - addiu NONCE_0, 1 - .set reorder - -.Lchacha_mips_xor_bytes: - addu IN, $at - addu OUT, $at - /* First byte */ - lbu T1, 0(IN) - addiu $at, BYTES, 1 - CPU_TO_LE32(SAVED_X) - ROTR(SAVED_X) - xor T1, SAVED_X - sb T1, 0(OUT) - beqz $at, .Lchacha_mips_xor_done - /* Second byte */ - lbu T1, 1(IN) - addiu $at, BYTES, 2 - ROTx SAVED_X, 8 - xor T1, SAVED_X - sb T1, 1(OUT) - beqz $at, .Lchacha_mips_xor_done - /* Third byte */ - lbu T1, 2(IN) - ROTx SAVED_X, 8 - xor T1, SAVED_X - sb T1, 2(OUT) - b .Lchacha_mips_xor_done - -.Lchacha_mips_no_full_block_unaligned: - /* Restore the offset on BYTES */ - addiu BYTES, CHACHA20_BLOCK_SIZE - - /* Get number of full WORDS */ - andi $at, BYTES, MASK_U32 - - /* Load upper half of jump table addr */ - lui T0, %hi(.Lchacha_mips_jmptbl_unaligned_0) - - /* Calculate lower half jump table offset */ - ins T0, $at, 1, 6 - - /* Add offset to STATE */ - addu T1, STATE, $at - - /* Add lower half jump table addr */ - addiu T0, %lo(.Lchacha_mips_jmptbl_unaligned_0) - - /* Read value from STATE */ - lw SAVED_CA, 0(T1) - - /* Store remaining bytecounter as negative value */ - subu BYTES, $at, BYTES - - jr T0 - - /* Jump table */ - FOR_EACH_WORD(JMPTBL_UNALIGNED) -.end chacha_crypt_arch -.set at - -/* Input arguments - * STATE $a0 - * OUT $a1 - * NROUND $a2 - */ - -#undef X12 -#undef X13 -#undef X14 -#undef X15 - -#define X12 $a3 -#define X13 $at -#define X14 $v0 -#define X15 STATE - -.set noat -.globl hchacha_block_arch -.ent hchacha_block_arch -hchacha_block_arch: - .frame $sp, STACK_SIZE, $ra - - addiu $sp, -STACK_SIZE - - /* Save X11(s6) */ - sw X11, 0($sp) - - lw X0, 0(STATE) - lw X1, 4(STATE) - lw X2, 8(STATE) - lw X3, 12(STATE) - lw X4, 16(STATE) - lw X5, 20(STATE) - lw X6, 24(STATE) - lw X7, 28(STATE) - lw X8, 32(STATE) - lw X9, 36(STATE) - lw X10, 40(STATE) - lw X11, 44(STATE) - lw X12, 48(STATE) - lw X13, 52(STATE) - lw X14, 56(STATE) - lw X15, 60(STATE) - -.Loop_hchacha_xor_rounds: - addiu $a2, -2 - AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16); - AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12); - AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8); - AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7); - AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16); - AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12); - AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8); - AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7); - bnez $a2, .Loop_hchacha_xor_rounds - - /* Restore used register */ - lw X11, 0($sp) - - sw X0, 0(OUT) - sw X1, 4(OUT) - sw X2, 8(OUT) - sw X3, 12(OUT) - sw X12, 16(OUT) - sw X13, 20(OUT) - sw X14, 24(OUT) - sw X15, 28(OUT) - - addiu $sp, STACK_SIZE - jr $ra -.end hchacha_block_arch -.set at diff --git a/arch/mips/lib/crypto/chacha-glue.c b/arch/mips/lib/crypto/chacha-glue.c deleted file mode 100644 index 88c097594eb0..000000000000 --- a/arch/mips/lib/crypto/chacha-glue.c +++ /dev/null @@ -1,29 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ChaCha and HChaCha functions (MIPS optimized) - * - * Copyright (C) 2019 Linaro, Ltd. <ard.biesheuvel@linaro.org> - */ - -#include <crypto/chacha.h> -#include <linux/kernel.h> -#include <linux/module.h> - -asmlinkage void chacha_crypt_arch(struct chacha_state *state, - u8 *dst, const u8 *src, - unsigned int bytes, int nrounds); -EXPORT_SYMBOL(chacha_crypt_arch); - -asmlinkage void hchacha_block_arch(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds); -EXPORT_SYMBOL(hchacha_block_arch); - -bool chacha_is_arch_optimized(void) -{ - return true; -} -EXPORT_SYMBOL(chacha_is_arch_optimized); - -MODULE_DESCRIPTION("ChaCha and HChaCha functions (MIPS optimized)"); -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); -MODULE_LICENSE("GPL v2"); diff --git a/arch/mips/lib/crypto/poly1305-glue.c b/arch/mips/lib/crypto/poly1305-glue.c deleted file mode 100644 index 764a38a65200..000000000000 --- a/arch/mips/lib/crypto/poly1305-glue.c +++ /dev/null @@ -1,33 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * OpenSSL/Cryptogams accelerated Poly1305 transform for MIPS - * - * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org> - */ - -#include <crypto/internal/poly1305.h> -#include <linux/cpufeature.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/unaligned.h> - -asmlinkage void poly1305_block_init_arch( - struct poly1305_block_state *state, - const u8 raw_key[POLY1305_BLOCK_SIZE]); -EXPORT_SYMBOL_GPL(poly1305_block_init_arch); -asmlinkage void poly1305_blocks_arch(struct poly1305_block_state *state, - const u8 *src, u32 len, u32 hibit); -EXPORT_SYMBOL_GPL(poly1305_blocks_arch); -asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, - u8 digest[POLY1305_DIGEST_SIZE], - const u32 nonce[4]); -EXPORT_SYMBOL_GPL(poly1305_emit_arch); - -bool poly1305_is_arch_optimized(void) -{ - return true; -} -EXPORT_SYMBOL(poly1305_is_arch_optimized); - -MODULE_DESCRIPTION("Poly1305 transform (MIPS accelerated"); -MODULE_LICENSE("GPL v2"); diff --git a/arch/mips/lib/crypto/poly1305-mips.pl b/arch/mips/lib/crypto/poly1305-mips.pl deleted file mode 100644 index 399f10c3e385..000000000000 --- a/arch/mips/lib/crypto/poly1305-mips.pl +++ /dev/null @@ -1,1273 +0,0 @@ -#!/usr/bin/env perl -# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause -# -# ==================================================================== -# Written by Andy Polyakov, @dot-asm, originally for the OpenSSL -# project. -# ==================================================================== - -# Poly1305 hash for MIPS. -# -# May 2016 -# -# Numbers are cycles per processed byte with poly1305_blocks alone. -# -# IALU/gcc -# R1x000 ~5.5/+130% (big-endian) -# Octeon II 2.50/+70% (little-endian) -# -# March 2019 -# -# Add 32-bit code path. -# -# October 2019 -# -# Modulo-scheduling reduction allows to omit dependency chain at the -# end of inner loop and improve performance. Also optimize MIPS32R2 -# code path for MIPS 1004K core. Per René von Dorst's suggestions. -# -# IALU/gcc -# R1x000 ~9.8/? (big-endian) -# Octeon II 3.65/+140% (little-endian) -# MT7621/1004K 4.75/? (little-endian) -# -###################################################################### -# There is a number of MIPS ABI in use, O32 and N32/64 are most -# widely used. Then there is a new contender: NUBI. It appears that if -# one picks the latter, it's possible to arrange code in ABI neutral -# manner. Therefore let's stick to NUBI register layout: -# -($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25)); -($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); -($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23)); -($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31)); -# -# The return value is placed in $a0. Following coding rules facilitate -# interoperability: -# -# - never ever touch $tp, "thread pointer", former $gp [o32 can be -# excluded from the rule, because it's specified volatile]; -# - copy return value to $t0, former $v0 [or to $a0 if you're adapting -# old code]; -# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary; -# -# For reference here is register layout for N32/64 MIPS ABIs: -# -# ($zero,$at,$v0,$v1)=map("\$$_",(0..3)); -# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); -# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); -# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); -# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); -# -# <appro@openssl.org> -# -###################################################################### - -$flavour = shift || "64"; # supported flavours are o32,n32,64,nubi32,nubi64 - -$v0 = ($flavour =~ /nubi/i) ? $a0 : $t0; - -if ($flavour =~ /64|n32/i) {{{ -###################################################################### -# 64-bit code path -# - -my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3); -my ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1); - -$code.=<<___; -#if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\ - defined(_MIPS_ARCH_MIPS64R6)) \\ - && !defined(_MIPS_ARCH_MIPS64R2) -# define _MIPS_ARCH_MIPS64R2 -#endif - -#if defined(_MIPS_ARCH_MIPS64R6) -# define dmultu(rs,rt) -# define mflo(rd,rs,rt) dmulu rd,rs,rt -# define mfhi(rd,rs,rt) dmuhu rd,rs,rt -#else -# define dmultu(rs,rt) dmultu rs,rt -# define mflo(rd,rs,rt) mflo rd -# define mfhi(rd,rs,rt) mfhi rd -#endif - -#ifdef __KERNEL__ -# define poly1305_init poly1305_block_init_arch -# define poly1305_blocks poly1305_blocks_arch -# define poly1305_emit poly1305_emit_arch -#endif - -#if defined(__MIPSEB__) && !defined(MIPSEB) -# define MIPSEB -#endif - -#ifdef MIPSEB -# define MSB 0 -# define LSB 7 -#else -# define MSB 7 -# define LSB 0 -#endif - -.text -.set noat -.set noreorder - -.align 5 -.globl poly1305_init -.ent poly1305_init -poly1305_init: - .frame $sp,0,$ra - .set reorder - - sd $zero,0($ctx) - sd $zero,8($ctx) - sd $zero,16($ctx) - - beqz $inp,.Lno_key - -#if defined(_MIPS_ARCH_MIPS64R6) - andi $tmp0,$inp,7 # $inp % 8 - dsubu $inp,$inp,$tmp0 # align $inp - sll $tmp0,$tmp0,3 # byte to bit offset - ld $in0,0($inp) - ld $in1,8($inp) - beqz $tmp0,.Laligned_key - ld $tmp2,16($inp) - - subu $tmp1,$zero,$tmp0 -# ifdef MIPSEB - dsllv $in0,$in0,$tmp0 - dsrlv $tmp3,$in1,$tmp1 - dsllv $in1,$in1,$tmp0 - dsrlv $tmp2,$tmp2,$tmp1 -# else - dsrlv $in0,$in0,$tmp0 - dsllv $tmp3,$in1,$tmp1 - dsrlv $in1,$in1,$tmp0 - dsllv $tmp2,$tmp2,$tmp1 -# endif - or $in0,$in0,$tmp3 - or $in1,$in1,$tmp2 -.Laligned_key: -#else - ldl $in0,0+MSB($inp) - ldl $in1,8+MSB($inp) - ldr $in0,0+LSB($inp) - ldr $in1,8+LSB($inp) -#endif -#ifdef MIPSEB -# if defined(_MIPS_ARCH_MIPS64R2) - dsbh $in0,$in0 # byte swap - dsbh $in1,$in1 - dshd $in0,$in0 - dshd $in1,$in1 -# else - ori $tmp0,$zero,0xFF - dsll $tmp2,$tmp0,32 - or $tmp0,$tmp2 # 0x000000FF000000FF - - and $tmp1,$in0,$tmp0 # byte swap - and $tmp3,$in1,$tmp0 - dsrl $tmp2,$in0,24 - dsrl $tmp4,$in1,24 - dsll $tmp1,24 - dsll $tmp3,24 - and $tmp2,$tmp0 - and $tmp4,$tmp0 - dsll $tmp0,8 # 0x0000FF000000FF00 - or $tmp1,$tmp2 - or $tmp3,$tmp4 - and $tmp2,$in0,$tmp0 - and $tmp4,$in1,$tmp0 - dsrl $in0,8 - dsrl $in1,8 - dsll $tmp2,8 - dsll $tmp4,8 - and $in0,$tmp0 - and $in1,$tmp0 - or $tmp1,$tmp2 - or $tmp3,$tmp4 - or $in0,$tmp1 - or $in1,$tmp3 - dsrl $tmp1,$in0,32 - dsrl $tmp3,$in1,32 - dsll $in0,32 - dsll $in1,32 - or $in0,$tmp1 - or $in1,$tmp3 -# endif -#endif - li $tmp0,1 - dsll $tmp0,32 # 0x0000000100000000 - daddiu $tmp0,-63 # 0x00000000ffffffc1 - dsll $tmp0,28 # 0x0ffffffc10000000 - daddiu $tmp0,-1 # 0x0ffffffc0fffffff - - and $in0,$tmp0 - daddiu $tmp0,-3 # 0x0ffffffc0ffffffc - and $in1,$tmp0 - - sd $in0,24($ctx) - dsrl $tmp0,$in1,2 - sd $in1,32($ctx) - daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2) - sd $tmp0,40($ctx) - -.Lno_key: - li $v0,0 # return 0 - jr $ra -.end poly1305_init -___ -{ -my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000"; - -my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) = - ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2); -my ($shr,$shl) = ($s6,$s7); # used on R6 - -$code.=<<___; -.align 5 -.globl poly1305_blocks -.ent poly1305_blocks -poly1305_blocks: - .set noreorder - dsrl $len,4 # number of complete blocks - bnez $len,poly1305_blocks_internal - nop - jr $ra - nop -.end poly1305_blocks - -.align 5 -.ent poly1305_blocks_internal -poly1305_blocks_internal: - .set noreorder -#if defined(_MIPS_ARCH_MIPS64R6) - .frame $sp,8*8,$ra - .mask $SAVED_REGS_MASK|0x000c0000,-8 - dsubu $sp,8*8 - sd $s7,56($sp) - sd $s6,48($sp) -#else - .frame $sp,6*8,$ra - .mask $SAVED_REGS_MASK,-8 - dsubu $sp,6*8 -#endif - sd $s5,40($sp) - sd $s4,32($sp) -___ -$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue - sd $s3,24($sp) - sd $s2,16($sp) - sd $s1,8($sp) - sd $s0,0($sp) -___ -$code.=<<___; - .set reorder - -#if defined(_MIPS_ARCH_MIPS64R6) - andi $shr,$inp,7 - dsubu $inp,$inp,$shr # align $inp - sll $shr,$shr,3 # byte to bit offset - subu $shl,$zero,$shr -#endif - - ld $h0,0($ctx) # load hash value - ld $h1,8($ctx) - ld $h2,16($ctx) - - ld $r0,24($ctx) # load key - ld $r1,32($ctx) - ld $rs1,40($ctx) - - dsll $len,4 - daddu $len,$inp # end of buffer - b .Loop - -.align 4 -.Loop: -#if defined(_MIPS_ARCH_MIPS64R6) - ld $in0,0($inp) # load input - ld $in1,8($inp) - beqz $shr,.Laligned_inp - - ld $tmp2,16($inp) -# ifdef MIPSEB - dsllv $in0,$in0,$shr - dsrlv $tmp3,$in1,$shl - dsllv $in1,$in1,$shr - dsrlv $tmp2,$tmp2,$shl -# else - dsrlv $in0,$in0,$shr - dsllv $tmp3,$in1,$shl - dsrlv $in1,$in1,$shr - dsllv $tmp2,$tmp2,$shl -# endif - or $in0,$in0,$tmp3 - or $in1,$in1,$tmp2 -.Laligned_inp: -#else - ldl $in0,0+MSB($inp) # load input - ldl $in1,8+MSB($inp) - ldr $in0,0+LSB($inp) - ldr $in1,8+LSB($inp) -#endif - daddiu $inp,16 -#ifdef MIPSEB -# if defined(_MIPS_ARCH_MIPS64R2) - dsbh $in0,$in0 # byte swap - dsbh $in1,$in1 - dshd $in0,$in0 - dshd $in1,$in1 -# else - ori $tmp0,$zero,0xFF - dsll $tmp2,$tmp0,32 - or $tmp0,$tmp2 # 0x000000FF000000FF - - and $tmp1,$in0,$tmp0 # byte swap - and $tmp3,$in1,$tmp0 - dsrl $tmp2,$in0,24 - dsrl $tmp4,$in1,24 - dsll $tmp1,24 - dsll $tmp3,24 - and $tmp2,$tmp0 - and $tmp4,$tmp0 - dsll $tmp0,8 # 0x0000FF000000FF00 - or $tmp1,$tmp2 - or $tmp3,$tmp4 - and $tmp2,$in0,$tmp0 - and $tmp4,$in1,$tmp0 - dsrl $in0,8 - dsrl $in1,8 - dsll $tmp2,8 - dsll $tmp4,8 - and $in0,$tmp0 - and $in1,$tmp0 - or $tmp1,$tmp2 - or $tmp3,$tmp4 - or $in0,$tmp1 - or $in1,$tmp3 - dsrl $tmp1,$in0,32 - dsrl $tmp3,$in1,32 - dsll $in0,32 - dsll $in1,32 - or $in0,$tmp1 - or $in1,$tmp3 -# endif -#endif - dsrl $tmp1,$h2,2 # modulo-scheduled reduction - andi $h2,$h2,3 - dsll $tmp0,$tmp1,2 - - daddu $d0,$h0,$in0 # accumulate input - daddu $tmp1,$tmp0 - sltu $tmp0,$d0,$h0 - daddu $d0,$d0,$tmp1 # ... and residue - sltu $tmp1,$d0,$tmp1 - daddu $d1,$h1,$in1 - daddu $tmp0,$tmp1 - sltu $tmp1,$d1,$h1 - daddu $d1,$tmp0 - - dmultu ($r0,$d0) # h0*r0 - daddu $d2,$h2,$padbit - sltu $tmp0,$d1,$tmp0 - mflo ($h0,$r0,$d0) - mfhi ($h1,$r0,$d0) - - dmultu ($rs1,$d1) # h1*5*r1 - daddu $d2,$tmp1 - daddu $d2,$tmp0 - mflo ($tmp0,$rs1,$d1) - mfhi ($tmp1,$rs1,$d1) - - dmultu ($r1,$d0) # h0*r1 - mflo ($tmp2,$r1,$d0) - mfhi ($h2,$r1,$d0) - daddu $h0,$tmp0 - daddu $h1,$tmp1 - sltu $tmp0,$h0,$tmp0 - - dmultu ($r0,$d1) # h1*r0 - daddu $h1,$tmp0 - daddu $h1,$tmp2 - mflo ($tmp0,$r0,$d1) - mfhi ($tmp1,$r0,$d1) - - dmultu ($rs1,$d2) # h2*5*r1 - sltu $tmp2,$h1,$tmp2 - daddu $h2,$tmp2 - mflo ($tmp2,$rs1,$d2) - - dmultu ($r0,$d2) # h2*r0 - daddu $h1,$tmp0 - daddu $h2,$tmp1 - mflo ($tmp3,$r0,$d2) - sltu $tmp0,$h1,$tmp0 - daddu $h2,$tmp0 - - daddu $h1,$tmp2 - sltu $tmp2,$h1,$tmp2 - daddu $h2,$tmp2 - daddu $h2,$tmp3 - - bne $inp,$len,.Loop - - sd $h0,0($ctx) # store hash value - sd $h1,8($ctx) - sd $h2,16($ctx) - - .set noreorder -#if defined(_MIPS_ARCH_MIPS64R6) - ld $s7,56($sp) - ld $s6,48($sp) -#endif - ld $s5,40($sp) # epilogue - ld $s4,32($sp) -___ -$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue - ld $s3,24($sp) - ld $s2,16($sp) - ld $s1,8($sp) - ld $s0,0($sp) -___ -$code.=<<___; - jr $ra -#if defined(_MIPS_ARCH_MIPS64R6) - daddu $sp,8*8 -#else - daddu $sp,6*8 -#endif -.end poly1305_blocks_internal -___ -} -{ -my ($ctx,$mac,$nonce) = ($a0,$a1,$a2); - -$code.=<<___; -.align 5 -.globl poly1305_emit -.ent poly1305_emit -poly1305_emit: - .frame $sp,0,$ra - .set reorder - - ld $tmp2,16($ctx) - ld $tmp0,0($ctx) - ld $tmp1,8($ctx) - - li $in0,-4 # final reduction - dsrl $in1,$tmp2,2 - and $in0,$tmp2 - andi $tmp2,$tmp2,3 - daddu $in0,$in1 - - daddu $tmp0,$tmp0,$in0 - sltu $in1,$tmp0,$in0 - daddiu $in0,$tmp0,5 # compare to modulus - daddu $tmp1,$tmp1,$in1 - sltiu $tmp3,$in0,5 - sltu $tmp4,$tmp1,$in1 - daddu $in1,$tmp1,$tmp3 - daddu $tmp2,$tmp2,$tmp4 - sltu $tmp3,$in1,$tmp3 - daddu $tmp2,$tmp2,$tmp3 - - dsrl $tmp2,2 # see if it carried/borrowed - dsubu $tmp2,$zero,$tmp2 - - xor $in0,$tmp0 - xor $in1,$tmp1 - and $in0,$tmp2 - and $in1,$tmp2 - xor $in0,$tmp0 - xor $in1,$tmp1 - - lwu $tmp0,0($nonce) # load nonce - lwu $tmp1,4($nonce) - lwu $tmp2,8($nonce) - lwu $tmp3,12($nonce) - dsll $tmp1,32 - dsll $tmp3,32 - or $tmp0,$tmp1 - or $tmp2,$tmp3 - - daddu $in0,$tmp0 # accumulate nonce - daddu $in1,$tmp2 - sltu $tmp0,$in0,$tmp0 - daddu $in1,$tmp0 - - dsrl $tmp0,$in0,8 # write mac value - dsrl $tmp1,$in0,16 - dsrl $tmp2,$in0,24 - sb $in0,0($mac) - dsrl $tmp3,$in0,32 - sb $tmp0,1($mac) - dsrl $tmp0,$in0,40 - sb $tmp1,2($mac) - dsrl $tmp1,$in0,48 - sb $tmp2,3($mac) - dsrl $tmp2,$in0,56 - sb $tmp3,4($mac) - dsrl $tmp3,$in1,8 - sb $tmp0,5($mac) - dsrl $tmp0,$in1,16 - sb $tmp1,6($mac) - dsrl $tmp1,$in1,24 - sb $tmp2,7($mac) - - sb $in1,8($mac) - dsrl $tmp2,$in1,32 - sb $tmp3,9($mac) - dsrl $tmp3,$in1,40 - sb $tmp0,10($mac) - dsrl $tmp0,$in1,48 - sb $tmp1,11($mac) - dsrl $tmp1,$in1,56 - sb $tmp2,12($mac) - sb $tmp3,13($mac) - sb $tmp0,14($mac) - sb $tmp1,15($mac) - - jr $ra -.end poly1305_emit -.rdata -.asciiz "Poly1305 for MIPS64, CRYPTOGAMS by \@dot-asm" -.align 2 -___ -} -}}} else {{{ -###################################################################### -# 32-bit code path -# - -my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3); -my ($in0,$in1,$in2,$in3,$tmp0,$tmp1,$tmp2,$tmp3) = - ($a4,$a5,$a6,$a7,$at,$t0,$t1,$t2); - -$code.=<<___; -#if (defined(_MIPS_ARCH_MIPS32R3) || defined(_MIPS_ARCH_MIPS32R5) || \\ - defined(_MIPS_ARCH_MIPS32R6)) \\ - && !defined(_MIPS_ARCH_MIPS32R2) -# define _MIPS_ARCH_MIPS32R2 -#endif - -#if defined(_MIPS_ARCH_MIPS32R6) -# define multu(rs,rt) -# define mflo(rd,rs,rt) mulu rd,rs,rt -# define mfhi(rd,rs,rt) muhu rd,rs,rt -#else -# define multu(rs,rt) multu rs,rt -# define mflo(rd,rs,rt) mflo rd -# define mfhi(rd,rs,rt) mfhi rd -#endif - -#ifdef __KERNEL__ -# define poly1305_init poly1305_block_init_arch -# define poly1305_blocks poly1305_blocks_arch -# define poly1305_emit poly1305_emit_arch -#endif - -#if defined(__MIPSEB__) && !defined(MIPSEB) -# define MIPSEB -#endif - -#ifdef MIPSEB -# define MSB 0 -# define LSB 3 -#else -# define MSB 3 -# define LSB 0 -#endif - -.text -.set noat -.set noreorder - -.align 5 -.globl poly1305_init -.ent poly1305_init -poly1305_init: - .frame $sp,0,$ra - .set reorder - - sw $zero,0($ctx) - sw $zero,4($ctx) - sw $zero,8($ctx) - sw $zero,12($ctx) - sw $zero,16($ctx) - - beqz $inp,.Lno_key - -#if defined(_MIPS_ARCH_MIPS32R6) - andi $tmp0,$inp,3 # $inp % 4 - subu $inp,$inp,$tmp0 # align $inp - sll $tmp0,$tmp0,3 # byte to bit offset - lw $in0,0($inp) - lw $in1,4($inp) - lw $in2,8($inp) - lw $in3,12($inp) - beqz $tmp0,.Laligned_key - - lw $tmp2,16($inp) - subu $tmp1,$zero,$tmp0 -# ifdef MIPSEB - sllv $in0,$in0,$tmp0 - srlv $tmp3,$in1,$tmp1 - sllv $in1,$in1,$tmp0 - or $in0,$in0,$tmp3 - srlv $tmp3,$in2,$tmp1 - sllv $in2,$in2,$tmp0 - or $in1,$in1,$tmp3 - srlv $tmp3,$in3,$tmp1 - sllv $in3,$in3,$tmp0 - or $in2,$in2,$tmp3 - srlv $tmp2,$tmp2,$tmp1 - or $in3,$in3,$tmp2 -# else - srlv $in0,$in0,$tmp0 - sllv $tmp3,$in1,$tmp1 - srlv $in1,$in1,$tmp0 - or $in0,$in0,$tmp3 - sllv $tmp3,$in2,$tmp1 - srlv $in2,$in2,$tmp0 - or $in1,$in1,$tmp3 - sllv $tmp3,$in3,$tmp1 - srlv $in3,$in3,$tmp0 - or $in2,$in2,$tmp3 - sllv $tmp2,$tmp2,$tmp1 - or $in3,$in3,$tmp2 -# endif -.Laligned_key: -#else - lwl $in0,0+MSB($inp) - lwl $in1,4+MSB($inp) - lwl $in2,8+MSB($inp) - lwl $in3,12+MSB($inp) - lwr $in0,0+LSB($inp) - lwr $in1,4+LSB($inp) - lwr $in2,8+LSB($inp) - lwr $in3,12+LSB($inp) -#endif -#ifdef MIPSEB -# if defined(_MIPS_ARCH_MIPS32R2) - wsbh $in0,$in0 # byte swap - wsbh $in1,$in1 - wsbh $in2,$in2 - wsbh $in3,$in3 - rotr $in0,$in0,16 - rotr $in1,$in1,16 - rotr $in2,$in2,16 - rotr $in3,$in3,16 -# else - srl $tmp0,$in0,24 # byte swap - srl $tmp1,$in0,8 - andi $tmp2,$in0,0xFF00 - sll $in0,$in0,24 - andi $tmp1,0xFF00 - sll $tmp2,$tmp2,8 - or $in0,$tmp0 - srl $tmp0,$in1,24 - or $tmp1,$tmp2 - srl $tmp2,$in1,8 - or $in0,$tmp1 - andi $tmp1,$in1,0xFF00 - sll $in1,$in1,24 - andi $tmp2,0xFF00 - sll $tmp1,$tmp1,8 - or $in1,$tmp0 - srl $tmp0,$in2,24 - or $tmp2,$tmp1 - srl $tmp1,$in2,8 - or $in1,$tmp2 - andi $tmp2,$in2,0xFF00 - sll $in2,$in2,24 - andi $tmp1,0xFF00 - sll $tmp2,$tmp2,8 - or $in2,$tmp0 - srl $tmp0,$in3,24 - or $tmp1,$tmp2 - srl $tmp2,$in3,8 - or $in2,$tmp1 - andi $tmp1,$in3,0xFF00 - sll $in3,$in3,24 - andi $tmp2,0xFF00 - sll $tmp1,$tmp1,8 - or $in3,$tmp0 - or $tmp2,$tmp1 - or $in3,$tmp2 -# endif -#endif - lui $tmp0,0x0fff - ori $tmp0,0xffff # 0x0fffffff - and $in0,$in0,$tmp0 - subu $tmp0,3 # 0x0ffffffc - and $in1,$in1,$tmp0 - and $in2,$in2,$tmp0 - and $in3,$in3,$tmp0 - - sw $in0,20($ctx) - sw $in1,24($ctx) - sw $in2,28($ctx) - sw $in3,32($ctx) - - srl $tmp1,$in1,2 - srl $tmp2,$in2,2 - srl $tmp3,$in3,2 - addu $in1,$in1,$tmp1 # s1 = r1 + (r1 >> 2) - addu $in2,$in2,$tmp2 - addu $in3,$in3,$tmp3 - sw $in1,36($ctx) - sw $in2,40($ctx) - sw $in3,44($ctx) -.Lno_key: - li $v0,0 - jr $ra -.end poly1305_init -___ -{ -my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x00fff000" : "0x00ff0000"; - -my ($h0,$h1,$h2,$h3,$h4, $r0,$r1,$r2,$r3, $rs1,$rs2,$rs3) = - ($s0,$s1,$s2,$s3,$s4, $s5,$s6,$s7,$s8, $s9,$s10,$s11); -my ($d0,$d1,$d2,$d3) = - ($a4,$a5,$a6,$a7); -my $shr = $t2; # used on R6 -my $one = $t2; # used on R2 - -$code.=<<___; -.globl poly1305_blocks -.align 5 -.ent poly1305_blocks -poly1305_blocks: - .frame $sp,16*4,$ra - .mask $SAVED_REGS_MASK,-4 - .set noreorder - subu $sp, $sp,4*12 - sw $s11,4*11($sp) - sw $s10,4*10($sp) - sw $s9, 4*9($sp) - sw $s8, 4*8($sp) - sw $s7, 4*7($sp) - sw $s6, 4*6($sp) - sw $s5, 4*5($sp) - sw $s4, 4*4($sp) -___ -$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue - sw $s3, 4*3($sp) - sw $s2, 4*2($sp) - sw $s1, 4*1($sp) - sw $s0, 4*0($sp) -___ -$code.=<<___; - .set reorder - - srl $len,4 # number of complete blocks - li $one,1 - beqz $len,.Labort - -#if defined(_MIPS_ARCH_MIPS32R6) - andi $shr,$inp,3 - subu $inp,$inp,$shr # align $inp - sll $shr,$shr,3 # byte to bit offset -#endif - - lw $h0,0($ctx) # load hash value - lw $h1,4($ctx) - lw $h2,8($ctx) - lw $h3,12($ctx) - lw $h4,16($ctx) - - lw $r0,20($ctx) # load key - lw $r1,24($ctx) - lw $r2,28($ctx) - lw $r3,32($ctx) - lw $rs1,36($ctx) - lw $rs2,40($ctx) - lw $rs3,44($ctx) - - sll $len,4 - addu $len,$len,$inp # end of buffer - b .Loop - -.align 4 -.Loop: -#if defined(_MIPS_ARCH_MIPS32R6) - lw $d0,0($inp) # load input - lw $d1,4($inp) - lw $d2,8($inp) - lw $d3,12($inp) - beqz $shr,.Laligned_inp - - lw $t0,16($inp) - subu $t1,$zero,$shr -# ifdef MIPSEB - sllv $d0,$d0,$shr - srlv $at,$d1,$t1 - sllv $d1,$d1,$shr - or $d0,$d0,$at - srlv $at,$d2,$t1 - sllv $d2,$d2,$shr - or $d1,$d1,$at - srlv $at,$d3,$t1 - sllv $d3,$d3,$shr - or $d2,$d2,$at - srlv $t0,$t0,$t1 - or $d3,$d3,$t0 -# else - srlv $d0,$d0,$shr - sllv $at,$d1,$t1 - srlv $d1,$d1,$shr - or $d0,$d0,$at - sllv $at,$d2,$t1 - srlv $d2,$d2,$shr - or $d1,$d1,$at - sllv $at,$d3,$t1 - srlv $d3,$d3,$shr - or $d2,$d2,$at - sllv $t0,$t0,$t1 - or $d3,$d3,$t0 -# endif -.Laligned_inp: -#else - lwl $d0,0+MSB($inp) # load input - lwl $d1,4+MSB($inp) - lwl $d2,8+MSB($inp) - lwl $d3,12+MSB($inp) - lwr $d0,0+LSB($inp) - lwr $d1,4+LSB($inp) - lwr $d2,8+LSB($inp) - lwr $d3,12+LSB($inp) -#endif -#ifdef MIPSEB -# if defined(_MIPS_ARCH_MIPS32R2) - wsbh $d0,$d0 # byte swap - wsbh $d1,$d1 - wsbh $d2,$d2 - wsbh $d3,$d3 - rotr $d0,$d0,16 - rotr $d1,$d1,16 - rotr $d2,$d2,16 - rotr $d3,$d3,16 -# else - srl $at,$d0,24 # byte swap - srl $t0,$d0,8 - andi $t1,$d0,0xFF00 - sll $d0,$d0,24 - andi $t0,0xFF00 - sll $t1,$t1,8 - or $d0,$at - srl $at,$d1,24 - or $t0,$t1 - srl $t1,$d1,8 - or $d0,$t0 - andi $t0,$d1,0xFF00 - sll $d1,$d1,24 - andi $t1,0xFF00 - sll $t0,$t0,8 - or $d1,$at - srl $at,$d2,24 - or $t1,$t0 - srl $t0,$d2,8 - or $d1,$t1 - andi $t1,$d2,0xFF00 - sll $d2,$d2,24 - andi $t0,0xFF00 - sll $t1,$t1,8 - or $d2,$at - srl $at,$d3,24 - or $t0,$t1 - srl $t1,$d3,8 - or $d2,$t0 - andi $t0,$d3,0xFF00 - sll $d3,$d3,24 - andi $t1,0xFF00 - sll $t0,$t0,8 - or $d3,$at - or $t1,$t0 - or $d3,$t1 -# endif -#endif - srl $t0,$h4,2 # modulo-scheduled reduction - andi $h4,$h4,3 - sll $at,$t0,2 - - addu $d0,$d0,$h0 # accumulate input - addu $t0,$t0,$at - sltu $h0,$d0,$h0 - addu $d0,$d0,$t0 # ... and residue - sltu $at,$d0,$t0 - - addu $d1,$d1,$h1 - addu $h0,$h0,$at # carry - sltu $h1,$d1,$h1 - addu $d1,$d1,$h0 - sltu $h0,$d1,$h0 - - addu $d2,$d2,$h2 - addu $h1,$h1,$h0 # carry - sltu $h2,$d2,$h2 - addu $d2,$d2,$h1 - sltu $h1,$d2,$h1 - - addu $d3,$d3,$h3 - addu $h2,$h2,$h1 # carry - sltu $h3,$d3,$h3 - addu $d3,$d3,$h2 - -#if defined(_MIPS_ARCH_MIPS32R2) && !defined(_MIPS_ARCH_MIPS32R6) - multu $r0,$d0 # d0*r0 - sltu $h2,$d3,$h2 - maddu $rs3,$d1 # d1*s3 - addu $h3,$h3,$h2 # carry - maddu $rs2,$d2 # d2*s2 - addu $h4,$h4,$padbit - maddu $rs1,$d3 # d3*s1 - addu $h4,$h4,$h3 - mfhi $at - mflo $h0 - - multu $r1,$d0 # d0*r1 - maddu $r0,$d1 # d1*r0 - maddu $rs3,$d2 # d2*s3 - maddu $rs2,$d3 # d3*s2 - maddu $rs1,$h4 # h4*s1 - maddu $at,$one # hi*1 - mfhi $at - mflo $h1 - - multu $r2,$d0 # d0*r2 - maddu $r1,$d1 # d1*r1 - maddu $r0,$d2 # d2*r0 - maddu $rs3,$d3 # d3*s3 - maddu $rs2,$h4 # h4*s2 - maddu $at,$one # hi*1 - mfhi $at - mflo $h2 - - mul $t0,$r0,$h4 # h4*r0 - - multu $r3,$d0 # d0*r3 - maddu $r2,$d1 # d1*r2 - maddu $r1,$d2 # d2*r1 - maddu $r0,$d3 # d3*r0 - maddu $rs3,$h4 # h4*s3 - maddu $at,$one # hi*1 - mfhi $at - mflo $h3 - - addiu $inp,$inp,16 - - addu $h4,$t0,$at -#else - multu ($r0,$d0) # d0*r0 - mflo ($h0,$r0,$d0) - mfhi ($h1,$r0,$d0) - - sltu $h2,$d3,$h2 - addu $h3,$h3,$h2 # carry - - multu ($rs3,$d1) # d1*s3 - mflo ($at,$rs3,$d1) - mfhi ($t0,$rs3,$d1) - - addu $h4,$h4,$padbit - addiu $inp,$inp,16 - addu $h4,$h4,$h3 - - multu ($rs2,$d2) # d2*s2 - mflo ($a3,$rs2,$d2) - mfhi ($t1,$rs2,$d2) - addu $h0,$h0,$at - addu $h1,$h1,$t0 - multu ($rs1,$d3) # d3*s1 - sltu $at,$h0,$at - addu $h1,$h1,$at - - mflo ($at,$rs1,$d3) - mfhi ($t0,$rs1,$d3) - addu $h0,$h0,$a3 - addu $h1,$h1,$t1 - multu ($r1,$d0) # d0*r1 - sltu $a3,$h0,$a3 - addu $h1,$h1,$a3 - - - mflo ($a3,$r1,$d0) - mfhi ($h2,$r1,$d0) - addu $h0,$h0,$at - addu $h1,$h1,$t0 - multu ($r0,$d1) # d1*r0 - sltu $at,$h0,$at - addu $h1,$h1,$at - - mflo ($at,$r0,$d1) - mfhi ($t0,$r0,$d1) - addu $h1,$h1,$a3 - sltu $a3,$h1,$a3 - multu ($rs3,$d2) # d2*s3 - addu $h2,$h2,$a3 - - mflo ($a3,$rs3,$d2) - mfhi ($t1,$rs3,$d2) - addu $h1,$h1,$at - addu $h2,$h2,$t0 - multu ($rs2,$d3) # d3*s2 - sltu $at,$h1,$at - addu $h2,$h2,$at - - mflo ($at,$rs2,$d3) - mfhi ($t0,$rs2,$d3) - addu $h1,$h1,$a3 - addu $h2,$h2,$t1 - multu ($rs1,$h4) # h4*s1 - sltu $a3,$h1,$a3 - addu $h2,$h2,$a3 - - mflo ($a3,$rs1,$h4) - addu $h1,$h1,$at - addu $h2,$h2,$t0 - multu ($r2,$d0) # d0*r2 - sltu $at,$h1,$at - addu $h2,$h2,$at - - - mflo ($at,$r2,$d0) - mfhi ($h3,$r2,$d0) - addu $h1,$h1,$a3 - sltu $a3,$h1,$a3 - multu ($r1,$d1) # d1*r1 - addu $h2,$h2,$a3 - - mflo ($a3,$r1,$d1) - mfhi ($t1,$r1,$d1) - addu $h2,$h2,$at - sltu $at,$h2,$at - multu ($r0,$d2) # d2*r0 - addu $h3,$h3,$at - - mflo ($at,$r0,$d2) - mfhi ($t0,$r0,$d2) - addu $h2,$h2,$a3 - addu $h3,$h3,$t1 - multu ($rs3,$d3) # d3*s3 - sltu $a3,$h2,$a3 - addu $h3,$h3,$a3 - - mflo ($a3,$rs3,$d3) - mfhi ($t1,$rs3,$d3) - addu $h2,$h2,$at - addu $h3,$h3,$t0 - multu ($rs2,$h4) # h4*s2 - sltu $at,$h2,$at - addu $h3,$h3,$at - - mflo ($at,$rs2,$h4) - addu $h2,$h2,$a3 - addu $h3,$h3,$t1 - multu ($r3,$d0) # d0*r3 - sltu $a3,$h2,$a3 - addu $h3,$h3,$a3 - - - mflo ($a3,$r3,$d0) - mfhi ($t1,$r3,$d0) - addu $h2,$h2,$at - sltu $at,$h2,$at - multu ($r2,$d1) # d1*r2 - addu $h3,$h3,$at - - mflo ($at,$r2,$d1) - mfhi ($t0,$r2,$d1) - addu $h3,$h3,$a3 - sltu $a3,$h3,$a3 - multu ($r0,$d3) # d3*r0 - addu $t1,$t1,$a3 - - mflo ($a3,$r0,$d3) - mfhi ($d3,$r0,$d3) - addu $h3,$h3,$at - addu $t1,$t1,$t0 - multu ($r1,$d2) # d2*r1 - sltu $at,$h3,$at - addu $t1,$t1,$at - - mflo ($at,$r1,$d2) - mfhi ($t0,$r1,$d2) - addu $h3,$h3,$a3 - addu $t1,$t1,$d3 - multu ($rs3,$h4) # h4*s3 - sltu $a3,$h3,$a3 - addu $t1,$t1,$a3 - - mflo ($a3,$rs3,$h4) - addu $h3,$h3,$at - addu $t1,$t1,$t0 - multu ($r0,$h4) # h4*r0 - sltu $at,$h3,$at - addu $t1,$t1,$at - - - mflo ($h4,$r0,$h4) - addu $h3,$h3,$a3 - sltu $a3,$h3,$a3 - addu $t1,$t1,$a3 - addu $h4,$h4,$t1 - - li $padbit,1 # if we loop, padbit is 1 -#endif - bne $inp,$len,.Loop - - sw $h0,0($ctx) # store hash value - sw $h1,4($ctx) - sw $h2,8($ctx) - sw $h3,12($ctx) - sw $h4,16($ctx) - - .set noreorder -.Labort: - lw $s11,4*11($sp) - lw $s10,4*10($sp) - lw $s9, 4*9($sp) - lw $s8, 4*8($sp) - lw $s7, 4*7($sp) - lw $s6, 4*6($sp) - lw $s5, 4*5($sp) - lw $s4, 4*4($sp) -___ -$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue - lw $s3, 4*3($sp) - lw $s2, 4*2($sp) - lw $s1, 4*1($sp) - lw $s0, 4*0($sp) -___ -$code.=<<___; - jr $ra - addu $sp,$sp,4*12 -.end poly1305_blocks -___ -} -{ -my ($ctx,$mac,$nonce,$tmp4) = ($a0,$a1,$a2,$a3); - -$code.=<<___; -.align 5 -.globl poly1305_emit -.ent poly1305_emit -poly1305_emit: - .frame $sp,0,$ra - .set reorder - - lw $tmp4,16($ctx) - lw $tmp0,0($ctx) - lw $tmp1,4($ctx) - lw $tmp2,8($ctx) - lw $tmp3,12($ctx) - - li $in0,-4 # final reduction - srl $ctx,$tmp4,2 - and $in0,$in0,$tmp4 - andi $tmp4,$tmp4,3 - addu $ctx,$ctx,$in0 - - addu $tmp0,$tmp0,$ctx - sltu $ctx,$tmp0,$ctx - addiu $in0,$tmp0,5 # compare to modulus - addu $tmp1,$tmp1,$ctx - sltiu $in1,$in0,5 - sltu $ctx,$tmp1,$ctx - addu $in1,$in1,$tmp1 - addu $tmp2,$tmp2,$ctx - sltu $in2,$in1,$tmp1 - sltu $ctx,$tmp2,$ctx - addu $in2,$in2,$tmp2 - addu $tmp3,$tmp3,$ctx - sltu $in3,$in2,$tmp2 - sltu $ctx,$tmp3,$ctx - addu $in3,$in3,$tmp3 - addu $tmp4,$tmp4,$ctx - sltu $ctx,$in3,$tmp3 - addu $ctx,$tmp4 - - srl $ctx,2 # see if it carried/borrowed - subu $ctx,$zero,$ctx - - xor $in0,$tmp0 - xor $in1,$tmp1 - xor $in2,$tmp2 - xor $in3,$tmp3 - and $in0,$ctx - and $in1,$ctx - and $in2,$ctx - and $in3,$ctx - xor $in0,$tmp0 - xor $in1,$tmp1 - xor $in2,$tmp2 - xor $in3,$tmp3 - - lw $tmp0,0($nonce) # load nonce - lw $tmp1,4($nonce) - lw $tmp2,8($nonce) - lw $tmp3,12($nonce) - - addu $in0,$tmp0 # accumulate nonce - sltu $ctx,$in0,$tmp0 - - addu $in1,$tmp1 - sltu $tmp1,$in1,$tmp1 - addu $in1,$ctx - sltu $ctx,$in1,$ctx - addu $ctx,$tmp1 - - addu $in2,$tmp2 - sltu $tmp2,$in2,$tmp2 - addu $in2,$ctx - sltu $ctx,$in2,$ctx - addu $ctx,$tmp2 - - addu $in3,$tmp3 - addu $in3,$ctx - - srl $tmp0,$in0,8 # write mac value - srl $tmp1,$in0,16 - srl $tmp2,$in0,24 - sb $in0, 0($mac) - sb $tmp0,1($mac) - srl $tmp0,$in1,8 - sb $tmp1,2($mac) - srl $tmp1,$in1,16 - sb $tmp2,3($mac) - srl $tmp2,$in1,24 - sb $in1, 4($mac) - sb $tmp0,5($mac) - srl $tmp0,$in2,8 - sb $tmp1,6($mac) - srl $tmp1,$in2,16 - sb $tmp2,7($mac) - srl $tmp2,$in2,24 - sb $in2, 8($mac) - sb $tmp0,9($mac) - srl $tmp0,$in3,8 - sb $tmp1,10($mac) - srl $tmp1,$in3,16 - sb $tmp2,11($mac) - srl $tmp2,$in3,24 - sb $in3, 12($mac) - sb $tmp0,13($mac) - sb $tmp1,14($mac) - sb $tmp2,15($mac) - - jr $ra -.end poly1305_emit -.rdata -.asciiz "Poly1305 for MIPS32, CRYPTOGAMS by \@dot-asm" -.align 2 -___ -} -}}} - -$output=pop and open STDOUT,">$output"; -print $code; -close STDOUT; |