summaryrefslogtreecommitdiff
path: root/arch/mips/crypto/chacha-core.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/mips/crypto/chacha-core.S')
-rw-r--r--arch/mips/crypto/chacha-core.S159
1 files changed, 116 insertions, 43 deletions
diff --git a/arch/mips/crypto/chacha-core.S b/arch/mips/crypto/chacha-core.S
index a81e02db95e7..5755f69cfe00 100644
--- a/arch/mips/crypto/chacha-core.S
+++ b/arch/mips/crypto/chacha-core.S
@@ -125,7 +125,7 @@
#define CONCAT3(a,b,c) _CONCAT3(a,b,c)
#define STORE_UNALIGNED(x) \
-CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
+CONCAT3(.Lchacha_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
.if (x != 12); \
lw T0, (x*4)(STATE); \
.endif; \
@@ -142,7 +142,7 @@ CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
swr X ## x, (x*4)+LSB ## (OUT);
#define STORE_ALIGNED(x) \
-CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
+CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
.if (x != 12); \
lw T0, (x*4)(STATE); \
.endif; \
@@ -162,9 +162,9 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
* Every jumptable entry must be equal in size.
*/
#define JMPTBL_ALIGNED(x) \
-.Lchacha20_mips_jmptbl_aligned_ ## x: ; \
+.Lchacha_mips_jmptbl_aligned_ ## x: ; \
.set noreorder; \
- b .Lchacha20_mips_xor_aligned_ ## x ## _b; \
+ b .Lchacha_mips_xor_aligned_ ## x ## _b; \
.if (x == 12); \
addu SAVED_X, X ## x, NONCE_0; \
.else; \
@@ -173,9 +173,9 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
.set reorder
#define JMPTBL_UNALIGNED(x) \
-.Lchacha20_mips_jmptbl_unaligned_ ## x: ; \
+.Lchacha_mips_jmptbl_unaligned_ ## x: ; \
.set noreorder; \
- b .Lchacha20_mips_xor_unaligned_ ## x ## _b; \
+ b .Lchacha_mips_xor_unaligned_ ## x ## _b; \
.if (x == 12); \
addu SAVED_X, X ## x, NONCE_0; \
.else; \
@@ -200,15 +200,18 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
.text
.set reorder
.set noat
-.globl chacha20_mips
-.ent chacha20_mips
-chacha20_mips:
+.globl chacha_crypt_arch
+.ent chacha_crypt_arch
+chacha_crypt_arch:
.frame $sp, STACK_SIZE, $ra
+ /* Load number of rounds */
+ lw $at, 16($sp)
+
addiu $sp, -STACK_SIZE
/* Return bytes = 0. */
- beqz BYTES, .Lchacha20_mips_end
+ beqz BYTES, .Lchacha_mips_end
lw NONCE_0, 48(STATE)
@@ -228,18 +231,15 @@ chacha20_mips:
or IS_UNALIGNED, IN, OUT
andi IS_UNALIGNED, 0x3
- /* Set number of rounds */
- li $at, 20
-
- b .Lchacha20_rounds_start
+ b .Lchacha_rounds_start
.align 4
-.Loop_chacha20_rounds:
+.Loop_chacha_rounds:
addiu IN, CHACHA20_BLOCK_SIZE
addiu OUT, CHACHA20_BLOCK_SIZE
addiu NONCE_0, 1
-.Lchacha20_rounds_start:
+.Lchacha_rounds_start:
lw X0, 0(STATE)
lw X1, 4(STATE)
lw X2, 8(STATE)
@@ -259,7 +259,7 @@ chacha20_mips:
lw X14, 56(STATE)
lw X15, 60(STATE)
-.Loop_chacha20_xor_rounds:
+.Loop_chacha_xor_rounds:
addiu $at, -2
AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16);
AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12);
@@ -269,31 +269,31 @@ chacha20_mips:
AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12);
AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8);
AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7);
- bnez $at, .Loop_chacha20_xor_rounds
+ bnez $at, .Loop_chacha_xor_rounds
addiu BYTES, -(CHACHA20_BLOCK_SIZE)
/* Is data src/dst unaligned? Jump */
- bnez IS_UNALIGNED, .Loop_chacha20_unaligned
+ bnez IS_UNALIGNED, .Loop_chacha_unaligned
/* Set number rounds here to fill delayslot. */
- li $at, 20
+ lw $at, (STACK_SIZE+16)($sp)
/* BYTES < 0, it has no full block. */
- bltz BYTES, .Lchacha20_mips_no_full_block_aligned
+ bltz BYTES, .Lchacha_mips_no_full_block_aligned
FOR_EACH_WORD_REV(STORE_ALIGNED)
/* BYTES > 0? Loop again. */
- bgtz BYTES, .Loop_chacha20_rounds
+ bgtz BYTES, .Loop_chacha_rounds
/* Place this here to fill delay slot */
addiu NONCE_0, 1
/* BYTES < 0? Handle last bytes */
- bltz BYTES, .Lchacha20_mips_xor_bytes
+ bltz BYTES, .Lchacha_mips_xor_bytes
-.Lchacha20_mips_xor_done:
+.Lchacha_mips_xor_done:
/* Restore used registers */
lw $s0, 0($sp)
lw $s1, 4($sp)
@@ -307,11 +307,11 @@ chacha20_mips:
/* Write NONCE_0 back to right location in state */
sw NONCE_0, 48(STATE)
-.Lchacha20_mips_end:
+.Lchacha_mips_end:
addiu $sp, STACK_SIZE
jr $ra
-.Lchacha20_mips_no_full_block_aligned:
+.Lchacha_mips_no_full_block_aligned:
/* Restore the offset on BYTES */
addiu BYTES, CHACHA20_BLOCK_SIZE
@@ -319,7 +319,7 @@ chacha20_mips:
andi $at, BYTES, MASK_U32
/* Load upper half of jump table addr */
- lui T0, %hi(.Lchacha20_mips_jmptbl_aligned_0)
+ lui T0, %hi(.Lchacha_mips_jmptbl_aligned_0)
/* Calculate lower half jump table offset */
ins T0, $at, 1, 6
@@ -328,7 +328,7 @@ chacha20_mips:
addu T1, STATE, $at
/* Add lower half jump table addr */
- addiu T0, %lo(.Lchacha20_mips_jmptbl_aligned_0)
+ addiu T0, %lo(.Lchacha_mips_jmptbl_aligned_0)
/* Read value from STATE */
lw SAVED_CA, 0(T1)
@@ -342,31 +342,31 @@ chacha20_mips:
FOR_EACH_WORD(JMPTBL_ALIGNED)
-.Loop_chacha20_unaligned:
+.Loop_chacha_unaligned:
/* Set number rounds here to fill delayslot. */
- li $at, 20
+ lw $at, (STACK_SIZE+16)($sp)
/* BYTES > 0, it has no full block. */
- bltz BYTES, .Lchacha20_mips_no_full_block_unaligned
+ bltz BYTES, .Lchacha_mips_no_full_block_unaligned
FOR_EACH_WORD_REV(STORE_UNALIGNED)
/* BYTES > 0? Loop again. */
- bgtz BYTES, .Loop_chacha20_rounds
+ bgtz BYTES, .Loop_chacha_rounds
/* Write NONCE_0 back to right location in state */
sw NONCE_0, 48(STATE)
.set noreorder
/* Fall through to byte handling */
- bgez BYTES, .Lchacha20_mips_xor_done
-.Lchacha20_mips_xor_unaligned_0_b:
-.Lchacha20_mips_xor_aligned_0_b:
+ bgez BYTES, .Lchacha_mips_xor_done
+.Lchacha_mips_xor_unaligned_0_b:
+.Lchacha_mips_xor_aligned_0_b:
/* Place this here to fill delay slot */
addiu NONCE_0, 1
.set reorder
-.Lchacha20_mips_xor_bytes:
+.Lchacha_mips_xor_bytes:
addu IN, $at
addu OUT, $at
/* First byte */
@@ -376,22 +376,22 @@ chacha20_mips:
ROTR(SAVED_X)
xor T1, SAVED_X
sb T1, 0(OUT)
- beqz $at, .Lchacha20_mips_xor_done
+ beqz $at, .Lchacha_mips_xor_done
/* Second byte */
lbu T1, 1(IN)
addiu $at, BYTES, 2
ROTx SAVED_X, 8
xor T1, SAVED_X
sb T1, 1(OUT)
- beqz $at, .Lchacha20_mips_xor_done
+ beqz $at, .Lchacha_mips_xor_done
/* Third byte */
lbu T1, 2(IN)
ROTx SAVED_X, 8
xor T1, SAVED_X
sb T1, 2(OUT)
- b .Lchacha20_mips_xor_done
+ b .Lchacha_mips_xor_done
-.Lchacha20_mips_no_full_block_unaligned:
+.Lchacha_mips_no_full_block_unaligned:
/* Restore the offset on BYTES */
addiu BYTES, CHACHA20_BLOCK_SIZE
@@ -399,7 +399,7 @@ chacha20_mips:
andi $at, BYTES, MASK_U32
/* Load upper half of jump table addr */
- lui T0, %hi(.Lchacha20_mips_jmptbl_unaligned_0)
+ lui T0, %hi(.Lchacha_mips_jmptbl_unaligned_0)
/* Calculate lower half jump table offset */
ins T0, $at, 1, 6
@@ -408,7 +408,7 @@ chacha20_mips:
addu T1, STATE, $at
/* Add lower half jump table addr */
- addiu T0, %lo(.Lchacha20_mips_jmptbl_unaligned_0)
+ addiu T0, %lo(.Lchacha_mips_jmptbl_unaligned_0)
/* Read value from STATE */
lw SAVED_CA, 0(T1)
@@ -420,5 +420,78 @@ chacha20_mips:
/* Jump table */
FOR_EACH_WORD(JMPTBL_UNALIGNED)
-.end chacha20_mips
+.end chacha_crypt_arch
+.set at
+
+/* Input arguments
+ * STATE $a0
+ * OUT $a1
+ * NROUND $a2
+ */
+
+#undef X12
+#undef X13
+#undef X14
+#undef X15
+
+#define X12 $a3
+#define X13 $at
+#define X14 $v0
+#define X15 STATE
+
+.set noat
+.globl hchacha_block_arch
+.ent hchacha_block_arch
+hchacha_block_arch:
+ .frame $sp, STACK_SIZE, $ra
+
+ addiu $sp, -STACK_SIZE
+
+ /* Save X11(s6) */
+ sw X11, 0($sp)
+
+ lw X0, 0(STATE)
+ lw X1, 4(STATE)
+ lw X2, 8(STATE)
+ lw X3, 12(STATE)
+ lw X4, 16(STATE)
+ lw X5, 20(STATE)
+ lw X6, 24(STATE)
+ lw X7, 28(STATE)
+ lw X8, 32(STATE)
+ lw X9, 36(STATE)
+ lw X10, 40(STATE)
+ lw X11, 44(STATE)
+ lw X12, 48(STATE)
+ lw X13, 52(STATE)
+ lw X14, 56(STATE)
+ lw X15, 60(STATE)
+
+.Loop_hchacha_xor_rounds:
+ addiu $a2, -2
+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16);
+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12);
+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8);
+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7);
+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16);
+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12);
+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8);
+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7);
+ bnez $a2, .Loop_hchacha_xor_rounds
+
+ /* Restore used register */
+ lw X11, 0($sp)
+
+ sw X0, 0(OUT)
+ sw X1, 4(OUT)
+ sw X2, 8(OUT)
+ sw X3, 12(OUT)
+ sw X12, 16(OUT)
+ sw X13, 20(OUT)
+ sw X14, 24(OUT)
+ sw X15, 28(OUT)
+
+ addiu $sp, STACK_SIZE
+ jr $ra
+.end hchacha_block_arch
.set at