summaryrefslogtreecommitdiff
path: root/arch/arm64/crypto
diff options
context:
space:
mode:
authorArd Biesheuvel <ard.biesheuvel@linaro.org>2018-03-10 15:21:52 +0000
committerHerbert Xu <herbert@gondor.apana.org.au>2018-03-16 23:35:57 +0800
commita8f8a69e82b6ecfbaa869987955b1dbd5ae7f612 (patch)
tree15e72d689c6efa2c46880ca2d0e2e885eef788d8 /arch/arm64/crypto
parent55868b45cffdb7b4d7f9eddd3d4f1262cf645885 (diff)
crypto: arm64/aes-blk - add 4 way interleave to CBC encrypt path
CBC encryption is strictly sequential, and so the current AES code simply processes the input one block at a time. However, we are about to add yield support, which adds a bit of overhead, and which we prefer to align with other modes in terms of granularity (i.e., it is better to have all routines yield every 64 bytes and not have an exception for CBC encrypt which yields every 16 bytes) So unroll the loop by 4. We still cannot perform the AES algorithm in parallel, but we can at least merge the loads and stores. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/arm64/crypto')
-rw-r--r--arch/arm64/crypto/aes-modes.S31
1 files changed, 25 insertions, 6 deletions
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 27a235b2ddee..e86535a1329d 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -94,17 +94,36 @@ AES_ENDPROC(aes_ecb_decrypt)
*/
AES_ENTRY(aes_cbc_encrypt)
- ld1 {v0.16b}, [x5] /* get iv */
+ ld1 {v4.16b}, [x5] /* get iv */
enc_prepare w3, x2, x6
-.Lcbcencloop:
- ld1 {v1.16b}, [x1], #16 /* get next pt block */
- eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */
+.Lcbcencloop4x:
+ subs w4, w4, #4
+ bmi .Lcbcenc1x
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
+ eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */
encrypt_block v0, w3, x2, x6, w7
- st1 {v0.16b}, [x0], #16
+ eor v1.16b, v1.16b, v0.16b
+ encrypt_block v1, w3, x2, x6, w7
+ eor v2.16b, v2.16b, v1.16b
+ encrypt_block v2, w3, x2, x6, w7
+ eor v3.16b, v3.16b, v2.16b
+ encrypt_block v3, w3, x2, x6, w7
+ st1 {v0.16b-v3.16b}, [x0], #64
+ mov v4.16b, v3.16b
+ b .Lcbcencloop4x
+.Lcbcenc1x:
+ adds w4, w4, #4
+ beq .Lcbcencout
+.Lcbcencloop:
+ ld1 {v0.16b}, [x1], #16 /* get next pt block */
+ eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */
+ encrypt_block v4, w3, x2, x6, w7
+ st1 {v4.16b}, [x0], #16
subs w4, w4, #1
bne .Lcbcencloop
- st1 {v0.16b}, [x5] /* return iv */
+.Lcbcencout:
+ st1 {v4.16b}, [x5] /* return iv */
ret
AES_ENDPROC(aes_cbc_encrypt)