diff options
Diffstat (limited to 'arch/arm64/crypto/aes-ce-ccm-core.S')
| -rw-r--r-- | arch/arm64/crypto/aes-ce-ccm-core.S | 142 |
1 files changed, 142 insertions, 0 deletions
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd.
 * Copyright (C) 2024 Google LLC
 *
 * Author: Ard Biesheuvel <ardb@kernel.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch	armv8-a+crypto

	/*
	 * load_round_keys - load the AES key schedule into vector registers
	 *
	 * \rk:  X register holding the address of the round keys
	 * \nr:  W register holding the number of rounds
	 * \tmp: scratch X register (clobbered). Its 32-bit view is named by
	 *       pasting 'w' in front of the X name, so this relies on wxN
	 *       register aliases - presumably from <asm/assembler.h>; verify.
	 *
	 * v10-v13 receive the four round keys at the start of the schedule.
	 * v14-v21 and v3-v5 receive the eleven round keys starting at index
	 * (\nr - 10), i.e. the last eleven keys when the schedule holds
	 * \nr + 1 keys.
	 */
	.macro	load_round_keys, rk, nr, tmp
	sub	w\tmp, \nr, #10
	add	\tmp, \rk, w\tmp, sxtw #4	/* tmp = rk + 16 * (nr - 10) */
	ld1	{v10.4s-v13.4s}, [\rk]
	ld1	{v14.4s-v17.4s}, [\tmp], #64
	ld1	{v18.4s-v21.4s}, [\tmp], #64
	ld1	{v3.4s-v5.4s}, [\tmp]
	.endm

	/*
	 * dround - one AESE + AESMC round with key \vk, applied to both
	 * \va and \vb so the two AES states advance in lockstep.
	 */
	.macro	dround, va, vb, vk
	aese	\va\().16b, \vk\().16b
	aesmc	\va\().16b, \va\().16b
	aese	\vb\().16b, \vk\().16b
	aesmc	\vb\().16b, \vb\().16b
	.endm

	/*
	 * aes_encrypt - run two AES states through the cipher in parallel
	 *
	 * \va, \vb: vector registers holding the two states (updated in place)
	 * \nr:      W register holding the number of rounds
	 *
	 * Expects the keys as laid out by load_round_keys. Bits 2 and 1 of
	 * \nr select how many of the leading keys v10-v13 are used (none,
	 * two or four) before the common tail v14-v21, v3, v4. This looks
	 * designed for \nr in {10, 12, 14} - other values are not handled.
	 *
	 * The last step is a bare AESE with v4: the XOR with the final round
	 * key (v5) is NOT performed here and is left to the caller.
	 */
	.macro	aes_encrypt, va, vb, nr
	tbz	\nr, #2, .L\@
	dround	\va, \vb, v10
	dround	\va, \vb, v11
	tbz	\nr, #1, .L\@
	dround	\va, \vb, v12
	dround	\va, \vb, v13
.L\@:	.irp	v, v14, v15, v16, v17, v18, v19, v20, v21, v3
	dround	\va, \vb, \v
	.endr
	aese	\va\().16b, v4.16b
	aese	\vb\().16b, v4.16b
	.endm

	/*
	 * aes_ccm_do_crypt - body shared by ce_aes_ccm_encrypt/_decrypt
	 *
	 * \enc: 1 to emit the encryption variant, anything else for decryption
	 *
	 * Register use, following the C prototypes further down:
	 *   x0 = out, x1 = in, w2 = cbytes, x3 = rk, w4 = rounds,
	 *   x5 = mac, x6 = ctr, x7 = final_iv (may be NULL)
	 *
	 *   v0 = running MAC, v1 = counter block / key stream,
	 *   v2 = current input block, v6 = current output block,
	 *   x8 = low 64 bits of the counter (byte-swapped on LE), x9 = scratch
	 *
	 * Each iteration pushes the MAC and the next counter block through
	 * aes_encrypt together. A trailing partial block is handed over to
	 * ce_aes_ccm_crypt_tail; only the exit on a block boundary writes the
	 * low counter half back to ctr[].
	 */
	.macro	aes_ccm_do_crypt,enc
	load_round_keys	x3, w4, x10

	ld1	{v0.16b}, [x5]			/* load mac */
	/*
	 * cbytes is a u32: AAPCS64 leaves the upper 32 bits of x2
	 * unspecified, so only w2 may be tested for zero here.
	 */
	cbz	w2, ce_aes_ccm_final
	ldr	x8, [x6, #8]			/* load lower ctr */
CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
0:	/* outer loop */
	ld1	{v1.8b}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1
	rev	x9, x8
	ins	v1.d[1], x9			/* no carry in lower ctr */

	aes_encrypt	v0, v1, w4

	subs	w2, w2, #16			/* flags stay live until 'bne 0b' */
	bmi	ce_aes_ccm_crypt_tail		/* fewer than 16 bytes left */
	ld1	{v2.16b}, [x1], #16		/* load next input block */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v6.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v6.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	st1	{v6.16b}, [x0], #16		/* write output block */
	bne	0b
CPU_LE(	rev	x8, x8			)
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
	cbnz	x7, ce_aes_ccm_final
	st1	{v0.16b}, [x5]			/* store mac */
	ret
	.endm

	/*
	 * ce_aes_ccm_crypt_tail - process a final partial block
	 *
	 * Entered by a branch from aes_ccm_do_crypt with w2 < 0 (bytes left
	 * minus 16), v0/v1 fresh out of aes_encrypt (final round key not yet
	 * applied), v6 holding the previous output block and v22 set by the
	 * entry point (all ones for encryption, zero for decryption).
	 *
	 * The pointers are rewound by -w2 bytes so that a full 16-byte block
	 * ending at the end of the data is loaded and stored; the bytes that
	 * belong to the previous block are re-inserted from v6.
	 * NOTE(review): when the whole input is shorter than 16 bytes this
	 * touches memory in front of in[]/out[] and v6 has not been written -
	 * the caller presumably provides a suitable buffer; confirm.
	 *
	 * ce_aes_ccm_final encrypts the MAC together with the block at x7
	 * and stores their XOR to mac[]. If x7 is NULL in the tail path, the
	 * MAC is stored as is.
	 */
SYM_FUNC_START_LOCAL(ce_aes_ccm_crypt_tail)
	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */

	add	x1, x1, w2, sxtw		/* rewind the input pointer (w2 < 0) */
	add	x0, x0, w2, sxtw		/* rewind the output pointer */

	adr_l	x8, .Lpermute			/* load permute vectors */
	add	x9, x8, w2, sxtw
	sub	x8, x8, w2, sxtw
	ld1	{v7.16b-v8.16b}, [x9]
	ld1	{v9.16b}, [x8]

	ld1	{v2.16b}, [x1]			/* load a full block of input */
	tbl	v1.16b, {v1.16b}, v7.16b	/* move keystream to end of register */
	eor	v7.16b, v2.16b, v1.16b		/* encrypt partial input block */
	/* v22 == ~0 keeps the input in v2, v22 == 0 takes the xor result */
	bif	v2.16b, v7.16b, v22.16b		/* select plaintext */
	tbx	v7.16b, {v6.16b}, v8.16b	/* insert output from previous iteration */
	tbl	v2.16b, {v2.16b}, v9.16b	/* copy plaintext to start of v2 */
	eor	v0.16b, v0.16b, v2.16b		/* fold plaintext into mac */

	st1	{v7.16b}, [x0]			/* store output block */
	cbz	x7, 0f

SYM_INNER_LABEL(ce_aes_ccm_final, SYM_L_LOCAL)
	ld1	{v1.16b}, [x7]			/* load 1st ctriv */

	aes_encrypt	v0, v1, w4

	/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
0:	st1	{v0.16b}, [x5]			/* store result */
	ret
SYM_FUNC_END(ce_aes_ccm_crypt_tail)

	/*
	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
	 * 			   u8 const rk[], u32 rounds, u8 mac[],
	 * 			   u8 ctr[], u8 const final_iv[]);
	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
	 * 			   u8 const rk[], u32 rounds, u8 mac[],
	 * 			   u8 ctr[], u8 const final_iv[]);
	 *
	 * Both entry points only set up v22 - the byte mask consumed by the
	 * BIF in ce_aes_ccm_crypt_tail - and then expand aes_ccm_do_crypt,
	 * which contains every return path.
	 */
SYM_FUNC_START(ce_aes_ccm_encrypt)
	movi	v22.16b, #255			/* tail: MAC the input bytes */
	aes_ccm_do_crypt	1
SYM_FUNC_END(ce_aes_ccm_encrypt)

SYM_FUNC_START(ce_aes_ccm_decrypt)
	movi	v22.16b, #0			/* tail: MAC the output bytes */
	aes_ccm_do_crypt	0
SYM_FUNC_END(ce_aes_ccm_decrypt)

	/*
	 * Permute table for ce_aes_ccm_crypt_tail: the identity indexes
	 * 0x0-0xf with 15 bytes of 0xff on either side. Loading 16 or 32
	 * bytes at .Lpermute + w2 (-15 <= w2 <= -1) or 16 bytes at
	 * .Lpermute - w2 therefore stays inside the table; the 0xff entries
	 * are out-of-range indexes for TBL/TBX.
	 */
	.section ".rodata", "a"
	.align	6
	.fill	15, 1, 0xff
.Lpermute:
	.byte	0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
	.byte	0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
	.fill	15, 1, 0xff
