summaryrefslogtreecommitdiff
path: root/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/crypto/aes_ctrby8_avx-x86_64.S')
-rw-r--r--arch/x86/crypto/aes_ctrby8_avx-x86_64.S597
1 files changed, 0 insertions, 597 deletions
diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
deleted file mode 100644
index 2402b9418cd7..000000000000
--- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
+++ /dev/null
@@ -1,597 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause */
-/*
- * AES CTR mode by8 optimization with AVX instructions. (x86_64)
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Contact Information:
- * James Guilford <james.guilford@intel.com>
- * Sean Gulley <sean.m.gulley@intel.com>
- * Chandramouli Narayanan <mouli@linux.intel.com>
- */
-/*
- * This is AES128/192/256 CTR mode optimization implementation. It requires
- * the support of Intel(R) AESNI and AVX instructions.
- *
- * This work was inspired by the AES CTR mode optimization published
- * in Intel Optimized IPSEC Cryptographic library.
- * Additional information on it can be found at:
- * https://github.com/intel/intel-ipsec-mb
- */
-
-#include <linux/linkage.h>
-
-#define VMOVDQ vmovdqu
-
-/*
- * Note: the "x" prefix in these aliases means "this is an xmm register". The
- * alias prefixes have no relation to XCTR where the "X" prefix means "XOR
- * counter".
- */
-#define xdata0 %xmm0
-#define xdata1 %xmm1
-#define xdata2 %xmm2
-#define xdata3 %xmm3
-#define xdata4 %xmm4
-#define xdata5 %xmm5
-#define xdata6 %xmm6
-#define xdata7 %xmm7
-#define xcounter %xmm8 // CTR mode only
-#define xiv %xmm8 // XCTR mode only
-#define xbyteswap %xmm9 // CTR mode only
-#define xtmp %xmm9 // XCTR mode only
-#define xkey0 %xmm10
-#define xkey4 %xmm11
-#define xkey8 %xmm12
-#define xkey12 %xmm13
-#define xkeyA %xmm14
-#define xkeyB %xmm15
-
-#define p_in %rdi
-#define p_iv %rsi
-#define p_keys %rdx
-#define p_out %rcx
-#define num_bytes %r8
-#define counter %r9 // XCTR mode only
-#define tmp %r10
-#define DDQ_DATA 0
-#define XDATA 1
-#define KEY_128 1
-#define KEY_192 2
-#define KEY_256 3
-
-.section .rodata
-.align 16
-
-byteswap_const:
- .octa 0x000102030405060708090A0B0C0D0E0F
-ddq_low_msk:
- .octa 0x0000000000000000FFFFFFFFFFFFFFFF
-ddq_high_add_1:
- .octa 0x00000000000000010000000000000000
-ddq_add_1:
- .octa 0x00000000000000000000000000000001
-ddq_add_2:
- .octa 0x00000000000000000000000000000002
-ddq_add_3:
- .octa 0x00000000000000000000000000000003
-ddq_add_4:
- .octa 0x00000000000000000000000000000004
-ddq_add_5:
- .octa 0x00000000000000000000000000000005
-ddq_add_6:
- .octa 0x00000000000000000000000000000006
-ddq_add_7:
- .octa 0x00000000000000000000000000000007
-ddq_add_8:
- .octa 0x00000000000000000000000000000008
-
-.text
-
-/* generate a unique variable for ddq_add_x */
-
-/* generate a unique variable for xmm register */
-.macro setxdata n
- var_xdata = %xmm\n
-.endm
-
-/* club the numeric 'id' to the symbol 'name' */
-
-.macro club name, id
-.altmacro
- .if \name == XDATA
- setxdata %\id
- .endif
-.noaltmacro
-.endm
-
-/*
- * do_aes num_in_par load_keys key_len
- * This increments p_in, but not p_out
- */
-.macro do_aes b, k, key_len, xctr
- .set by, \b
- .set load_keys, \k
- .set klen, \key_len
-
- .if (load_keys)
- vmovdqa 0*16(p_keys), xkey0
- .endif
-
- .if \xctr
- movq counter, xtmp
- .set i, 0
- .rept (by)
- club XDATA, i
- vpaddq (ddq_add_1 + 16 * i)(%rip), xtmp, var_xdata
- .set i, (i +1)
- .endr
- .set i, 0
- .rept (by)
- club XDATA, i
- vpxor xiv, var_xdata, var_xdata
- .set i, (i +1)
- .endr
- .else
- vpshufb xbyteswap, xcounter, xdata0
- .set i, 1
- .rept (by - 1)
- club XDATA, i
- vpaddq (ddq_add_1 + 16 * (i - 1))(%rip), xcounter, var_xdata
- vptest ddq_low_msk(%rip), var_xdata
- jnz 1f
- vpaddq ddq_high_add_1(%rip), var_xdata, var_xdata
- vpaddq ddq_high_add_1(%rip), xcounter, xcounter
- 1:
- vpshufb xbyteswap, var_xdata, var_xdata
- .set i, (i +1)
- .endr
- .endif
-
- vmovdqa 1*16(p_keys), xkeyA
-
- vpxor xkey0, xdata0, xdata0
- .if \xctr
- add $by, counter
- .else
- vpaddq (ddq_add_1 + 16 * (by - 1))(%rip), xcounter, xcounter
- vptest ddq_low_msk(%rip), xcounter
- jnz 1f
- vpaddq ddq_high_add_1(%rip), xcounter, xcounter
- 1:
- .endif
-
- .set i, 1
- .rept (by - 1)
- club XDATA, i
- vpxor xkey0, var_xdata, var_xdata
- .set i, (i +1)
- .endr
-
- vmovdqa 2*16(p_keys), xkeyB
-
- .set i, 0
- .rept by
- club XDATA, i
- vaesenc xkeyA, var_xdata, var_xdata /* key 1 */
- .set i, (i +1)
- .endr
-
- .if (klen == KEY_128)
- .if (load_keys)
- vmovdqa 3*16(p_keys), xkey4
- .endif
- .else
- vmovdqa 3*16(p_keys), xkeyA
- .endif
-
- .set i, 0
- .rept by
- club XDATA, i
- vaesenc xkeyB, var_xdata, var_xdata /* key 2 */
- .set i, (i +1)
- .endr
-
- add $(16*by), p_in
-
- .if (klen == KEY_128)
- vmovdqa 4*16(p_keys), xkeyB
- .else
- .if (load_keys)
- vmovdqa 4*16(p_keys), xkey4
- .endif
- .endif
-
- .set i, 0
- .rept by
- club XDATA, i
- /* key 3 */
- .if (klen == KEY_128)
- vaesenc xkey4, var_xdata, var_xdata
- .else
- vaesenc xkeyA, var_xdata, var_xdata
- .endif
- .set i, (i +1)
- .endr
-
- vmovdqa 5*16(p_keys), xkeyA
-
- .set i, 0
- .rept by
- club XDATA, i
- /* key 4 */
- .if (klen == KEY_128)
- vaesenc xkeyB, var_xdata, var_xdata
- .else
- vaesenc xkey4, var_xdata, var_xdata
- .endif
- .set i, (i +1)
- .endr
-
- .if (klen == KEY_128)
- .if (load_keys)
- vmovdqa 6*16(p_keys), xkey8
- .endif
- .else
- vmovdqa 6*16(p_keys), xkeyB
- .endif
-
- .set i, 0
- .rept by
- club XDATA, i
- vaesenc xkeyA, var_xdata, var_xdata /* key 5 */
- .set i, (i +1)
- .endr
-
- vmovdqa 7*16(p_keys), xkeyA
-
- .set i, 0
- .rept by
- club XDATA, i
- /* key 6 */
- .if (klen == KEY_128)
- vaesenc xkey8, var_xdata, var_xdata
- .else
- vaesenc xkeyB, var_xdata, var_xdata
- .endif
- .set i, (i +1)
- .endr
-
- .if (klen == KEY_128)
- vmovdqa 8*16(p_keys), xkeyB
- .else
- .if (load_keys)
- vmovdqa 8*16(p_keys), xkey8
- .endif
- .endif
-
- .set i, 0
- .rept by
- club XDATA, i
- vaesenc xkeyA, var_xdata, var_xdata /* key 7 */
- .set i, (i +1)
- .endr
-
- .if (klen == KEY_128)
- .if (load_keys)
- vmovdqa 9*16(p_keys), xkey12
- .endif
- .else
- vmovdqa 9*16(p_keys), xkeyA
- .endif
-
- .set i, 0
- .rept by
- club XDATA, i
- /* key 8 */
- .if (klen == KEY_128)
- vaesenc xkeyB, var_xdata, var_xdata
- .else
- vaesenc xkey8, var_xdata, var_xdata
- .endif
- .set i, (i +1)
- .endr
-
- vmovdqa 10*16(p_keys), xkeyB
-
- .set i, 0
- .rept by
- club XDATA, i
- /* key 9 */
- .if (klen == KEY_128)
- vaesenc xkey12, var_xdata, var_xdata
- .else
- vaesenc xkeyA, var_xdata, var_xdata
- .endif
- .set i, (i +1)
- .endr
-
- .if (klen != KEY_128)
- vmovdqa 11*16(p_keys), xkeyA
- .endif
-
- .set i, 0
- .rept by
- club XDATA, i
- /* key 10 */
- .if (klen == KEY_128)
- vaesenclast xkeyB, var_xdata, var_xdata
- .else
- vaesenc xkeyB, var_xdata, var_xdata
- .endif
- .set i, (i +1)
- .endr
-
- .if (klen != KEY_128)
- .if (load_keys)
- vmovdqa 12*16(p_keys), xkey12
- .endif
-
- .set i, 0
- .rept by
- club XDATA, i
- vaesenc xkeyA, var_xdata, var_xdata /* key 11 */
- .set i, (i +1)
- .endr
-
- .if (klen == KEY_256)
- vmovdqa 13*16(p_keys), xkeyA
- .endif
-
- .set i, 0
- .rept by
- club XDATA, i
- .if (klen == KEY_256)
- /* key 12 */
- vaesenc xkey12, var_xdata, var_xdata
- .else
- vaesenclast xkey12, var_xdata, var_xdata
- .endif
- .set i, (i +1)
- .endr
-
- .if (klen == KEY_256)
- vmovdqa 14*16(p_keys), xkeyB
-
- .set i, 0
- .rept by
- club XDATA, i
- /* key 13 */
- vaesenc xkeyA, var_xdata, var_xdata
- .set i, (i +1)
- .endr
-
- .set i, 0
- .rept by
- club XDATA, i
- /* key 14 */
- vaesenclast xkeyB, var_xdata, var_xdata
- .set i, (i +1)
- .endr
- .endif
- .endif
-
- .set i, 0
- .rept (by / 2)
- .set j, (i+1)
- VMOVDQ (i*16 - 16*by)(p_in), xkeyA
- VMOVDQ (j*16 - 16*by)(p_in), xkeyB
- club XDATA, i
- vpxor xkeyA, var_xdata, var_xdata
- club XDATA, j
- vpxor xkeyB, var_xdata, var_xdata
- .set i, (i+2)
- .endr
-
- .if (i < by)
- VMOVDQ (i*16 - 16*by)(p_in), xkeyA
- club XDATA, i
- vpxor xkeyA, var_xdata, var_xdata
- .endif
-
- .set i, 0
- .rept by
- club XDATA, i
- VMOVDQ var_xdata, i*16(p_out)
- .set i, (i+1)
- .endr
-.endm
-
-.macro do_aes_load val, key_len, xctr
- do_aes \val, 1, \key_len, \xctr
-.endm
-
-.macro do_aes_noload val, key_len, xctr
- do_aes \val, 0, \key_len, \xctr
-.endm
-
-/* main body of aes ctr load */
-
-.macro do_aes_ctrmain key_len, xctr
- cmp $16, num_bytes
- jb .Ldo_return2\xctr\key_len
-
- .if \xctr
- shr $4, counter
- vmovdqu (p_iv), xiv
- .else
- vmovdqa byteswap_const(%rip), xbyteswap
- vmovdqu (p_iv), xcounter
- vpshufb xbyteswap, xcounter, xcounter
- .endif
-
- mov num_bytes, tmp
- and $(7*16), tmp
- jz .Lmult_of_8_blks\xctr\key_len
-
- /* 1 <= tmp <= 7 */
- cmp $(4*16), tmp
- jg .Lgt4\xctr\key_len
- je .Leq4\xctr\key_len
-
-.Llt4\xctr\key_len:
- cmp $(2*16), tmp
- jg .Leq3\xctr\key_len
- je .Leq2\xctr\key_len
-
-.Leq1\xctr\key_len:
- do_aes_load 1, \key_len, \xctr
- add $(1*16), p_out
- and $(~7*16), num_bytes
- jz .Ldo_return2\xctr\key_len
- jmp .Lmain_loop2\xctr\key_len
-
-.Leq2\xctr\key_len:
- do_aes_load 2, \key_len, \xctr
- add $(2*16), p_out
- and $(~7*16), num_bytes
- jz .Ldo_return2\xctr\key_len
- jmp .Lmain_loop2\xctr\key_len
-
-
-.Leq3\xctr\key_len:
- do_aes_load 3, \key_len, \xctr
- add $(3*16), p_out
- and $(~7*16), num_bytes
- jz .Ldo_return2\xctr\key_len
- jmp .Lmain_loop2\xctr\key_len
-
-.Leq4\xctr\key_len:
- do_aes_load 4, \key_len, \xctr
- add $(4*16), p_out
- and $(~7*16), num_bytes
- jz .Ldo_return2\xctr\key_len
- jmp .Lmain_loop2\xctr\key_len
-
-.Lgt4\xctr\key_len:
- cmp $(6*16), tmp
- jg .Leq7\xctr\key_len
- je .Leq6\xctr\key_len
-
-.Leq5\xctr\key_len:
- do_aes_load 5, \key_len, \xctr
- add $(5*16), p_out
- and $(~7*16), num_bytes
- jz .Ldo_return2\xctr\key_len
- jmp .Lmain_loop2\xctr\key_len
-
-.Leq6\xctr\key_len:
- do_aes_load 6, \key_len, \xctr
- add $(6*16), p_out
- and $(~7*16), num_bytes
- jz .Ldo_return2\xctr\key_len
- jmp .Lmain_loop2\xctr\key_len
-
-.Leq7\xctr\key_len:
- do_aes_load 7, \key_len, \xctr
- add $(7*16), p_out
- and $(~7*16), num_bytes
- jz .Ldo_return2\xctr\key_len
- jmp .Lmain_loop2\xctr\key_len
-
-.Lmult_of_8_blks\xctr\key_len:
- .if (\key_len != KEY_128)
- vmovdqa 0*16(p_keys), xkey0
- vmovdqa 4*16(p_keys), xkey4
- vmovdqa 8*16(p_keys), xkey8
- vmovdqa 12*16(p_keys), xkey12
- .else
- vmovdqa 0*16(p_keys), xkey0
- vmovdqa 3*16(p_keys), xkey4
- vmovdqa 6*16(p_keys), xkey8
- vmovdqa 9*16(p_keys), xkey12
- .endif
-.align 16
-.Lmain_loop2\xctr\key_len:
- /* num_bytes is a multiple of 8 and >0 */
- do_aes_noload 8, \key_len, \xctr
- add $(8*16), p_out
- sub $(8*16), num_bytes
- jne .Lmain_loop2\xctr\key_len
-
-.Ldo_return2\xctr\key_len:
- .if !\xctr
- /* return updated IV */
- vpshufb xbyteswap, xcounter, xcounter
- vmovdqu xcounter, (p_iv)
- .endif
- RET
-.endm
-
-/*
- * routine to do AES128 CTR enc/decrypt "by8"
- * XMM registers are clobbered.
- * Saving/restoring must be done at a higher level
- * aes_ctr_enc_128_avx_by8(void *in, void *iv, void *keys, void *out,
- * unsigned int num_bytes)
- */
-SYM_FUNC_START(aes_ctr_enc_128_avx_by8)
- /* call the aes main loop */
- do_aes_ctrmain KEY_128 0
-
-SYM_FUNC_END(aes_ctr_enc_128_avx_by8)
-
-/*
- * routine to do AES192 CTR enc/decrypt "by8"
- * XMM registers are clobbered.
- * Saving/restoring must be done at a higher level
- * aes_ctr_enc_192_avx_by8(void *in, void *iv, void *keys, void *out,
- * unsigned int num_bytes)
- */
-SYM_FUNC_START(aes_ctr_enc_192_avx_by8)
- /* call the aes main loop */
- do_aes_ctrmain KEY_192 0
-
-SYM_FUNC_END(aes_ctr_enc_192_avx_by8)
-
-/*
- * routine to do AES256 CTR enc/decrypt "by8"
- * XMM registers are clobbered.
- * Saving/restoring must be done at a higher level
- * aes_ctr_enc_256_avx_by8(void *in, void *iv, void *keys, void *out,
- * unsigned int num_bytes)
- */
-SYM_FUNC_START(aes_ctr_enc_256_avx_by8)
- /* call the aes main loop */
- do_aes_ctrmain KEY_256 0
-
-SYM_FUNC_END(aes_ctr_enc_256_avx_by8)
-
-/*
- * routine to do AES128 XCTR enc/decrypt "by8"
- * XMM registers are clobbered.
- * Saving/restoring must be done at a higher level
- * aes_xctr_enc_128_avx_by8(const u8 *in, const u8 *iv, const void *keys,
- * u8* out, unsigned int num_bytes, unsigned int byte_ctr)
- */
-SYM_FUNC_START(aes_xctr_enc_128_avx_by8)
- /* call the aes main loop */
- do_aes_ctrmain KEY_128 1
-
-SYM_FUNC_END(aes_xctr_enc_128_avx_by8)
-
-/*
- * routine to do AES192 XCTR enc/decrypt "by8"
- * XMM registers are clobbered.
- * Saving/restoring must be done at a higher level
- * aes_xctr_enc_192_avx_by8(const u8 *in, const u8 *iv, const void *keys,
- * u8* out, unsigned int num_bytes, unsigned int byte_ctr)
- */
-SYM_FUNC_START(aes_xctr_enc_192_avx_by8)
- /* call the aes main loop */
- do_aes_ctrmain KEY_192 1
-
-SYM_FUNC_END(aes_xctr_enc_192_avx_by8)
-
-/*
- * routine to do AES256 XCTR enc/decrypt "by8"
- * XMM registers are clobbered.
- * Saving/restoring must be done at a higher level
- * aes_xctr_enc_256_avx_by8(const u8 *in, const u8 *iv, const void *keys,
- * u8* out, unsigned int num_bytes, unsigned int byte_ctr)
- */
-SYM_FUNC_START(aes_xctr_enc_256_avx_by8)
- /* call the aes main loop */
- do_aes_ctrmain KEY_256 1
-
-SYM_FUNC_END(aes_xctr_enc_256_avx_by8)