diff options
Diffstat (limited to 'arch/x86/crypto/aes_ctrby8_avx-x86_64.S')
-rw-r--r-- | arch/x86/crypto/aes_ctrby8_avx-x86_64.S | 597 |
1 files changed, 0 insertions, 597 deletions
diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S deleted file mode 100644 index 2402b9418cd7..000000000000 --- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S +++ /dev/null @@ -1,597 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause */ -/* - * AES CTR mode by8 optimization with AVX instructions. (x86_64) - * - * Copyright(c) 2014 Intel Corporation. - * - * Contact Information: - * James Guilford <james.guilford@intel.com> - * Sean Gulley <sean.m.gulley@intel.com> - * Chandramouli Narayanan <mouli@linux.intel.com> - */ -/* - * This is AES128/192/256 CTR mode optimization implementation. It requires - * the support of Intel(R) AESNI and AVX instructions. - * - * This work was inspired by the AES CTR mode optimization published - * in Intel Optimized IPSEC Cryptographic library. - * Additional information on it can be found at: - * https://github.com/intel/intel-ipsec-mb - */ - -#include <linux/linkage.h> - -#define VMOVDQ vmovdqu - -/* - * Note: the "x" prefix in these aliases means "this is an xmm register". The - * alias prefixes have no relation to XCTR where the "X" prefix means "XOR - * counter". - */ -#define xdata0 %xmm0 -#define xdata1 %xmm1 -#define xdata2 %xmm2 -#define xdata3 %xmm3 -#define xdata4 %xmm4 -#define xdata5 %xmm5 -#define xdata6 %xmm6 -#define xdata7 %xmm7 -#define xcounter %xmm8 // CTR mode only -#define xiv %xmm8 // XCTR mode only -#define xbyteswap %xmm9 // CTR mode only -#define xtmp %xmm9 // XCTR mode only -#define xkey0 %xmm10 -#define xkey4 %xmm11 -#define xkey8 %xmm12 -#define xkey12 %xmm13 -#define xkeyA %xmm14 -#define xkeyB %xmm15 - -#define p_in %rdi -#define p_iv %rsi -#define p_keys %rdx -#define p_out %rcx -#define num_bytes %r8 -#define counter %r9 // XCTR mode only -#define tmp %r10 -#define DDQ_DATA 0 -#define XDATA 1 -#define KEY_128 1 -#define KEY_192 2 -#define KEY_256 3 - -.section .rodata -.align 16 - -byteswap_const: - .octa 0x000102030405060708090A0B0C0D0E0F -ddq_low_msk: - .octa 0x0000000000000000FFFFFFFFFFFFFFFF -ddq_high_add_1: - .octa 0x00000000000000010000000000000000 -ddq_add_1: - .octa 0x00000000000000000000000000000001 -ddq_add_2: - .octa 0x00000000000000000000000000000002 -ddq_add_3: - .octa 0x00000000000000000000000000000003 -ddq_add_4: - .octa 0x00000000000000000000000000000004 -ddq_add_5: - .octa 0x00000000000000000000000000000005 -ddq_add_6: - .octa 0x00000000000000000000000000000006 -ddq_add_7: - .octa 0x00000000000000000000000000000007 -ddq_add_8: - .octa 0x00000000000000000000000000000008 - -.text - -/* generate a unique variable for ddq_add_x */ - -/* generate a unique variable for xmm register */ -.macro setxdata n - var_xdata = %xmm\n -.endm - -/* club the numeric 'id' to the symbol 'name' */ - -.macro club name, id -.altmacro - .if \name == XDATA - setxdata %\id - .endif -.noaltmacro -.endm - -/* - * do_aes num_in_par load_keys key_len - * This increments p_in, but not p_out - */ -.macro do_aes b, k, key_len, xctr - .set by, \b - .set load_keys, \k - .set klen, \key_len - - .if (load_keys) - vmovdqa 0*16(p_keys), xkey0 - .endif - - .if \xctr - movq counter, xtmp - .set i, 0 - .rept (by) - club XDATA, i - vpaddq (ddq_add_1 + 16 * i)(%rip), xtmp, var_xdata - .set i, (i +1) - .endr - .set i, 0 - .rept (by) - club XDATA, i - vpxor xiv, var_xdata, var_xdata - .set i, (i +1) - .endr - .else - vpshufb xbyteswap, xcounter, xdata0 - .set i, 1 - .rept (by - 1) - club XDATA, i - vpaddq (ddq_add_1 + 16 * (i - 1))(%rip), xcounter, var_xdata - vptest ddq_low_msk(%rip), var_xdata - jnz 1f - vpaddq ddq_high_add_1(%rip), var_xdata, var_xdata - vpaddq ddq_high_add_1(%rip), xcounter, xcounter - 1: - vpshufb xbyteswap, var_xdata, var_xdata - .set i, (i +1) - .endr - .endif - - vmovdqa 1*16(p_keys), xkeyA - - vpxor xkey0, xdata0, xdata0 - .if \xctr - add $by, counter - .else - vpaddq (ddq_add_1 + 16 * (by - 1))(%rip), xcounter, xcounter - vptest ddq_low_msk(%rip), xcounter - jnz 1f - vpaddq ddq_high_add_1(%rip), xcounter, xcounter - 1: - .endif - - .set i, 1 - .rept (by - 1) - club XDATA, i - vpxor xkey0, var_xdata, var_xdata - .set i, (i +1) - .endr - - vmovdqa 2*16(p_keys), xkeyB - - .set i, 0 - .rept by - club XDATA, i - vaesenc xkeyA, var_xdata, var_xdata /* key 1 */ - .set i, (i +1) - .endr - - .if (klen == KEY_128) - .if (load_keys) - vmovdqa 3*16(p_keys), xkey4 - .endif - .else - vmovdqa 3*16(p_keys), xkeyA - .endif - - .set i, 0 - .rept by - club XDATA, i - vaesenc xkeyB, var_xdata, var_xdata /* key 2 */ - .set i, (i +1) - .endr - - add $(16*by), p_in - - .if (klen == KEY_128) - vmovdqa 4*16(p_keys), xkeyB - .else - .if (load_keys) - vmovdqa 4*16(p_keys), xkey4 - .endif - .endif - - .set i, 0 - .rept by - club XDATA, i - /* key 3 */ - .if (klen == KEY_128) - vaesenc xkey4, var_xdata, var_xdata - .else - vaesenc xkeyA, var_xdata, var_xdata - .endif - .set i, (i +1) - .endr - - vmovdqa 5*16(p_keys), xkeyA - - .set i, 0 - .rept by - club XDATA, i - /* key 4 */ - .if (klen == KEY_128) - vaesenc xkeyB, var_xdata, var_xdata - .else - vaesenc xkey4, var_xdata, var_xdata - .endif - .set i, (i +1) - .endr - - .if (klen == KEY_128) - .if (load_keys) - vmovdqa 6*16(p_keys), xkey8 - .endif - .else - vmovdqa 6*16(p_keys), xkeyB - .endif - - .set i, 0 - .rept by - club XDATA, i - vaesenc xkeyA, var_xdata, var_xdata /* key 5 */ - .set i, (i +1) - .endr - - vmovdqa 7*16(p_keys), xkeyA - - .set i, 0 - .rept by - club XDATA, i - /* key 6 */ - .if (klen == KEY_128) - vaesenc xkey8, var_xdata, var_xdata - .else - vaesenc xkeyB, var_xdata, var_xdata - .endif - .set i, (i +1) - .endr - - .if (klen == KEY_128) - vmovdqa 8*16(p_keys), xkeyB - .else - .if (load_keys) - vmovdqa 8*16(p_keys), xkey8 - .endif - .endif - - .set i, 0 - .rept by - club XDATA, i - vaesenc xkeyA, var_xdata, var_xdata /* key 7 */ - .set i, (i +1) - .endr - - .if (klen == KEY_128) - .if (load_keys) - vmovdqa 9*16(p_keys), xkey12 - .endif - .else - vmovdqa 9*16(p_keys), xkeyA - .endif - - .set i, 0 - .rept by - club XDATA, i - /* key 8 */ - .if (klen == KEY_128) - vaesenc xkeyB, var_xdata, var_xdata - .else - vaesenc xkey8, var_xdata, var_xdata - .endif - .set i, (i +1) - .endr - - vmovdqa 10*16(p_keys), xkeyB - - .set i, 0 - .rept by - club XDATA, i - /* key 9 */ - .if (klen == KEY_128) - vaesenc xkey12, var_xdata, var_xdata - .else - vaesenc xkeyA, var_xdata, var_xdata - .endif - .set i, (i +1) - .endr - - .if (klen != KEY_128) - vmovdqa 11*16(p_keys), xkeyA - .endif - - .set i, 0 - .rept by - club XDATA, i - /* key 10 */ - .if (klen == KEY_128) - vaesenclast xkeyB, var_xdata, var_xdata - .else - vaesenc xkeyB, var_xdata, var_xdata - .endif - .set i, (i +1) - .endr - - .if (klen != KEY_128) - .if (load_keys) - vmovdqa 12*16(p_keys), xkey12 - .endif - - .set i, 0 - .rept by - club XDATA, i - vaesenc xkeyA, var_xdata, var_xdata /* key 11 */ - .set i, (i +1) - .endr - - .if (klen == KEY_256) - vmovdqa 13*16(p_keys), xkeyA - .endif - - .set i, 0 - .rept by - club XDATA, i - .if (klen == KEY_256) - /* key 12 */ - vaesenc xkey12, var_xdata, var_xdata - .else - vaesenclast xkey12, var_xdata, var_xdata - .endif - .set i, (i +1) - .endr - - .if (klen == KEY_256) - vmovdqa 14*16(p_keys), xkeyB - - .set i, 0 - .rept by - club XDATA, i - /* key 13 */ - vaesenc xkeyA, var_xdata, var_xdata - .set i, (i +1) - .endr - - .set i, 0 - .rept by - club XDATA, i - /* key 14 */ - vaesenclast xkeyB, var_xdata, var_xdata - .set i, (i +1) - .endr - .endif - .endif - - .set i, 0 - .rept (by / 2) - .set j, (i+1) - VMOVDQ (i*16 - 16*by)(p_in), xkeyA - VMOVDQ (j*16 - 16*by)(p_in), xkeyB - club XDATA, i - vpxor xkeyA, var_xdata, var_xdata - club XDATA, j - vpxor xkeyB, var_xdata, var_xdata - .set i, (i+2) - .endr - - .if (i < by) - VMOVDQ (i*16 - 16*by)(p_in), xkeyA - club XDATA, i - vpxor xkeyA, var_xdata, var_xdata - .endif - - .set i, 0 - .rept by - club XDATA, i - VMOVDQ var_xdata, i*16(p_out) - .set i, (i+1) - .endr -.endm - -.macro do_aes_load val, key_len, xctr - do_aes \val, 1, \key_len, \xctr -.endm - -.macro do_aes_noload val, key_len, xctr - do_aes \val, 0, \key_len, \xctr -.endm - -/* main body of aes ctr load */ - -.macro do_aes_ctrmain key_len, xctr - cmp $16, num_bytes - jb .Ldo_return2\xctr\key_len - - .if \xctr - shr $4, counter - vmovdqu (p_iv), xiv - .else - vmovdqa byteswap_const(%rip), xbyteswap - vmovdqu (p_iv), xcounter - vpshufb xbyteswap, xcounter, xcounter - .endif - - mov num_bytes, tmp - and $(7*16), tmp - jz .Lmult_of_8_blks\xctr\key_len - - /* 1 <= tmp <= 7 */ - cmp $(4*16), tmp - jg .Lgt4\xctr\key_len - je .Leq4\xctr\key_len - -.Llt4\xctr\key_len: - cmp $(2*16), tmp - jg .Leq3\xctr\key_len - je .Leq2\xctr\key_len - -.Leq1\xctr\key_len: - do_aes_load 1, \key_len, \xctr - add $(1*16), p_out - and $(~7*16), num_bytes - jz .Ldo_return2\xctr\key_len - jmp .Lmain_loop2\xctr\key_len - -.Leq2\xctr\key_len: - do_aes_load 2, \key_len, \xctr - add $(2*16), p_out - and $(~7*16), num_bytes - jz .Ldo_return2\xctr\key_len - jmp .Lmain_loop2\xctr\key_len - - -.Leq3\xctr\key_len: - do_aes_load 3, \key_len, \xctr - add $(3*16), p_out - and $(~7*16), num_bytes - jz .Ldo_return2\xctr\key_len - jmp .Lmain_loop2\xctr\key_len - -.Leq4\xctr\key_len: - do_aes_load 4, \key_len, \xctr - add $(4*16), p_out - and $(~7*16), num_bytes - jz .Ldo_return2\xctr\key_len - jmp .Lmain_loop2\xctr\key_len - -.Lgt4\xctr\key_len: - cmp $(6*16), tmp - jg .Leq7\xctr\key_len - je .Leq6\xctr\key_len - -.Leq5\xctr\key_len: - do_aes_load 5, \key_len, \xctr - add $(5*16), p_out - and $(~7*16), num_bytes - jz .Ldo_return2\xctr\key_len - jmp .Lmain_loop2\xctr\key_len - -.Leq6\xctr\key_len: - do_aes_load 6, \key_len, \xctr - add $(6*16), p_out - and $(~7*16), num_bytes - jz .Ldo_return2\xctr\key_len - jmp .Lmain_loop2\xctr\key_len - -.Leq7\xctr\key_len: - do_aes_load 7, \key_len, \xctr - add $(7*16), p_out - and $(~7*16), num_bytes - jz .Ldo_return2\xctr\key_len - jmp .Lmain_loop2\xctr\key_len - -.Lmult_of_8_blks\xctr\key_len: - .if (\key_len != KEY_128) - vmovdqa 0*16(p_keys), xkey0 - vmovdqa 4*16(p_keys), xkey4 - vmovdqa 8*16(p_keys), xkey8 - vmovdqa 12*16(p_keys), xkey12 - .else - vmovdqa 0*16(p_keys), xkey0 - vmovdqa 3*16(p_keys), xkey4 - vmovdqa 6*16(p_keys), xkey8 - vmovdqa 9*16(p_keys), xkey12 - .endif -.align 16 -.Lmain_loop2\xctr\key_len: - /* num_bytes is a multiple of 8 and >0 */ - do_aes_noload 8, \key_len, \xctr - add $(8*16), p_out - sub $(8*16), num_bytes - jne .Lmain_loop2\xctr\key_len - -.Ldo_return2\xctr\key_len: - .if !\xctr - /* return updated IV */ - vpshufb xbyteswap, xcounter, xcounter - vmovdqu xcounter, (p_iv) - .endif - RET -.endm - -/* - * routine to do AES128 CTR enc/decrypt "by8" - * XMM registers are clobbered. - * Saving/restoring must be done at a higher level - * aes_ctr_enc_128_avx_by8(void *in, void *iv, void *keys, void *out, - * unsigned int num_bytes) - */ -SYM_FUNC_START(aes_ctr_enc_128_avx_by8) - /* call the aes main loop */ - do_aes_ctrmain KEY_128 0 - -SYM_FUNC_END(aes_ctr_enc_128_avx_by8) - -/* - * routine to do AES192 CTR enc/decrypt "by8" - * XMM registers are clobbered. - * Saving/restoring must be done at a higher level - * aes_ctr_enc_192_avx_by8(void *in, void *iv, void *keys, void *out, - * unsigned int num_bytes) - */ -SYM_FUNC_START(aes_ctr_enc_192_avx_by8) - /* call the aes main loop */ - do_aes_ctrmain KEY_192 0 - -SYM_FUNC_END(aes_ctr_enc_192_avx_by8) - -/* - * routine to do AES256 CTR enc/decrypt "by8" - * XMM registers are clobbered. - * Saving/restoring must be done at a higher level - * aes_ctr_enc_256_avx_by8(void *in, void *iv, void *keys, void *out, - * unsigned int num_bytes) - */ -SYM_FUNC_START(aes_ctr_enc_256_avx_by8) - /* call the aes main loop */ - do_aes_ctrmain KEY_256 0 - -SYM_FUNC_END(aes_ctr_enc_256_avx_by8) - -/* - * routine to do AES128 XCTR enc/decrypt "by8" - * XMM registers are clobbered. - * Saving/restoring must be done at a higher level - * aes_xctr_enc_128_avx_by8(const u8 *in, const u8 *iv, const void *keys, - * u8* out, unsigned int num_bytes, unsigned int byte_ctr) - */ -SYM_FUNC_START(aes_xctr_enc_128_avx_by8) - /* call the aes main loop */ - do_aes_ctrmain KEY_128 1 - -SYM_FUNC_END(aes_xctr_enc_128_avx_by8) - -/* - * routine to do AES192 XCTR enc/decrypt "by8" - * XMM registers are clobbered. - * Saving/restoring must be done at a higher level - * aes_xctr_enc_192_avx_by8(const u8 *in, const u8 *iv, const void *keys, - * u8* out, unsigned int num_bytes, unsigned int byte_ctr) - */ -SYM_FUNC_START(aes_xctr_enc_192_avx_by8) - /* call the aes main loop */ - do_aes_ctrmain KEY_192 1 - -SYM_FUNC_END(aes_xctr_enc_192_avx_by8) - -/* - * routine to do AES256 XCTR enc/decrypt "by8" - * XMM registers are clobbered. - * Saving/restoring must be done at a higher level - * aes_xctr_enc_256_avx_by8(const u8 *in, const u8 *iv, const void *keys, - * u8* out, unsigned int num_bytes, unsigned int byte_ctr) - */ -SYM_FUNC_START(aes_xctr_enc_256_avx_by8) - /* call the aes main loop */ - do_aes_ctrmain KEY_256 1 - -SYM_FUNC_END(aes_xctr_enc_256_avx_by8) |