summaryrefslogtreecommitdiff
path: root/arch/x86/crypto/aesni-intel_glue.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/crypto/aesni-intel_glue.c')
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c1588
1 files changed, 1015 insertions, 573 deletions
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index b1d90c25975a..11e95fc62636 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * Support for Intel AES-NI instructions. This file contains glue
- * code, the real AES implementation is in intel-aes_asm.S.
+ * Support for AES-NI and VAES instructions. This file contains glue code.
+ * The real AES implementations are in aesni-intel_asm.S and other .S files.
*
* Copyright (C) 2008, Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
@@ -13,6 +13,8 @@
* Tadeusz Struk (tadeusz.struk@intel.com)
* Aidan O'Mahony (aidan.o.mahony@intel.com)
* Copyright (c) 2010, Intel Corporation.
+ *
+ * Copyright 2024 Google LLC
*/
#include <linux/hardirq.h>
@@ -40,46 +42,15 @@
#define AESNI_ALIGN 16
#define AESNI_ALIGN_ATTR __attribute__ ((__aligned__(AESNI_ALIGN)))
#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE - 1))
-#define RFC4106_HASH_SUBKEY_SIZE 16
#define AESNI_ALIGN_EXTRA ((AESNI_ALIGN - 1) & ~(CRYPTO_MINALIGN - 1))
#define CRYPTO_AES_CTX_SIZE (sizeof(struct crypto_aes_ctx) + AESNI_ALIGN_EXTRA)
#define XTS_AES_CTX_SIZE (sizeof(struct aesni_xts_ctx) + AESNI_ALIGN_EXTRA)
-/* This data is stored at the end of the crypto_tfm struct.
- * It's a type of per "session" data storage location.
- * This needs to be 16 byte aligned.
- */
-struct aesni_rfc4106_gcm_ctx {
- u8 hash_subkey[16] AESNI_ALIGN_ATTR;
- struct crypto_aes_ctx aes_key_expanded AESNI_ALIGN_ATTR;
- u8 nonce[4];
-};
-
-struct generic_gcmaes_ctx {
- u8 hash_subkey[16] AESNI_ALIGN_ATTR;
- struct crypto_aes_ctx aes_key_expanded AESNI_ALIGN_ATTR;
-};
-
struct aesni_xts_ctx {
struct crypto_aes_ctx tweak_ctx AESNI_ALIGN_ATTR;
struct crypto_aes_ctx crypt_ctx AESNI_ALIGN_ATTR;
};
-#define GCM_BLOCK_LEN 16
-
-struct gcm_context_data {
- /* init, update and finalize context data */
- u8 aad_hash[GCM_BLOCK_LEN];
- u64 aad_length;
- u64 in_length;
- u8 partial_block_enc_key[GCM_BLOCK_LEN];
- u8 orig_IV[GCM_BLOCK_LEN];
- u8 current_counter[GCM_BLOCK_LEN];
- u64 partial_block_len;
- u64 unused;
- u8 hash_keys[GCM_BLOCK_LEN * 16];
-};
-
static inline void *aes_align_addr(void *addr)
{
if (crypto_tfm_ctx_alignment() >= AESNI_ALIGN)
@@ -87,8 +58,8 @@ static inline void *aes_align_addr(void *addr)
return PTR_ALIGN(addr, AESNI_ALIGN);
}
-asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
- unsigned int key_len);
+asmlinkage void aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
+ unsigned int key_len);
asmlinkage void aesni_enc(const void *ctx, u8 *out, const u8 *in);
asmlinkage void aesni_dec(const void *ctx, u8 *out, const u8 *in);
asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out,
@@ -104,14 +75,11 @@ asmlinkage void aesni_cts_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
asmlinkage void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
-#define AVX_GEN2_OPTSIZE 640
-#define AVX_GEN4_OPTSIZE 4096
-
-asmlinkage void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *out,
- const u8 *in, unsigned int len, u8 *iv);
+asmlinkage void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *out,
+ const u8 *in, unsigned int len, u8 *iv);
-asmlinkage void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *out,
- const u8 *in, unsigned int len, u8 *iv);
+asmlinkage void aesni_xts_dec(const struct crypto_aes_ctx *ctx, u8 *out,
+ const u8 *in, unsigned int len, u8 *iv);
#ifdef CONFIG_X86_64
@@ -119,23 +87,6 @@ asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
DEFINE_STATIC_CALL(aesni_ctr_enc_tfm, aesni_ctr_enc);
-/* Scatter / Gather routines, with args similar to above */
-asmlinkage void aesni_gcm_init(void *ctx,
- struct gcm_context_data *gdata,
- u8 *iv,
- u8 *hash_subkey, const u8 *aad,
- unsigned long aad_len);
-asmlinkage void aesni_gcm_enc_update(void *ctx,
- struct gcm_context_data *gdata, u8 *out,
- const u8 *in, unsigned long plaintext_len);
-asmlinkage void aesni_gcm_dec_update(void *ctx,
- struct gcm_context_data *gdata, u8 *out,
- const u8 *in,
- unsigned long ciphertext_len);
-asmlinkage void aesni_gcm_finalize(void *ctx,
- struct gcm_context_data *gdata,
- u8 *auth_tag, unsigned long auth_tag_len);
-
asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv,
void *keys, u8 *out, unsigned int num_bytes);
asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv,
@@ -155,67 +106,6 @@ asmlinkage void aes_xctr_enc_192_avx_by8(const u8 *in, const u8 *iv,
asmlinkage void aes_xctr_enc_256_avx_by8(const u8 *in, const u8 *iv,
const void *keys, u8 *out, unsigned int num_bytes,
unsigned int byte_ctr);
-
-/*
- * asmlinkage void aesni_gcm_init_avx_gen2()
- * gcm_data *my_ctx_data, context data
- * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
- */
-asmlinkage void aesni_gcm_init_avx_gen2(void *my_ctx_data,
- struct gcm_context_data *gdata,
- u8 *iv,
- u8 *hash_subkey,
- const u8 *aad,
- unsigned long aad_len);
-
-asmlinkage void aesni_gcm_enc_update_avx_gen2(void *ctx,
- struct gcm_context_data *gdata, u8 *out,
- const u8 *in, unsigned long plaintext_len);
-asmlinkage void aesni_gcm_dec_update_avx_gen2(void *ctx,
- struct gcm_context_data *gdata, u8 *out,
- const u8 *in,
- unsigned long ciphertext_len);
-asmlinkage void aesni_gcm_finalize_avx_gen2(void *ctx,
- struct gcm_context_data *gdata,
- u8 *auth_tag, unsigned long auth_tag_len);
-
-/*
- * asmlinkage void aesni_gcm_init_avx_gen4()
- * gcm_data *my_ctx_data, context data
- * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
- */
-asmlinkage void aesni_gcm_init_avx_gen4(void *my_ctx_data,
- struct gcm_context_data *gdata,
- u8 *iv,
- u8 *hash_subkey,
- const u8 *aad,
- unsigned long aad_len);
-
-asmlinkage void aesni_gcm_enc_update_avx_gen4(void *ctx,
- struct gcm_context_data *gdata, u8 *out,
- const u8 *in, unsigned long plaintext_len);
-asmlinkage void aesni_gcm_dec_update_avx_gen4(void *ctx,
- struct gcm_context_data *gdata, u8 *out,
- const u8 *in,
- unsigned long ciphertext_len);
-asmlinkage void aesni_gcm_finalize_avx_gen4(void *ctx,
- struct gcm_context_data *gdata,
- u8 *auth_tag, unsigned long auth_tag_len);
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(gcm_use_avx);
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(gcm_use_avx2);
-
-static inline struct
-aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
-{
- return aes_align_addr(crypto_aead_ctx(tfm));
-}
-
-static inline struct
-generic_gcmaes_ctx *generic_gcmaes_ctx_get(struct crypto_aead *tfm)
-{
- return aes_align_addr(crypto_aead_ctx(tfm));
-}
#endif
static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
@@ -233,19 +123,17 @@ static int aes_set_key_common(struct crypto_aes_ctx *ctx,
{
int err;
- if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 &&
- key_len != AES_KEYSIZE_256)
- return -EINVAL;
-
if (!crypto_simd_usable())
- err = aes_expandkey(ctx, in_key, key_len);
- else {
- kernel_fpu_begin();
- err = aesni_set_key(ctx, in_key, key_len);
- kernel_fpu_end();
- }
+ return aes_expandkey(ctx, in_key, key_len);
- return err;
+ err = aes_check_keylen(key_len);
+ if (err)
+ return err;
+
+ kernel_fpu_begin();
+ aesni_set_key(ctx, in_key, key_len);
+ kernel_fpu_end();
+ return 0;
}
static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
@@ -591,293 +479,9 @@ static int xctr_crypt(struct skcipher_request *req)
}
return err;
}
-
-static int
-rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len)
-{
- struct crypto_aes_ctx ctx;
- int ret;
-
- ret = aes_expandkey(&ctx, key, key_len);
- if (ret)
- return ret;
-
- /* Clear the data in the hash sub key container to zero.*/
- /* We want to cipher all zeros to create the hash sub key. */
- memset(hash_subkey, 0, RFC4106_HASH_SUBKEY_SIZE);
-
- aes_encrypt(&ctx, hash_subkey, hash_subkey);
-
- memzero_explicit(&ctx, sizeof(ctx));
- return 0;
-}
-
-static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key,
- unsigned int key_len)
-{
- struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(aead);
-
- if (key_len < 4)
- return -EINVAL;
-
- /*Account for 4 byte nonce at the end.*/
- key_len -= 4;
-
- memcpy(ctx->nonce, key + key_len, sizeof(ctx->nonce));
-
- return aes_set_key_common(&ctx->aes_key_expanded, key, key_len) ?:
- rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
-}
-
-/* This is the Integrity Check Value (aka the authentication tag) length and can
- * be 8, 12 or 16 bytes long. */
-static int common_rfc4106_set_authsize(struct crypto_aead *aead,
- unsigned int authsize)
-{
- switch (authsize) {
- case 8:
- case 12:
- case 16:
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int generic_gcmaes_set_authsize(struct crypto_aead *tfm,
- unsigned int authsize)
-{
- switch (authsize) {
- case 4:
- case 8:
- case 12:
- case 13:
- case 14:
- case 15:
- case 16:
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
- unsigned int assoclen, u8 *hash_subkey,
- u8 *iv, void *aes_ctx, u8 *auth_tag,
- unsigned long auth_tag_len)
-{
- u8 databuf[sizeof(struct gcm_context_data) + (AESNI_ALIGN - 8)] __aligned(8);
- struct gcm_context_data *data = PTR_ALIGN((void *)databuf, AESNI_ALIGN);
- unsigned long left = req->cryptlen;
- struct scatter_walk assoc_sg_walk;
- struct skcipher_walk walk;
- bool do_avx, do_avx2;
- u8 *assocmem = NULL;
- u8 *assoc;
- int err;
-
- if (!enc)
- left -= auth_tag_len;
-
- do_avx = (left >= AVX_GEN2_OPTSIZE);
- do_avx2 = (left >= AVX_GEN4_OPTSIZE);
-
- /* Linearize assoc, if not already linear */
- if (req->src->length >= assoclen && req->src->length) {
- scatterwalk_start(&assoc_sg_walk, req->src);
- assoc = scatterwalk_map(&assoc_sg_walk);
- } else {
- gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
- GFP_KERNEL : GFP_ATOMIC;
-
- /* assoc can be any length, so must be on heap */
- assocmem = kmalloc(assoclen, flags);
- if (unlikely(!assocmem))
- return -ENOMEM;
- assoc = assocmem;
-
- scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0);
- }
-
- kernel_fpu_begin();
- if (static_branch_likely(&gcm_use_avx2) && do_avx2)
- aesni_gcm_init_avx_gen4(aes_ctx, data, iv, hash_subkey, assoc,
- assoclen);
- else if (static_branch_likely(&gcm_use_avx) && do_avx)
- aesni_gcm_init_avx_gen2(aes_ctx, data, iv, hash_subkey, assoc,
- assoclen);
- else
- aesni_gcm_init(aes_ctx, data, iv, hash_subkey, assoc, assoclen);
- kernel_fpu_end();
-
- if (!assocmem)
- scatterwalk_unmap(assoc);
- else
- kfree(assocmem);
-
- err = enc ? skcipher_walk_aead_encrypt(&walk, req, false)
- : skcipher_walk_aead_decrypt(&walk, req, false);
-
- while (walk.nbytes > 0) {
- kernel_fpu_begin();
- if (static_branch_likely(&gcm_use_avx2) && do_avx2) {
- if (enc)
- aesni_gcm_enc_update_avx_gen4(aes_ctx, data,
- walk.dst.virt.addr,
- walk.src.virt.addr,
- walk.nbytes);
- else
- aesni_gcm_dec_update_avx_gen4(aes_ctx, data,
- walk.dst.virt.addr,
- walk.src.virt.addr,
- walk.nbytes);
- } else if (static_branch_likely(&gcm_use_avx) && do_avx) {
- if (enc)
- aesni_gcm_enc_update_avx_gen2(aes_ctx, data,
- walk.dst.virt.addr,
- walk.src.virt.addr,
- walk.nbytes);
- else
- aesni_gcm_dec_update_avx_gen2(aes_ctx, data,
- walk.dst.virt.addr,
- walk.src.virt.addr,
- walk.nbytes);
- } else if (enc) {
- aesni_gcm_enc_update(aes_ctx, data, walk.dst.virt.addr,
- walk.src.virt.addr, walk.nbytes);
- } else {
- aesni_gcm_dec_update(aes_ctx, data, walk.dst.virt.addr,
- walk.src.virt.addr, walk.nbytes);
- }
- kernel_fpu_end();
-
- err = skcipher_walk_done(&walk, 0);
- }
-
- if (err)
- return err;
-
- kernel_fpu_begin();
- if (static_branch_likely(&gcm_use_avx2) && do_avx2)
- aesni_gcm_finalize_avx_gen4(aes_ctx, data, auth_tag,
- auth_tag_len);
- else if (static_branch_likely(&gcm_use_avx) && do_avx)
- aesni_gcm_finalize_avx_gen2(aes_ctx, data, auth_tag,
- auth_tag_len);
- else
- aesni_gcm_finalize(aes_ctx, data, auth_tag, auth_tag_len);
- kernel_fpu_end();
-
- return 0;
-}
-
-static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
- u8 *hash_subkey, u8 *iv, void *aes_ctx)
-{
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- unsigned long auth_tag_len = crypto_aead_authsize(tfm);
- u8 auth_tag[16];
- int err;
-
- err = gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv, aes_ctx,
- auth_tag, auth_tag_len);
- if (err)
- return err;
-
- scatterwalk_map_and_copy(auth_tag, req->dst,
- req->assoclen + req->cryptlen,
- auth_tag_len, 1);
- return 0;
-}
-
-static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen,
- u8 *hash_subkey, u8 *iv, void *aes_ctx)
-{
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- unsigned long auth_tag_len = crypto_aead_authsize(tfm);
- u8 auth_tag_msg[16];
- u8 auth_tag[16];
- int err;
-
- err = gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv, aes_ctx,
- auth_tag, auth_tag_len);
- if (err)
- return err;
-
- /* Copy out original auth_tag */
- scatterwalk_map_and_copy(auth_tag_msg, req->src,
- req->assoclen + req->cryptlen - auth_tag_len,
- auth_tag_len, 0);
-
- /* Compare generated tag with passed in tag. */
- if (crypto_memneq(auth_tag_msg, auth_tag, auth_tag_len)) {
- memzero_explicit(auth_tag, sizeof(auth_tag));
- return -EBADMSG;
- }
- return 0;
-}
-
-static int helper_rfc4106_encrypt(struct aead_request *req)
-{
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
- void *aes_ctx = &(ctx->aes_key_expanded);
- u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8);
- u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN);
- unsigned int i;
- __be32 counter = cpu_to_be32(1);
-
- /* Assuming we are supporting rfc4106 64-bit extended */
- /* sequence numbers We need to have the AAD length equal */
- /* to 16 or 20 bytes */
- if (unlikely(req->assoclen != 16 && req->assoclen != 20))
- return -EINVAL;
-
- /* IV below built */
- for (i = 0; i < 4; i++)
- *(iv+i) = ctx->nonce[i];
- for (i = 0; i < 8; i++)
- *(iv+4+i) = req->iv[i];
- *((__be32 *)(iv+12)) = counter;
-
- return gcmaes_encrypt(req, req->assoclen - 8, ctx->hash_subkey, iv,
- aes_ctx);
-}
-
-static int helper_rfc4106_decrypt(struct aead_request *req)
-{
- __be32 counter = cpu_to_be32(1);
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
- void *aes_ctx = &(ctx->aes_key_expanded);
- u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8);
- u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN);
- unsigned int i;
-
- if (unlikely(req->assoclen != 16 && req->assoclen != 20))
- return -EINVAL;
-
- /* Assuming we are supporting rfc4106 64-bit extended */
- /* sequence numbers We need to have the AAD length */
- /* equal to 16 or 20 bytes */
-
- /* IV below built */
- for (i = 0; i < 4; i++)
- *(iv+i) = ctx->nonce[i];
- for (i = 0; i < 8; i++)
- *(iv+4+i) = req->iv[i];
- *((__be32 *)(iv+12)) = counter;
-
- return gcmaes_decrypt(req, req->assoclen - 8, ctx->hash_subkey, iv,
- aes_ctx);
-}
#endif
-static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key,
+static int xts_setkey_aesni(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen)
{
struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm);
@@ -898,108 +502,149 @@ static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key,
return aes_set_key_common(&ctx->tweak_ctx, key + keylen, keylen);
}
-static int xts_crypt(struct skcipher_request *req, bool encrypt)
+typedef void (*xts_encrypt_iv_func)(const struct crypto_aes_ctx *tweak_key,
+ u8 iv[AES_BLOCK_SIZE]);
+typedef void (*xts_crypt_func)(const struct crypto_aes_ctx *key,
+ const u8 *src, u8 *dst, int len,
+ u8 tweak[AES_BLOCK_SIZE]);
+
+/* This handles cases where the source and/or destination span pages. */
+static noinline int
+xts_crypt_slowpath(struct skcipher_request *req, xts_crypt_func crypt_func)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm);
+ const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm);
int tail = req->cryptlen % AES_BLOCK_SIZE;
+ struct scatterlist sg_src[2], sg_dst[2];
struct skcipher_request subreq;
struct skcipher_walk walk;
+ struct scatterlist *src, *dst;
int err;
- if (req->cryptlen < AES_BLOCK_SIZE)
- return -EINVAL;
-
- err = skcipher_walk_virt(&walk, req, false);
- if (!walk.nbytes)
- return err;
-
- if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
- int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
-
- skcipher_walk_abort(&walk);
-
+ /*
+ * If the message length isn't divisible by the AES block size, then
+ * separate off the last full block and the partial block. This ensures
+ * that they are processed in the same call to the assembly function,
+ * which is required for ciphertext stealing.
+ */
+ if (tail) {
skcipher_request_set_tfm(&subreq, tfm);
skcipher_request_set_callback(&subreq,
skcipher_request_flags(req),
NULL, NULL);
skcipher_request_set_crypt(&subreq, req->src, req->dst,
- blocks * AES_BLOCK_SIZE, req->iv);
+ req->cryptlen - tail - AES_BLOCK_SIZE,
+ req->iv);
req = &subreq;
-
- err = skcipher_walk_virt(&walk, req, false);
- if (!walk.nbytes)
- return err;
- } else {
- tail = 0;
}
- kernel_fpu_begin();
+ err = skcipher_walk_virt(&walk, req, false);
- /* calculate first value of T */
- aesni_enc(&ctx->tweak_ctx, walk.iv, walk.iv);
+ while (walk.nbytes) {
+ kernel_fpu_begin();
+ (*crypt_func)(&ctx->crypt_ctx,
+ walk.src.virt.addr, walk.dst.virt.addr,
+ walk.nbytes & ~(AES_BLOCK_SIZE - 1), req->iv);
+ kernel_fpu_end();
+ err = skcipher_walk_done(&walk,
+ walk.nbytes & (AES_BLOCK_SIZE - 1));
+ }
- while (walk.nbytes > 0) {
- int nbytes = walk.nbytes;
+ if (err || !tail)
+ return err;
- if (nbytes < walk.total)
- nbytes &= ~(AES_BLOCK_SIZE - 1);
+ /* Do ciphertext stealing with the last full block and partial block. */
- if (encrypt)
- aesni_xts_encrypt(&ctx->crypt_ctx,
- walk.dst.virt.addr, walk.src.virt.addr,
- nbytes, walk.iv);
- else
- aesni_xts_decrypt(&ctx->crypt_ctx,
- walk.dst.virt.addr, walk.src.virt.addr,
- nbytes, walk.iv);
- kernel_fpu_end();
+ dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
+ if (req->dst != req->src)
+ dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
- err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
+ req->iv);
- if (walk.nbytes > 0)
- kernel_fpu_begin();
- }
+ err = skcipher_walk_virt(&walk, req, false);
+ if (err)
+ return err;
+
+ kernel_fpu_begin();
+ (*crypt_func)(&ctx->crypt_ctx, walk.src.virt.addr, walk.dst.virt.addr,
+ walk.nbytes, req->iv);
+ kernel_fpu_end();
- if (unlikely(tail > 0 && !err)) {
- struct scatterlist sg_src[2], sg_dst[2];
- struct scatterlist *src, *dst;
+ return skcipher_walk_done(&walk, 0);
+}
- dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
- if (req->dst != req->src)
- dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
+/* __always_inline to avoid indirect call in fastpath */
+static __always_inline int
+xts_crypt(struct skcipher_request *req, xts_encrypt_iv_func encrypt_iv,
+ xts_crypt_func crypt_func)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm);
+ const unsigned int cryptlen = req->cryptlen;
+ struct scatterlist *src = req->src;
+ struct scatterlist *dst = req->dst;
- skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
- req->iv);
+ if (unlikely(cryptlen < AES_BLOCK_SIZE))
+ return -EINVAL;
- err = skcipher_walk_virt(&walk, &subreq, false);
- if (err)
- return err;
+ kernel_fpu_begin();
+ (*encrypt_iv)(&ctx->tweak_ctx, req->iv);
- kernel_fpu_begin();
- if (encrypt)
- aesni_xts_encrypt(&ctx->crypt_ctx,
- walk.dst.virt.addr, walk.src.virt.addr,
- walk.nbytes, walk.iv);
- else
- aesni_xts_decrypt(&ctx->crypt_ctx,
- walk.dst.virt.addr, walk.src.virt.addr,
- walk.nbytes, walk.iv);
+ /*
+ * In practice, virtually all XTS plaintexts and ciphertexts are either
+ * 512 or 4096 bytes, aligned such that they don't span page boundaries.
+ * To optimize the performance of these cases, and also any other case
+ * where no page boundary is spanned, the below fast-path handles
+ * single-page sources and destinations as efficiently as possible.
+ */
+ if (likely(src->length >= cryptlen && dst->length >= cryptlen &&
+ src->offset + cryptlen <= PAGE_SIZE &&
+ dst->offset + cryptlen <= PAGE_SIZE)) {
+ struct page *src_page = sg_page(src);
+ struct page *dst_page = sg_page(dst);
+ void *src_virt = kmap_local_page(src_page) + src->offset;
+ void *dst_virt = kmap_local_page(dst_page) + dst->offset;
+
+ (*crypt_func)(&ctx->crypt_ctx, src_virt, dst_virt, cryptlen,
+ req->iv);
+ kunmap_local(dst_virt);
+ kunmap_local(src_virt);
kernel_fpu_end();
-
- err = skcipher_walk_done(&walk, 0);
+ return 0;
}
- return err;
+ kernel_fpu_end();
+ return xts_crypt_slowpath(req, crypt_func);
}
-static int xts_encrypt(struct skcipher_request *req)
+static void aesni_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key,
+ u8 iv[AES_BLOCK_SIZE])
{
- return xts_crypt(req, true);
+ aesni_enc(tweak_key, iv, iv);
}
-static int xts_decrypt(struct skcipher_request *req)
+static void aesni_xts_encrypt(const struct crypto_aes_ctx *key,
+ const u8 *src, u8 *dst, int len,
+ u8 tweak[AES_BLOCK_SIZE])
{
- return xts_crypt(req, false);
+ aesni_xts_enc(key, dst, src, len, tweak);
+}
+
+static void aesni_xts_decrypt(const struct crypto_aes_ctx *key,
+ const u8 *src, u8 *dst, int len,
+ u8 tweak[AES_BLOCK_SIZE])
+{
+ aesni_xts_dec(key, dst, src, len, tweak);
+}
+
+static int xts_encrypt_aesni(struct skcipher_request *req)
+{
+ return xts_crypt(req, aesni_xts_encrypt_iv, aesni_xts_encrypt);
+}
+
+static int xts_decrypt_aesni(struct skcipher_request *req)
+{
+ return xts_crypt(req, aesni_xts_encrypt_iv, aesni_xts_decrypt);
}
static struct crypto_alg aesni_cipher_alg = {
@@ -1103,9 +748,9 @@ static struct skcipher_alg aesni_skciphers[] = {
.max_keysize = 2 * AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.walksize = 2 * AES_BLOCK_SIZE,
- .setkey = xts_aesni_setkey,
- .encrypt = xts_encrypt,
- .decrypt = xts_decrypt,
+ .setkey = xts_setkey_aesni,
+ .encrypt = xts_encrypt_aesni,
+ .decrypt = xts_decrypt_aesni,
}
};
@@ -1137,90 +782,887 @@ static struct skcipher_alg aesni_xctr = {
};
static struct simd_skcipher_alg *aesni_simd_xctr;
-#endif /* CONFIG_X86_64 */
-#ifdef CONFIG_X86_64
-static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key,
- unsigned int key_len)
+asmlinkage void aes_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key,
+ u8 iv[AES_BLOCK_SIZE]);
+
+#define DEFINE_XTS_ALG(suffix, driver_name, priority) \
+ \
+asmlinkage void \
+aes_xts_encrypt_##suffix(const struct crypto_aes_ctx *key, const u8 *src, \
+ u8 *dst, int len, u8 tweak[AES_BLOCK_SIZE]); \
+asmlinkage void \
+aes_xts_decrypt_##suffix(const struct crypto_aes_ctx *key, const u8 *src, \
+ u8 *dst, int len, u8 tweak[AES_BLOCK_SIZE]); \
+ \
+static int xts_encrypt_##suffix(struct skcipher_request *req) \
+{ \
+ return xts_crypt(req, aes_xts_encrypt_iv, aes_xts_encrypt_##suffix); \
+} \
+ \
+static int xts_decrypt_##suffix(struct skcipher_request *req) \
+{ \
+ return xts_crypt(req, aes_xts_encrypt_iv, aes_xts_decrypt_##suffix); \
+} \
+ \
+static struct skcipher_alg aes_xts_alg_##suffix = { \
+ .base = { \
+ .cra_name = "__xts(aes)", \
+ .cra_driver_name = "__" driver_name, \
+ .cra_priority = priority, \
+ .cra_flags = CRYPTO_ALG_INTERNAL, \
+ .cra_blocksize = AES_BLOCK_SIZE, \
+ .cra_ctxsize = XTS_AES_CTX_SIZE, \
+ .cra_module = THIS_MODULE, \
+ }, \
+ .min_keysize = 2 * AES_MIN_KEY_SIZE, \
+ .max_keysize = 2 * AES_MAX_KEY_SIZE, \
+ .ivsize = AES_BLOCK_SIZE, \
+ .walksize = 2 * AES_BLOCK_SIZE, \
+ .setkey = xts_setkey_aesni, \
+ .encrypt = xts_encrypt_##suffix, \
+ .decrypt = xts_decrypt_##suffix, \
+}; \
+ \
+static struct simd_skcipher_alg *aes_xts_simdalg_##suffix
+
+DEFINE_XTS_ALG(aesni_avx, "xts-aes-aesni-avx", 500);
+#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
+DEFINE_XTS_ALG(vaes_avx2, "xts-aes-vaes-avx2", 600);
+DEFINE_XTS_ALG(vaes_avx10_256, "xts-aes-vaes-avx10_256", 700);
+DEFINE_XTS_ALG(vaes_avx10_512, "xts-aes-vaes-avx10_512", 800);
+#endif
+
+/* The common part of the x86_64 AES-GCM key struct */
+struct aes_gcm_key {
+ /* Expanded AES key and the AES key length in bytes */
+ struct crypto_aes_ctx aes_key;
+
+ /* RFC4106 nonce (used only by the rfc4106 algorithms) */
+ u32 rfc4106_nonce;
+};
+
+/* Key struct used by the AES-NI implementations of AES-GCM */
+struct aes_gcm_key_aesni {
+ /*
+ * Common part of the key. The assembly code requires 16-byte alignment
+ * for the round keys; we get this by them being located at the start of
+ * the struct and the whole struct being 16-byte aligned.
+ */
+ struct aes_gcm_key base;
+
+ /*
+ * Powers of the hash key H^8 through H^1. These are 128-bit values.
+ * They all have an extra factor of x^-1 and are byte-reversed. 16-byte
+ * alignment is required by the assembly code.
+ */
+ u64 h_powers[8][2] __aligned(16);
+
+ /*
+ * h_powers_xored[i] contains the two 64-bit halves of h_powers[i] XOR'd
+ * together. It's used for Karatsuba multiplication. 16-byte alignment
+ * is required by the assembly code.
+ */
+ u64 h_powers_xored[8] __aligned(16);
+
+ /*
+ * H^1 times x^64 (and also the usual extra factor of x^-1). 16-byte
+ * alignment is required by the assembly code.
+ */
+ u64 h_times_x64[2] __aligned(16);
+};
+#define AES_GCM_KEY_AESNI(key) \
+ container_of((key), struct aes_gcm_key_aesni, base)
+#define AES_GCM_KEY_AESNI_SIZE \
+ (sizeof(struct aes_gcm_key_aesni) + (15 & ~(CRYPTO_MINALIGN - 1)))
+
+/* Key struct used by the VAES + AVX10 implementations of AES-GCM */
+struct aes_gcm_key_avx10 {
+ /*
+ * Common part of the key. The assembly code prefers 16-byte alignment
+ * for the round keys; we get this by them being located at the start of
+ * the struct and the whole struct being 64-byte aligned.
+ */
+ struct aes_gcm_key base;
+
+ /*
+ * Powers of the hash key H^16 through H^1. These are 128-bit values.
+ * They all have an extra factor of x^-1 and are byte-reversed. This
+ * array is aligned to a 64-byte boundary to make it naturally aligned
+ * for 512-bit loads, which can improve performance. (The assembly code
+ * doesn't *need* the alignment; this is just an optimization.)
+ */
+ u64 h_powers[16][2] __aligned(64);
+
+ /* Three padding blocks required by the assembly code */
+ u64 padding[3][2];
+};
+#define AES_GCM_KEY_AVX10(key) \
+ container_of((key), struct aes_gcm_key_avx10, base)
+#define AES_GCM_KEY_AVX10_SIZE \
+ (sizeof(struct aes_gcm_key_avx10) + (63 & ~(CRYPTO_MINALIGN - 1)))
+
+/*
+ * These flags are passed to the AES-GCM helper functions to specify the
+ * specific version of AES-GCM (RFC4106 or not), whether it's encryption or
+ * decryption, and which assembly functions should be called. Assembly
+ * functions are selected using flags instead of function pointers to avoid
+ * indirect calls (which are very expensive on x86) regardless of inlining.
+ */
+#define FLAG_RFC4106 BIT(0)
+#define FLAG_ENC BIT(1)
+#define FLAG_AVX BIT(2)
+#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
+# define FLAG_AVX10_256 BIT(3)
+# define FLAG_AVX10_512 BIT(4)
+#else
+ /*
+ * This should cause all calls to the AVX10 assembly functions to be
+ * optimized out, avoiding the need to ifdef each call individually.
+ */
+# define FLAG_AVX10_256 0
+# define FLAG_AVX10_512 0
+#endif
+
+static inline struct aes_gcm_key *
+aes_gcm_key_get(struct crypto_aead *tfm, int flags)
{
- struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(aead);
+ if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512))
+ return PTR_ALIGN(crypto_aead_ctx(tfm), 64);
+ else
+ return PTR_ALIGN(crypto_aead_ctx(tfm), 16);
+}
+
+asmlinkage void
+aes_gcm_precompute_aesni(struct aes_gcm_key_aesni *key);
+asmlinkage void
+aes_gcm_precompute_aesni_avx(struct aes_gcm_key_aesni *key);
+asmlinkage void
+aes_gcm_precompute_vaes_avx10_256(struct aes_gcm_key_avx10 *key);
+asmlinkage void
+aes_gcm_precompute_vaes_avx10_512(struct aes_gcm_key_avx10 *key);
- return aes_set_key_common(&ctx->aes_key_expanded, key, key_len) ?:
- rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
+static void aes_gcm_precompute(struct aes_gcm_key *key, int flags)
+{
+ /*
+ * To make things a bit easier on the assembly side, the AVX10
+ * implementations use the same key format. Therefore, a single
+ * function using 256-bit vectors would suffice here. However, it's
+ * straightforward to provide a 512-bit one because of how the assembly
+ * code is structured, and it works nicely because the total size of the
+ * key powers is a multiple of 512 bits. So we take advantage of that.
+ *
+ * A similar situation applies to the AES-NI implementations.
+ */
+ if (flags & FLAG_AVX10_512)
+ aes_gcm_precompute_vaes_avx10_512(AES_GCM_KEY_AVX10(key));
+ else if (flags & FLAG_AVX10_256)
+ aes_gcm_precompute_vaes_avx10_256(AES_GCM_KEY_AVX10(key));
+ else if (flags & FLAG_AVX)
+ aes_gcm_precompute_aesni_avx(AES_GCM_KEY_AESNI(key));
+ else
+ aes_gcm_precompute_aesni(AES_GCM_KEY_AESNI(key));
}
-static int generic_gcmaes_encrypt(struct aead_request *req)
+asmlinkage void
+aes_gcm_aad_update_aesni(const struct aes_gcm_key_aesni *key,
+ u8 ghash_acc[16], const u8 *aad, int aadlen);
+asmlinkage void
+aes_gcm_aad_update_aesni_avx(const struct aes_gcm_key_aesni *key,
+ u8 ghash_acc[16], const u8 *aad, int aadlen);
+asmlinkage void
+aes_gcm_aad_update_vaes_avx10(const struct aes_gcm_key_avx10 *key,
+ u8 ghash_acc[16], const u8 *aad, int aadlen);
+
+static void aes_gcm_aad_update(const struct aes_gcm_key *key, u8 ghash_acc[16],
+ const u8 *aad, int aadlen, int flags)
{
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm);
- void *aes_ctx = &(ctx->aes_key_expanded);
- u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8);
- u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN);
- __be32 counter = cpu_to_be32(1);
+ if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512))
+ aes_gcm_aad_update_vaes_avx10(AES_GCM_KEY_AVX10(key), ghash_acc,
+ aad, aadlen);
+ else if (flags & FLAG_AVX)
+ aes_gcm_aad_update_aesni_avx(AES_GCM_KEY_AESNI(key), ghash_acc,
+ aad, aadlen);
+ else
+ aes_gcm_aad_update_aesni(AES_GCM_KEY_AESNI(key), ghash_acc,
+ aad, aadlen);
+}
+
+asmlinkage void /* encryption update: one prototype per instruction-set variant */
+aes_gcm_enc_update_aesni(const struct aes_gcm_key_aesni *key,
+ const u32 le_ctr[4], u8 ghash_acc[16],
+ const u8 *src, u8 *dst, int datalen);
+asmlinkage void
+aes_gcm_enc_update_aesni_avx(const struct aes_gcm_key_aesni *key,
+ const u32 le_ctr[4], u8 ghash_acc[16],
+ const u8 *src, u8 *dst, int datalen);
+asmlinkage void
+aes_gcm_enc_update_vaes_avx10_256(const struct aes_gcm_key_avx10 *key,
+ const u32 le_ctr[4], u8 ghash_acc[16],
+ const u8 *src, u8 *dst, int datalen);
+asmlinkage void
+aes_gcm_enc_update_vaes_avx10_512(const struct aes_gcm_key_avx10 *key,
+ const u32 le_ctr[4], u8 ghash_acc[16],
+ const u8 *src, u8 *dst, int datalen);
+
+asmlinkage void /* decryption update: same parameter lists as the enc_update set */
+aes_gcm_dec_update_aesni(const struct aes_gcm_key_aesni *key,
+ const u32 le_ctr[4], u8 ghash_acc[16],
+ const u8 *src, u8 *dst, int datalen);
+asmlinkage void
+aes_gcm_dec_update_aesni_avx(const struct aes_gcm_key_aesni *key,
+ const u32 le_ctr[4], u8 ghash_acc[16],
+ const u8 *src, u8 *dst, int datalen);
+asmlinkage void
+aes_gcm_dec_update_vaes_avx10_256(const struct aes_gcm_key_avx10 *key,
+ const u32 le_ctr[4], u8 ghash_acc[16],
+ const u8 *src, u8 *dst, int datalen);
+asmlinkage void
+aes_gcm_dec_update_vaes_avx10_512(const struct aes_gcm_key_avx10 *key,
+ const u32 le_ctr[4], u8 ghash_acc[16],
+ const u8 *src, u8 *dst, int datalen);
+
+/*
+ * Route one en/decryption update call to the variant-specific function.
+ *
+ * FLAG_ENC in @flags picks the enc_update family; otherwise the dec_update
+ * family is used. Within each family the flags are tested in the order
+ * FLAG_AVX10_512, FLAG_AVX10_256, FLAG_AVX, with the plain aesni function as
+ * the fallback. The AVX10 functions receive AES_GCM_KEY_AVX10(key); the
+ * others receive AES_GCM_KEY_AESNI(key). All remaining arguments are passed
+ * through unchanged.
+ *
+ * __always_inline to optimize out the branches based on @flags
+ */
+static __always_inline void
+aes_gcm_update(const struct aes_gcm_key *key,
+ const u32 le_ctr[4], u8 ghash_acc[16],
+ const u8 *src, u8 *dst, int datalen, int flags)
+{
+ if (flags & FLAG_ENC) {
+ if (flags & FLAG_AVX10_512)
+ aes_gcm_enc_update_vaes_avx10_512(AES_GCM_KEY_AVX10(key),
+ le_ctr, ghash_acc,
+ src, dst, datalen);
+ else if (flags & FLAG_AVX10_256)
+ aes_gcm_enc_update_vaes_avx10_256(AES_GCM_KEY_AVX10(key),
+ le_ctr, ghash_acc,
+ src, dst, datalen);
+ else if (flags & FLAG_AVX)
+ aes_gcm_enc_update_aesni_avx(AES_GCM_KEY_AESNI(key),
+ le_ctr, ghash_acc,
+ src, dst, datalen);
+ else
+ aes_gcm_enc_update_aesni(AES_GCM_KEY_AESNI(key), le_ctr,
+ ghash_acc, src, dst, datalen);
+ } else {
+ if (flags & FLAG_AVX10_512)
+ aes_gcm_dec_update_vaes_avx10_512(AES_GCM_KEY_AVX10(key),
+ le_ctr, ghash_acc,
+ src, dst, datalen);
+ else if (flags & FLAG_AVX10_256)
+ aes_gcm_dec_update_vaes_avx10_256(AES_GCM_KEY_AVX10(key),
+ le_ctr, ghash_acc,
+ src, dst, datalen);
+ else if (flags & FLAG_AVX)
+ aes_gcm_dec_update_aesni_avx(AES_GCM_KEY_AESNI(key),
+ le_ctr, ghash_acc,
+ src, dst, datalen);
+ else
+ aes_gcm_dec_update_aesni(AES_GCM_KEY_AESNI(key),
+ le_ctr, ghash_acc,
+ src, dst, datalen);
+ }
+}
+
+asmlinkage void /* encryption finalization; the two AVX10 widths share one prototype */
+aes_gcm_enc_final_aesni(const struct aes_gcm_key_aesni *key,
+ const u32 le_ctr[4], u8 ghash_acc[16],
+ u64 total_aadlen, u64 total_datalen);
+asmlinkage void
+aes_gcm_enc_final_aesni_avx(const struct aes_gcm_key_aesni *key,
+ const u32 le_ctr[4], u8 ghash_acc[16],
+ u64 total_aadlen, u64 total_datalen);
+asmlinkage void
+aes_gcm_enc_final_vaes_avx10(const struct aes_gcm_key_avx10 *key,
+ const u32 le_ctr[4], u8 ghash_acc[16],
+ u64 total_aadlen, u64 total_datalen);
+
+/*
+ * Route the encryption finalization to the variant-specific function.
+ * Either AVX10 flag selects the single vaes_avx10 function, FLAG_AVX selects
+ * the aesni_avx function, and the aesni function is the fallback. The
+ * remaining arguments are passed through unchanged.
+ *
+ * __always_inline to optimize out the branches based on @flags
+ */
+static __always_inline void
+aes_gcm_enc_final(const struct aes_gcm_key *key,
+ const u32 le_ctr[4], u8 ghash_acc[16],
+ u64 total_aadlen, u64 total_datalen, int flags)
+{
+ if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512))
+ aes_gcm_enc_final_vaes_avx10(AES_GCM_KEY_AVX10(key),
+ le_ctr, ghash_acc,
+ total_aadlen, total_datalen);
+ else if (flags & FLAG_AVX)
+ aes_gcm_enc_final_aesni_avx(AES_GCM_KEY_AESNI(key),
+ le_ctr, ghash_acc,
+ total_aadlen, total_datalen);
+ else
+ aes_gcm_enc_final_aesni(AES_GCM_KEY_AESNI(key),
+ le_ctr, ghash_acc,
+ total_aadlen, total_datalen);
+}
+
+asmlinkage bool __must_check /* decryption finalization: also takes the tag and its length, and returns bool */
+aes_gcm_dec_final_aesni(const struct aes_gcm_key_aesni *key,
+ const u32 le_ctr[4], const u8 ghash_acc[16],
+ u64 total_aadlen, u64 total_datalen,
+ const u8 tag[16], int taglen);
+asmlinkage bool __must_check
+aes_gcm_dec_final_aesni_avx(const struct aes_gcm_key_aesni *key,
+ const u32 le_ctr[4], const u8 ghash_acc[16],
+ u64 total_aadlen, u64 total_datalen,
+ const u8 tag[16], int taglen);
+asmlinkage bool __must_check
+aes_gcm_dec_final_vaes_avx10(const struct aes_gcm_key_avx10 *key,
+ const u32 le_ctr[4], const u8 ghash_acc[16],
+ u64 total_aadlen, u64 total_datalen,
+ const u8 tag[16], int taglen);
+
+/*
+ * Route the decryption finalization to the variant-specific function and
+ * return its bool result unchanged. Either AVX10 flag selects the single
+ * vaes_avx10 function, FLAG_AVX selects the aesni_avx function, and the aesni
+ * function is the fallback. gcm_crypt() maps a false result to -EBADMSG.
+ *
+ * __always_inline to optimize out the branches based on @flags
+ */
+static __always_inline bool __must_check
+aes_gcm_dec_final(const struct aes_gcm_key *key, const u32 le_ctr[4],
+ u8 ghash_acc[16], u64 total_aadlen, u64 total_datalen,
+ u8 tag[16], int taglen, int flags)
+{
+ if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512))
+ return aes_gcm_dec_final_vaes_avx10(AES_GCM_KEY_AVX10(key),
+ le_ctr, ghash_acc,
+ total_aadlen, total_datalen,
+ tag, taglen);
+ else if (flags & FLAG_AVX)
+ return aes_gcm_dec_final_aesni_avx(AES_GCM_KEY_AESNI(key),
+ le_ctr, ghash_acc,
+ total_aadlen, total_datalen,
+ tag, taglen);
+ else
+ return aes_gcm_dec_final_aesni(AES_GCM_KEY_AESNI(key),
+ le_ctr, ghash_acc,
+ total_aadlen, total_datalen,
+ tag, taglen);
+}
+
+/*
+ * This is the Integrity Check Value (aka the authentication tag) length and can
+ * be 8, 12 or 16 bytes long.
+ *
+ * Returns 0 for one of those lengths and -EINVAL for anything else. The
+ * function only validates @authsize: @aead is not used and nothing is stored.
+ */
+static int common_rfc4106_set_authsize(struct crypto_aead *aead,
+ unsigned int authsize)
+{
+ switch (authsize) {
+ case 8:
+ case 12:
+ case 16:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int generic_gcmaes_set_authsize(struct crypto_aead *tfm,
+ unsigned int authsize) /* validation only: @tfm is not used and nothing is stored */
+{
+ switch (authsize) { /* accepted tag lengths in bytes: 4, 8, and 12 through 16 */
+ case 4:
+ case 8:
+ case 12:
+ case 13:
+ case 14:
+ case 15:
+ case 16:
+ break;
+ default: /* every other length is rejected */
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * This is the setkey function for the x86_64 implementations of AES-GCM. It
+ * saves the RFC4106 nonce if applicable, expands the AES key, and precomputes
+ * powers of the hash key.
+ *
+ * To comply with the crypto_aead API, this has to be usable in no-SIMD context.
+ * For that reason, this function includes a portable C implementation of the
+ * needed logic. However, the portable C implementation is very slow, taking
+ * about the same time as encrypting 37 KB of data. To be ready for users that
+ * may set a key even somewhat frequently, we therefore also include a SIMD
+ * assembly implementation, expanding the AES key using AES-NI and precomputing
+ * the hash key powers using PCLMULQDQ or VPCLMULQDQ.
+ *
+ * With FLAG_RFC4106 set in @flags, the last 4 bytes of @raw_key are stored as
+ * the nonce and excluded from @keylen; fewer than 4 bytes gives -EINVAL.
+ * Otherwise returns 0, or the error reported by aes_check_keylen() (SIMD
+ * path) or aes_expandkey() (portable path).
+ */
+static int gcm_setkey(struct crypto_aead *tfm, const u8 *raw_key,
+ unsigned int keylen, int flags)
+{
+ struct aes_gcm_key *key = aes_gcm_key_get(tfm, flags);
+ int err;
+
+ if (flags & FLAG_RFC4106) {
+ if (keylen < 4)
+ return -EINVAL;
+ keylen -= 4; /* the nonce is read from the 4 bytes that follow the AES key */
+ key->rfc4106_nonce = get_unaligned_be32(raw_key + keylen);
+ }
+
+ /* The assembly code assumes the following offsets. */
+ BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, base.aes_key.key_enc) != 0);
+ BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, base.aes_key.key_length) != 480);
+ BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_powers) != 496);
+ BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_powers_xored) != 624);
+ BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_times_x64) != 688);
+ BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, base.aes_key.key_enc) != 0);
+ BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, base.aes_key.key_length) != 480);
+ BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, h_powers) != 512);
+ BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, padding) != 768);
+
+ if (likely(crypto_simd_usable())) {
+ err = aes_check_keylen(keylen); /* validated before entering the FPU section */
+ if (err)
+ return err;
+ kernel_fpu_begin();
+ aesni_set_key(&key->aes_key, raw_key, keylen);
+ aes_gcm_precompute(key, flags);
+ kernel_fpu_end();
+ } else { /* portable path, taken when SIMD is not usable */
+ static const u8 x_to_the_minus1[16] __aligned(__alignof__(be128)) = {
+ [0] = 0xc2, [15] = 1
+ };
+ static const u8 x_to_the_63[16] __aligned(__alignof__(be128)) = {
+ [7] = 1,
+ };
+ be128 h1 = {};
+ be128 h;
+ int i;
+
+ err = aes_expandkey(&key->aes_key, raw_key, keylen);
+ if (err)
+ return err;
+
+ /* Encrypt the all-zeroes block to get the hash key H^1 */
+ aes_encrypt(&key->aes_key, (u8 *)&h1, (u8 *)&h1);
- memcpy(iv, req->iv, 12);
- *((__be32 *)(iv+12)) = counter;
+ /* Compute H^1 * x^-1 */
+ h = h1;
+ gf128mul_lle(&h, (const be128 *)x_to_the_minus1);
+
+ /* Compute the needed key powers */
+ if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512)) {
+ struct aes_gcm_key_avx10 *k = AES_GCM_KEY_AVX10(key);
+
+ for (i = ARRAY_SIZE(k->h_powers) - 1; i >= 0; i--) { /* fill from the last slot down; h is multiplied by h1 after each store */
+ k->h_powers[i][0] = be64_to_cpu(h.b); /* element [0] takes h.b, element [1] takes h.a */
+ k->h_powers[i][1] = be64_to_cpu(h.a);
+ gf128mul_lle(&h, &h1);
+ }
+ memset(k->padding, 0, sizeof(k->padding));
+ } else {
+ struct aes_gcm_key_aesni *k = AES_GCM_KEY_AESNI(key);
+
+ for (i = ARRAY_SIZE(k->h_powers) - 1; i >= 0; i--) { /* same descending fill as the AVX10 branch */
+ k->h_powers[i][0] = be64_to_cpu(h.b);
+ k->h_powers[i][1] = be64_to_cpu(h.a);
+ k->h_powers_xored[i] = k->h_powers[i][0] ^
+ k->h_powers[i][1]; /* XOR of the two 64-bit halves just stored */
+ gf128mul_lle(&h, &h1);
+ }
+ gf128mul_lle(&h1, (const be128 *)x_to_the_63); /* h1 itself is overwritten here; it is no longer needed as H^1 */
+ k->h_times_x64[0] = be64_to_cpu(h1.b);
+ k->h_times_x64[1] = be64_to_cpu(h1.a);
+ }
+ }
+ return 0;
+}
- return gcmaes_encrypt(req, req->assoclen, ctx->hash_subkey, iv,
- aes_ctx);
+/*
+ * Initialize @ghash_acc, then pass all @assoclen bytes of associated data
+ * (a.k.a. additional authenticated data) from @sg_src through the GHASH update
+ * assembly function. kernel_fpu_begin() must have already been called.
+ *
+ * The FPU section may be closed and reopened between segments (see the
+ * need_resched() check), but it is open again when this function returns.
+ */
+static void gcm_process_assoc(const struct aes_gcm_key *key, u8 ghash_acc[16],
+ struct scatterlist *sg_src, unsigned int assoclen,
+ int flags)
+{
+ struct scatter_walk walk;
+ /*
+ * The assembly function requires that the length of any non-last
+ * segment of associated data be a multiple of 16 bytes, so this
+ * function does the buffering needed to achieve that.
+ *
+ * @pos is the number of bytes currently held in @buf, carried over
+ * from the segment(s) processed so far.
+ */
+ unsigned int pos = 0;
+ u8 buf[16];
+
+ memset(ghash_acc, 0, 16);
+ scatterwalk_start(&walk, sg_src);
+
+ while (assoclen) {
+ unsigned int len_this_page = scatterwalk_clamp(&walk, assoclen);
+ void *mapped = scatterwalk_map(&walk);
+ const void *src = mapped; /* advances below; @mapped is kept for the unmap */
+ unsigned int len;
+
+ assoclen -= len_this_page; /* from here on, assoclen is what remains after this segment */
+ scatterwalk_advance(&walk, len_this_page);
+ if (unlikely(pos)) { /* top up the partial block left over from earlier */
+ len = min(len_this_page, 16 - pos);
+ memcpy(&buf[pos], src, len);
+ pos += len;
+ src += len;
+ len_this_page -= len;
+ if (pos < 16) /* still incomplete: this segment is fully consumed */
+ goto next;
+ aes_gcm_aad_update(key, ghash_acc, buf, 16, flags);
+ pos = 0;
+ }
+ len = len_this_page;
+ if (unlikely(assoclen)) /* Not the last segment yet? */
+ len = round_down(len, 16);
+ aes_gcm_aad_update(key, ghash_acc, src, len, flags);
+ src += len;
+ len_this_page -= len;
+ if (unlikely(len_this_page)) { /* fewer than 16 bytes remain: stash them for the next segment */
+ memcpy(buf, src, len_this_page);
+ pos = len_this_page;
+ }
+next:
+ scatterwalk_unmap(mapped);
+ scatterwalk_pagedone(&walk, 0, assoclen);
+ if (need_resched()) { /* close and reopen the FPU section when a reschedule is pending */
+ kernel_fpu_end();
+ kernel_fpu_begin();
+ }
+ }
+ if (unlikely(pos)) /* leftover bytes still buffered after the final segment */
+ aes_gcm_aad_update(key, ghash_acc, buf, pos, flags);
}
-static int generic_gcmaes_decrypt(struct aead_request *req)
+
+/*
+ * Shared body of every gcm/rfc4106 encrypt and decrypt entry point generated
+ * by DEFINE_GCM_ALGS. FLAG_ENC in @flags selects encryption; FLAG_RFC4106
+ * selects the RFC4106 counter layout and associated-data length check.
+ *
+ * Returns -EINVAL when FLAG_RFC4106 is set and req->assoclen is neither 16
+ * nor 20, an error from starting the walk or from skcipher_walk_done() on a
+ * non-last segment, -EBADMSG when aes_gcm_dec_final() returns false, and 0
+ * otherwise.
+ *
+ * __always_inline to optimize out the branches based on @flags
+ */
+static __always_inline int
+gcm_crypt(struct aead_request *req, int flags)
{
- __be32 counter = cpu_to_be32(1);
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm);
- void *aes_ctx = &(ctx->aes_key_expanded);
- u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8);
- u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN);
+ const struct aes_gcm_key *key = aes_gcm_key_get(tfm, flags);
+ unsigned int assoclen = req->assoclen;
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ u8 ghash_acc[16]; /* GHASH accumulator */
+ u32 le_ctr[4]; /* Counter in little-endian format */
+ int taglen;
+ int err;
- memcpy(iv, req->iv, 12);
- *((__be32 *)(iv+12)) = counter;
+ /* Initialize the counter and determine the associated data length. */
+ le_ctr[0] = 2; /* NOTE(review): presumably counter value 1 is reserved for the tag, as in standard GCM - confirm against the assembly */
+ if (flags & FLAG_RFC4106) {
+ if (unlikely(assoclen != 16 && assoclen != 20))
+ return -EINVAL;
+ assoclen -= 8; /* NOTE(review): presumably excludes the 8-byte IV carried at the end of the associated data - confirm */
+ le_ctr[1] = get_unaligned_be32(req->iv + 4);
+ le_ctr[2] = get_unaligned_be32(req->iv + 0);
+ le_ctr[3] = key->rfc4106_nonce; /* already byte-swapped */
+ } else {
+ le_ctr[1] = get_unaligned_be32(req->iv + 8);
+ le_ctr[2] = get_unaligned_be32(req->iv + 4);
+ le_ctr[3] = get_unaligned_be32(req->iv + 0);
+ }
- return gcmaes_decrypt(req, req->assoclen, ctx->hash_subkey, iv,
- aes_ctx);
+ /* Begin walking through the plaintext or ciphertext. */
+ if (flags & FLAG_ENC)
+ err = skcipher_walk_aead_encrypt(&walk, req, false);
+ else
+ err = skcipher_walk_aead_decrypt(&walk, req, false);
+ if (err)
+ return err;
+
+ /*
+ * Since the AES-GCM assembly code requires that at least three assembly
+ * functions be called to process any message (this is needed to support
+ * incremental updates cleanly), to reduce overhead we try to do all
+ * three calls in the same kernel FPU section if possible. We close the
+ * section and start a new one if there are multiple data segments or if
+ * rescheduling is needed while processing the associated data.
+ */
+ kernel_fpu_begin();
+
+ /* Pass the associated data through GHASH. */
+ gcm_process_assoc(key, ghash_acc, req->src, assoclen, flags);
+
+ /* En/decrypt the data and pass the ciphertext through GHASH. */
+ while (unlikely((nbytes = walk.nbytes) < walk.total)) {
+ /*
+ * Non-last segment. In this case, the assembly function
+ * requires that the length be a multiple of 16 (AES_BLOCK_SIZE)
+ * bytes. The needed buffering of up to 16 bytes is handled by
+ * the skcipher_walk. Here we just need to round down to a
+ * multiple of 16.
+ */
+ nbytes = round_down(nbytes, AES_BLOCK_SIZE);
+ aes_gcm_update(key, le_ctr, ghash_acc, walk.src.virt.addr,
+ walk.dst.virt.addr, nbytes, flags);
+ le_ctr[0] += nbytes / AES_BLOCK_SIZE; /* advance by the number of blocks just processed */
+ kernel_fpu_end(); /* the FPU section is closed before skcipher_walk_done() is called */
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ if (err)
+ return err; /* returns with the FPU section already closed */
+ kernel_fpu_begin();
+ }
+ /* Last segment: process all remaining data. */
+ aes_gcm_update(key, le_ctr, ghash_acc, walk.src.virt.addr,
+ walk.dst.virt.addr, nbytes, flags);
+ /*
+ * The low word of the counter isn't used by the finalize, so there's no
+ * need to increment it here.
+ */
+
+ /* Finalize */
+ taglen = crypto_aead_authsize(tfm);
+ if (flags & FLAG_ENC) {
+ /* Finish computing the auth tag. */
+ aes_gcm_enc_final(key, le_ctr, ghash_acc, assoclen,
+ req->cryptlen, flags);
+
+ /* Store the computed auth tag in the dst scatterlist. */
+ scatterwalk_map_and_copy(ghash_acc, req->dst, req->assoclen +
+ req->cryptlen, taglen, 1); /* offset uses the unadjusted req->assoclen */
+ } else {
+ unsigned int datalen = req->cryptlen - taglen;
+ u8 tag[16];
+
+ /* Get the transmitted auth tag from the src scatterlist. */
+ scatterwalk_map_and_copy(tag, req->src, req->assoclen + datalen,
+ taglen, 0);
+ /*
+ * Finish computing the auth tag and compare it to the
+ * transmitted one. The assembly function does the actual tag
+ * comparison. Here, just check the boolean result.
+ */
+ if (!aes_gcm_dec_final(key, le_ctr, ghash_acc, assoclen,
+ datalen, tag, taglen, flags))
+ err = -EBADMSG;
+ }
+ kernel_fpu_end();
+ if (nbytes) /* the walk is completed only when the last segment was non-empty; its return value is not checked */
+ skcipher_walk_done(&walk, 0);
+ return err;
}
-static struct aead_alg aesni_aeads[] = { {
- .setkey = common_rfc4106_set_key,
- .setauthsize = common_rfc4106_set_authsize,
- .encrypt = helper_rfc4106_encrypt,
- .decrypt = helper_rfc4106_decrypt,
- .ivsize = GCM_RFC4106_IV_SIZE,
- .maxauthsize = 16,
- .base = {
- .cra_name = "__rfc4106(gcm(aes))",
- .cra_driver_name = "__rfc4106-gcm-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_INTERNAL,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx),
- .cra_alignmask = 0,
- .cra_module = THIS_MODULE,
- },
-}, {
- .setkey = generic_gcmaes_set_key,
- .setauthsize = generic_gcmaes_set_authsize,
- .encrypt = generic_gcmaes_encrypt,
- .decrypt = generic_gcmaes_decrypt,
- .ivsize = GCM_AES_IV_SIZE,
- .maxauthsize = 16,
- .base = {
- .cra_name = "__gcm(aes)",
- .cra_driver_name = "__generic-gcm-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_INTERNAL,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct generic_gcmaes_ctx),
- .cra_alignmask = 0,
- .cra_module = THIS_MODULE,
- },
-} };
-#else
-static struct aead_alg aesni_aeads[0];
+#define DEFINE_GCM_ALGS(suffix, flags, generic_driver_name, rfc_driver_name, \
+ ctxsize, priority) \
+ /* Per variant: six thin wrappers, a two-entry aead_alg table, and the simd_aead_alg pointer array declaration. */ \
+static int gcm_setkey_##suffix(struct crypto_aead *tfm, const u8 *raw_key, \
+ unsigned int keylen) \
+{ \
+ return gcm_setkey(tfm, raw_key, keylen, (flags)); \
+} \
+ \
+static int gcm_encrypt_##suffix(struct aead_request *req) \
+{ \
+ return gcm_crypt(req, (flags) | FLAG_ENC); \
+} \
+ \
+static int gcm_decrypt_##suffix(struct aead_request *req) \
+{ \
+ return gcm_crypt(req, (flags)); \
+} \
+ /* The rfc4106 wrappers differ from the gcm ones only by adding FLAG_RFC4106. */ \
+static int rfc4106_setkey_##suffix(struct crypto_aead *tfm, const u8 *raw_key, \
+ unsigned int keylen) \
+{ \
+ return gcm_setkey(tfm, raw_key, keylen, (flags) | FLAG_RFC4106); \
+} \
+ \
+static int rfc4106_encrypt_##suffix(struct aead_request *req) \
+{ \
+ return gcm_crypt(req, (flags) | FLAG_RFC4106 | FLAG_ENC); \
+} \
+ \
+static int rfc4106_decrypt_##suffix(struct aead_request *req) \
+{ \
+ return gcm_crypt(req, (flags) | FLAG_RFC4106); \
+} \
+ /* Entry [0] is "__gcm(aes)", entry [1] is "__rfc4106(gcm(aes))"; both use the same ctxsize and priority. */ \
+static struct aead_alg aes_gcm_algs_##suffix[] = { { \
+ .setkey = gcm_setkey_##suffix, \
+ .setauthsize = generic_gcmaes_set_authsize, \
+ .encrypt = gcm_encrypt_##suffix, \
+ .decrypt = gcm_decrypt_##suffix, \
+ .ivsize = GCM_AES_IV_SIZE, \
+ .chunksize = AES_BLOCK_SIZE, \
+ .maxauthsize = 16, \
+ .base = { \
+ .cra_name = "__gcm(aes)", \
+ .cra_driver_name = "__" generic_driver_name, \
+ .cra_priority = (priority), \
+ .cra_flags = CRYPTO_ALG_INTERNAL, \
+ .cra_blocksize = 1, \
+ .cra_ctxsize = (ctxsize), \
+ .cra_module = THIS_MODULE, \
+ }, \
+}, { \
+ .setkey = rfc4106_setkey_##suffix, \
+ .setauthsize = common_rfc4106_set_authsize, \
+ .encrypt = rfc4106_encrypt_##suffix, \
+ .decrypt = rfc4106_decrypt_##suffix, \
+ .ivsize = GCM_RFC4106_IV_SIZE, \
+ .chunksize = AES_BLOCK_SIZE, \
+ .maxauthsize = 16, \
+ .base = { \
+ .cra_name = "__rfc4106(gcm(aes))", \
+ .cra_driver_name = "__" rfc_driver_name, \
+ .cra_priority = (priority), \
+ .cra_flags = CRYPTO_ALG_INTERNAL, \
+ .cra_blocksize = 1, \
+ .cra_ctxsize = (ctxsize), \
+ .cra_module = THIS_MODULE, \
+ }, \
+} }; \
+ /* No ';' after the final declaration: each DEFINE_GCM_ALGS(...) use supplies it. */ \
+static struct simd_aead_alg *aes_gcm_simdalgs_##suffix[2] \
+
+/* aes_gcm_algs_aesni: no flags set, priority 400 */
+DEFINE_GCM_ALGS(aesni, /* no flags */ 0,
+ "generic-gcm-aesni", "rfc4106-gcm-aesni",
+ AES_GCM_KEY_AESNI_SIZE, 400);
+
+/* aes_gcm_algs_aesni_avx: FLAG_AVX, same key size as aesni, priority 500 */
+DEFINE_GCM_ALGS(aesni_avx, FLAG_AVX,
+ "generic-gcm-aesni-avx", "rfc4106-gcm-aesni-avx",
+ AES_GCM_KEY_AESNI_SIZE, 500);
+
+#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
+/* aes_gcm_algs_vaes_avx10_256: FLAG_AVX10_256, AVX10 key size, priority 700 */
+DEFINE_GCM_ALGS(vaes_avx10_256, FLAG_AVX10_256,
+ "generic-gcm-vaes-avx10_256", "rfc4106-gcm-vaes-avx10_256",
+ AES_GCM_KEY_AVX10_SIZE, 700);
+
+/* aes_gcm_algs_vaes_avx10_512: FLAG_AVX10_512, AVX10 key size, priority 800 */
+DEFINE_GCM_ALGS(vaes_avx10_512, FLAG_AVX10_512,
+ "generic-gcm-vaes-avx10_512", "rfc4106-gcm-vaes-avx10_512",
+ AES_GCM_KEY_AVX10_SIZE, 800);
+#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
+
+/*
+ * This is a list of CPU models that are known to suffer from downclocking when
+ * zmm registers (512-bit vectors) are used. On these CPUs, the AES mode
+ * implementations with zmm registers won't be used by default. Implementations
+ * with ymm registers (256-bit vectors) will be used by default instead.
+ *
+ * register_avx_algs() checks this list with x86_match_cpu(). On a match it
+ * sets cra_priority to 1 for the avx10_512 XTS and GCM algorithms before
+ * registering them, so they are still registered.
+ */
+static const struct x86_cpu_id zmm_exclusion_list[] = {
+ X86_MATCH_VFM(INTEL_SKYLAKE_X, 0),
+ X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
+ X86_MATCH_VFM(INTEL_ICELAKE_D, 0),
+ X86_MATCH_VFM(INTEL_ICELAKE, 0),
+ X86_MATCH_VFM(INTEL_ICELAKE_L, 0),
+ X86_MATCH_VFM(INTEL_ICELAKE_NNPI, 0),
+ X86_MATCH_VFM(INTEL_TIGERLAKE_L, 0),
+ X86_MATCH_VFM(INTEL_TIGERLAKE, 0),
+ /* Allow Rocket Lake and later, and Sapphire Rapids and later. */
+ /* Also allow AMD CPUs (starting with Zen 4, the first with AVX-512). */
+ {}, /* empty terminating entry */
+};
+
+static int __init register_avx_algs(void)
+{ /* Registers the AVX and VAES variants tier by tier; a missing CPU-feature tier returns 0, a failed registration returns its error. */
+ int err;
+
+ if (!boot_cpu_has(X86_FEATURE_AVX))
+ return 0; /* no AVX: register nothing and report success */
+ err = simd_register_skciphers_compat(&aes_xts_alg_aesni_avx, 1,
+ &aes_xts_simdalg_aesni_avx);
+ if (err)
+ return err; /* nothing is unregistered here; aesni_init() calls unregister_avx_algs() on error */
+ err = simd_register_aeads_compat(aes_gcm_algs_aesni_avx,
+ ARRAY_SIZE(aes_gcm_algs_aesni_avx),
+ aes_gcm_simdalgs_aesni_avx);
+ if (err)
+ return err;
+#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
+ if (!boot_cpu_has(X86_FEATURE_AVX2) ||
+ !boot_cpu_has(X86_FEATURE_VAES) ||
+ !boot_cpu_has(X86_FEATURE_VPCLMULQDQ) ||
+ !boot_cpu_has(X86_FEATURE_PCLMULQDQ) ||
+ !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
+ return 0; /* VAES/AVX2 tier unavailable: keep what was registered above */
+ err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx2, 1,
+ &aes_xts_simdalg_vaes_avx2); /* this tier registers XTS only; no GCM algorithm is registered here */
+ if (err)
+ return err;
+
+ if (!boot_cpu_has(X86_FEATURE_AVX512BW) ||
+ !boot_cpu_has(X86_FEATURE_AVX512VL) ||
+ !boot_cpu_has(X86_FEATURE_BMI2) ||
+ !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
+ XFEATURE_MASK_AVX512, NULL))
+ return 0; /* AVX-512 tier unavailable: the avx10_256 and avx10_512 variants are skipped */
+
+ err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx10_256, 1,
+ &aes_xts_simdalg_vaes_avx10_256);
+ if (err)
+ return err;
+ err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_256,
+ ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256),
+ aes_gcm_simdalgs_vaes_avx10_256);
+ if (err)
+ return err;
+
+ if (x86_match_cpu(zmm_exclusion_list)) { /* listed CPU: lower the 512-bit variants to priority 1; they are still registered below */
+ int i;
+
+ aes_xts_alg_vaes_avx10_512.base.cra_priority = 1;
+ for (i = 0; i < ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512); i++)
+ aes_gcm_algs_vaes_avx10_512[i].base.cra_priority = 1;
+ }
+
+ err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx10_512, 1,
+ &aes_xts_simdalg_vaes_avx10_512);
+ if (err)
+ return err;
+ err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_512,
+ ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512),
+ aes_gcm_simdalgs_vaes_avx10_512);
+ if (err)
+ return err;
+#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
+ return 0;
+}
+
+static void unregister_avx_algs(void)
+{ /* Called from aesni_exit() and from the aesni_init() error path. */
+ if (aes_xts_simdalg_aesni_avx) /* NOTE(review): presumably non-NULL only after a successful registration - confirm */
+ simd_unregister_skciphers(&aes_xts_alg_aesni_avx, 1,
+ &aes_xts_simdalg_aesni_avx);
+ if (aes_gcm_simdalgs_aesni_avx[0]) /* only element [0] of the pointer array is tested */
+ simd_unregister_aeads(aes_gcm_algs_aesni_avx,
+ ARRAY_SIZE(aes_gcm_algs_aesni_avx),
+ aes_gcm_simdalgs_aesni_avx);
+#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
+ if (aes_xts_simdalg_vaes_avx2)
+ simd_unregister_skciphers(&aes_xts_alg_vaes_avx2, 1,
+ &aes_xts_simdalg_vaes_avx2);
+ if (aes_xts_simdalg_vaes_avx10_256)
+ simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_256, 1,
+ &aes_xts_simdalg_vaes_avx10_256);
+ if (aes_gcm_simdalgs_vaes_avx10_256[0])
+ simd_unregister_aeads(aes_gcm_algs_vaes_avx10_256,
+ ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256),
+ aes_gcm_simdalgs_vaes_avx10_256);
+ if (aes_xts_simdalg_vaes_avx10_512)
+ simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_512, 1,
+ &aes_xts_simdalg_vaes_avx10_512);
+ if (aes_gcm_simdalgs_vaes_avx10_512[0])
+ simd_unregister_aeads(aes_gcm_algs_vaes_avx10_512,
+ ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512),
+ aes_gcm_simdalgs_vaes_avx10_512);
#endif
+}
+#else /* CONFIG_X86_64 */
+static struct aead_alg aes_gcm_algs_aesni[0]; /* zero-length placeholders: aesni_init() and aesni_exit() name these arrays outside the CONFIG_X86_64 guard */
+static struct simd_aead_alg *aes_gcm_simdalgs_aesni[0];
-static struct simd_aead_alg *aesni_simd_aeads[ARRAY_SIZE(aesni_aeads)];
+static int __init register_avx_algs(void)
+{
+ return 0; /* stub: nothing to register in this configuration */
+}
+
+static void unregister_avx_algs(void)
+{ /* stub: intentionally empty */
+}
+#endif /* !CONFIG_X86_64 */
static const struct x86_cpu_id aesni_cpu_id[] = {
X86_MATCH_FEATURE(X86_FEATURE_AES, NULL),
@@ -1235,17 +1677,6 @@ static int __init aesni_init(void)
if (!x86_match_cpu(aesni_cpu_id))
return -ENODEV;
#ifdef CONFIG_X86_64
- if (boot_cpu_has(X86_FEATURE_AVX2)) {
- pr_info("AVX2 version of gcm_enc/dec engaged.\n");
- static_branch_enable(&gcm_use_avx);
- static_branch_enable(&gcm_use_avx2);
- } else
- if (boot_cpu_has(X86_FEATURE_AVX)) {
- pr_info("AVX version of gcm_enc/dec engaged.\n");
- static_branch_enable(&gcm_use_avx);
- } else {
- pr_info("SSE version of gcm_enc/dec engaged.\n");
- }
if (boot_cpu_has(X86_FEATURE_AVX)) {
/* optimize performance of ctr mode encryption transform */
static_call_update(aesni_ctr_enc_tfm, aesni_ctr_enc_avx_tfm);
@@ -1263,8 +1694,9 @@ static int __init aesni_init(void)
if (err)
goto unregister_cipher;
- err = simd_register_aeads_compat(aesni_aeads, ARRAY_SIZE(aesni_aeads),
- aesni_simd_aeads);
+ err = simd_register_aeads_compat(aes_gcm_algs_aesni,
+ ARRAY_SIZE(aes_gcm_algs_aesni),
+ aes_gcm_simdalgs_aesni);
if (err)
goto unregister_skciphers;
@@ -1276,14 +1708,22 @@ static int __init aesni_init(void)
goto unregister_aeads;
#endif /* CONFIG_X86_64 */
+ err = register_avx_algs();
+ if (err)
+ goto unregister_avx;
+
return 0;
+unregister_avx:
+ unregister_avx_algs();
#ifdef CONFIG_X86_64
+ if (aesni_simd_xctr)
+ simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr);
unregister_aeads:
- simd_unregister_aeads(aesni_aeads, ARRAY_SIZE(aesni_aeads),
- aesni_simd_aeads);
#endif /* CONFIG_X86_64 */
-
+ simd_unregister_aeads(aes_gcm_algs_aesni,
+ ARRAY_SIZE(aes_gcm_algs_aesni),
+ aes_gcm_simdalgs_aesni);
unregister_skciphers:
simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
aesni_simd_skciphers);
@@ -1294,8 +1734,9 @@ unregister_cipher:
static void __exit aesni_exit(void)
{
- simd_unregister_aeads(aesni_aeads, ARRAY_SIZE(aesni_aeads),
- aesni_simd_aeads);
+ simd_unregister_aeads(aes_gcm_algs_aesni,
+ ARRAY_SIZE(aes_gcm_algs_aesni),
+ aes_gcm_simdalgs_aesni);
simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
aesni_simd_skciphers);
crypto_unregister_alg(&aesni_cipher_alg);
@@ -1303,11 +1744,12 @@ static void __exit aesni_exit(void)
if (boot_cpu_has(X86_FEATURE_AVX))
simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr);
#endif /* CONFIG_X86_64 */
+ unregister_avx_algs();
}
-late_initcall(aesni_init);
+module_init(aesni_init);
module_exit(aesni_exit);
-MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized");
+MODULE_DESCRIPTION("AES cipher and modes, optimized with AES-NI or VAES instructions");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("aes");