diff options
Diffstat (limited to 'arch/s390/crypto')
-rw-r--r-- | arch/s390/crypto/Kconfig | 22 | ||||
-rw-r--r-- | arch/s390/crypto/Makefile | 3 | ||||
-rw-r--r-- | arch/s390/crypto/aes_s390.c | 122 | ||||
-rw-r--r-- | arch/s390/crypto/chacha-glue.c | 4 | ||||
-rw-r--r-- | arch/s390/crypto/chacha-s390.S | 2 | ||||
-rw-r--r-- | arch/s390/crypto/crc32-vx.c | 310 | ||||
-rw-r--r-- | arch/s390/crypto/crc32be-vx.S | 213 | ||||
-rw-r--r-- | arch/s390/crypto/crc32le-vx.S | 275 | ||||
-rw-r--r-- | arch/s390/crypto/hmac_s390.c | 359 | ||||
-rw-r--r-- | arch/s390/crypto/paes_s390.c | 423 | ||||
-rw-r--r-- | arch/s390/crypto/prng.c | 14 | ||||
-rw-r--r-- | arch/s390/crypto/sha.h | 1 | ||||
-rw-r--r-- | arch/s390/crypto/sha3_256_s390.c | 11 | ||||
-rw-r--r-- | arch/s390/crypto/sha3_512_s390.c | 11 | ||||
-rw-r--r-- | arch/s390/crypto/sha_common.c | 20 |
15 files changed, 829 insertions, 961 deletions
diff --git a/arch/s390/crypto/Kconfig b/arch/s390/crypto/Kconfig index 06ee706b0d78..b760232537f1 100644 --- a/arch/s390/crypto/Kconfig +++ b/arch/s390/crypto/Kconfig @@ -2,18 +2,6 @@ menu "Accelerated Cryptographic Algorithms for CPU (s390)" -config CRYPTO_CRC32_S390 - tristate "CRC32c and CRC32" - depends on S390 - select CRYPTO_HASH - select CRC32 - help - CRC32c and CRC32 CRC algorithms - - Architecture: s390 - - It is available with IBM z13 or later. - config CRYPTO_SHA512_S390 tristate "Hash functions: SHA-384 and SHA-512" depends on S390 @@ -132,4 +120,14 @@ config CRYPTO_CHACHA_S390 It is available as of z13. +config CRYPTO_HMAC_S390 + tristate "Keyed-hash message authentication code: HMAC" + depends on S390 + select CRYPTO_HASH + help + s390 specific HMAC hardware support for SHA224, SHA256, SHA384 and + SHA512. + + Architecture: s390 + endmenu diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile index 1b1cc478fa94..14dafadbcbed 100644 --- a/arch/s390/crypto/Makefile +++ b/arch/s390/crypto/Makefile @@ -14,8 +14,7 @@ obj-$(CONFIG_CRYPTO_PAES_S390) += paes_s390.o obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o obj-$(CONFIG_S390_PRNG) += prng.o obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o -obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o +obj-$(CONFIG_CRYPTO_HMAC_S390) += hmac_s390.o obj-y += arch_random.o -crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o chacha_s390-y := chacha-glue.o chacha-s390.o diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index c6fe5405de4a..9c46b1b630b1 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -51,8 +51,13 @@ struct s390_aes_ctx { }; struct s390_xts_ctx { - u8 key[32]; - u8 pcc_key[32]; + union { + u8 keys[64]; + struct { + u8 key[32]; + u8 pcc_key[32]; + }; + }; int key_len; unsigned long fc; struct crypto_skcipher *fallback; @@ -526,6 +531,108 @@ static struct skcipher_alg xts_aes_alg = { .decrypt = xts_aes_decrypt, }; +static int fullxts_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm); + unsigned long fc; + int err; + + err = xts_fallback_setkey(tfm, in_key, key_len); + if (err) + return err; + + /* Pick the correct function code based on the key length */ + fc = (key_len == 32) ? CPACF_KM_XTS_128_FULL : + (key_len == 64) ? CPACF_KM_XTS_256_FULL : 0; + + /* Check if the function code is available */ + xts_ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0; + if (!xts_ctx->fc) + return 0; + + /* Store double-key */ + memcpy(xts_ctx->keys, in_key, key_len); + xts_ctx->key_len = key_len; + return 0; +} + +static int fullxts_aes_crypt(struct skcipher_request *req, unsigned long modifier) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm); + unsigned int offset, nbytes, n; + struct skcipher_walk walk; + int ret; + struct { + __u8 key[64]; + __u8 tweak[16]; + __u8 nap[16]; + } fxts_param = { + .nap = {0}, + }; + + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + + if (unlikely(!xts_ctx->fc || (req->cryptlen % AES_BLOCK_SIZE) != 0)) { + struct skcipher_request *subreq = skcipher_request_ctx(req); + + *subreq = *req; + skcipher_request_set_tfm(subreq, xts_ctx->fallback); + return (modifier & CPACF_DECRYPT) ? + crypto_skcipher_decrypt(subreq) : + crypto_skcipher_encrypt(subreq); + } + + ret = skcipher_walk_virt(&walk, req, false); + if (ret) + return ret; + + offset = xts_ctx->key_len & 0x20; + memcpy(fxts_param.key + offset, xts_ctx->keys, xts_ctx->key_len); + memcpy(fxts_param.tweak, req->iv, AES_BLOCK_SIZE); + fxts_param.nap[0] = 0x01; /* initial alpha power (1, little-endian) */ + + while ((nbytes = walk.nbytes) != 0) { + /* only use complete blocks */ + n = nbytes & ~(AES_BLOCK_SIZE - 1); + cpacf_km(xts_ctx->fc | modifier, fxts_param.key + offset, + walk.dst.virt.addr, walk.src.virt.addr, n); + ret = skcipher_walk_done(&walk, nbytes - n); + } + memzero_explicit(&fxts_param, sizeof(fxts_param)); + return ret; +} + +static int fullxts_aes_encrypt(struct skcipher_request *req) +{ + return fullxts_aes_crypt(req, 0); +} + +static int fullxts_aes_decrypt(struct skcipher_request *req) +{ + return fullxts_aes_crypt(req, CPACF_DECRYPT); +} + +static struct skcipher_alg fullxts_aes_alg = { + .base.cra_name = "xts(aes)", + .base.cra_driver_name = "full-xts-aes-s390", + .base.cra_priority = 403, /* aes-xts-s390 + 1 */ + .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_xts_ctx), + .base.cra_module = THIS_MODULE, + .init = xts_fallback_init, + .exit = xts_fallback_exit, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = fullxts_aes_set_key, + .encrypt = fullxts_aes_encrypt, + .decrypt = fullxts_aes_decrypt, +}; + static int ctr_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { @@ -955,7 +1062,7 @@ static struct aead_alg gcm_aes_aead = { }; static struct crypto_alg *aes_s390_alg; -static struct skcipher_alg *aes_s390_skcipher_algs[4]; +static struct skcipher_alg *aes_s390_skcipher_algs[5]; static int aes_s390_skciphers_num; static struct aead_alg *aes_s390_aead_alg; @@ -1012,6 +1119,13 @@ static int __init aes_s390_init(void) goto out_err; } + if (cpacf_test_func(&km_functions, CPACF_KM_XTS_128_FULL) || + cpacf_test_func(&km_functions, CPACF_KM_XTS_256_FULL)) { + ret = aes_s390_register_skcipher(&fullxts_aes_alg); + if (ret) + goto out_err; + } + if (cpacf_test_func(&km_functions, CPACF_KM_XTS_128) || cpacf_test_func(&km_functions, CPACF_KM_XTS_256)) { ret = aes_s390_register_skcipher(&xts_aes_alg); @@ -1054,4 +1168,4 @@ MODULE_ALIAS_CRYPTO("aes-all"); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm"); MODULE_LICENSE("GPL"); -MODULE_IMPORT_NS(CRYPTO_INTERNAL); +MODULE_IMPORT_NS("CRYPTO_INTERNAL"); diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c index ed9959e6f714..f8b0c52e77a4 100644 --- a/arch/s390/crypto/chacha-glue.c +++ b/arch/s390/crypto/chacha-glue.c @@ -15,14 +15,14 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/sizes.h> -#include <asm/fpu/api.h> +#include <asm/fpu.h> #include "chacha-s390.h" static void chacha20_crypt_s390(u32 *state, u8 *dst, const u8 *src, unsigned int nbytes, const u32 *key, u32 *counter) { - struct kernel_fpu vxstate; + DECLARE_KERNEL_FPU_ONSTACK32(vxstate); kernel_fpu_begin(&vxstate, KERNEL_VXR); chacha20_vx(dst, src, nbytes, key, counter); diff --git a/arch/s390/crypto/chacha-s390.S b/arch/s390/crypto/chacha-s390.S index 37cb63f25b17..63f3102678c0 100644 --- a/arch/s390/crypto/chacha-s390.S +++ b/arch/s390/crypto/chacha-s390.S @@ -8,7 +8,7 @@ #include <linux/linkage.h> #include <asm/nospec-insn.h> -#include <asm/vx-insn.h> +#include <asm/fpu-insn.h> #define SP %r15 #define FRAME (16 * 8 + 4 * 8) diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c deleted file mode 100644 index 017143e9cef7..000000000000 --- a/arch/s390/crypto/crc32-vx.c +++ /dev/null @@ -1,310 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Crypto-API module for CRC-32 algorithms implemented with the - * z/Architecture Vector Extension Facility. - * - * Copyright IBM Corp. 2015 - * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> - */ -#define KMSG_COMPONENT "crc32-vx" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include <linux/module.h> -#include <linux/cpufeature.h> -#include <linux/crc32.h> -#include <crypto/internal/hash.h> -#include <asm/fpu/api.h> - - -#define CRC32_BLOCK_SIZE 1 -#define CRC32_DIGEST_SIZE 4 - -#define VX_MIN_LEN 64 -#define VX_ALIGNMENT 16L -#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) - -struct crc_ctx { - u32 key; -}; - -struct crc_desc_ctx { - u32 crc; -}; - -/* Prototypes for functions in assembly files */ -u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size); -u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size); -u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size); - -/* - * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension - * - * Creates a function to perform a particular CRC-32 computation. Depending - * on the message buffer, the hardware-accelerated or software implementation - * is used. Note that the message buffer is aligned to improve fetch - * operations of VECTOR LOAD MULTIPLE instructions. - * - */ -#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \ - static u32 __pure ___fname(u32 crc, \ - unsigned char const *data, size_t datalen) \ - { \ - struct kernel_fpu vxstate; \ - unsigned long prealign, aligned, remaining; \ - \ - if (datalen < VX_MIN_LEN + VX_ALIGN_MASK) \ - return ___crc32_sw(crc, data, datalen); \ - \ - if ((unsigned long)data & VX_ALIGN_MASK) { \ - prealign = VX_ALIGNMENT - \ - ((unsigned long)data & VX_ALIGN_MASK); \ - datalen -= prealign; \ - crc = ___crc32_sw(crc, data, prealign); \ - data = (void *)((unsigned long)data + prealign); \ - } \ - \ - aligned = datalen & ~VX_ALIGN_MASK; \ - remaining = datalen & VX_ALIGN_MASK; \ - \ - kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \ - crc = ___crc32_vx(crc, data, aligned); \ - kernel_fpu_end(&vxstate, KERNEL_VXR_LOW); \ - \ - if (remaining) \ - crc = ___crc32_sw(crc, data + aligned, remaining); \ - \ - return crc; \ - } - -DEFINE_CRC32_VX(crc32_le_vx, crc32_le_vgfm_16, crc32_le) -DEFINE_CRC32_VX(crc32_be_vx, crc32_be_vgfm_16, crc32_be) -DEFINE_CRC32_VX(crc32c_le_vx, crc32c_le_vgfm_16, __crc32c_le) - - -static int crc32_vx_cra_init_zero(struct crypto_tfm *tfm) -{ - struct crc_ctx *mctx = crypto_tfm_ctx(tfm); - - mctx->key = 0; - return 0; -} - -static int crc32_vx_cra_init_invert(struct crypto_tfm *tfm) -{ - struct crc_ctx *mctx = crypto_tfm_ctx(tfm); - - mctx->key = ~0; - return 0; -} - -static int crc32_vx_init(struct shash_desc *desc) -{ - struct crc_ctx *mctx = crypto_shash_ctx(desc->tfm); - struct crc_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = mctx->key; - return 0; -} - -static int crc32_vx_setkey(struct crypto_shash *tfm, const u8 *newkey, - unsigned int newkeylen) -{ - struct crc_ctx *mctx = crypto_shash_ctx(tfm); - - if (newkeylen != sizeof(mctx->key)) - return -EINVAL; - mctx->key = le32_to_cpu(*(__le32 *)newkey); - return 0; -} - -static int crc32be_vx_setkey(struct crypto_shash *tfm, const u8 *newkey, - unsigned int newkeylen) -{ - struct crc_ctx *mctx = crypto_shash_ctx(tfm); - - if (newkeylen != sizeof(mctx->key)) - return -EINVAL; - mctx->key = be32_to_cpu(*(__be32 *)newkey); - return 0; -} - -static int crc32le_vx_final(struct shash_desc *desc, u8 *out) -{ - struct crc_desc_ctx *ctx = shash_desc_ctx(desc); - - *(__le32 *)out = cpu_to_le32p(&ctx->crc); - return 0; -} - -static int crc32be_vx_final(struct shash_desc *desc, u8 *out) -{ - struct crc_desc_ctx *ctx = shash_desc_ctx(desc); - - *(__be32 *)out = cpu_to_be32p(&ctx->crc); - return 0; -} - -static int crc32c_vx_final(struct shash_desc *desc, u8 *out) -{ - struct crc_desc_ctx *ctx = shash_desc_ctx(desc); - - /* - * Perform a final XOR with 0xFFFFFFFF to be in sync - * with the generic crc32c shash implementation. - */ - *(__le32 *)out = ~cpu_to_le32p(&ctx->crc); - return 0; -} - -static int __crc32le_vx_finup(u32 *crc, const u8 *data, unsigned int len, - u8 *out) -{ - *(__le32 *)out = cpu_to_le32(crc32_le_vx(*crc, data, len)); - return 0; -} - -static int __crc32be_vx_finup(u32 *crc, const u8 *data, unsigned int len, - u8 *out) -{ - *(__be32 *)out = cpu_to_be32(crc32_be_vx(*crc, data, len)); - return 0; -} - -static int __crc32c_vx_finup(u32 *crc, const u8 *data, unsigned int len, - u8 *out) -{ - /* - * Perform a final XOR with 0xFFFFFFFF to be in sync - * with the generic crc32c shash implementation. - */ - *(__le32 *)out = ~cpu_to_le32(crc32c_le_vx(*crc, data, len)); - return 0; -} - - -#define CRC32_VX_FINUP(alg, func) \ - static int alg ## _vx_finup(struct shash_desc *desc, const u8 *data, \ - unsigned int datalen, u8 *out) \ - { \ - return __ ## alg ## _vx_finup(shash_desc_ctx(desc), \ - data, datalen, out); \ - } - -CRC32_VX_FINUP(crc32le, crc32_le_vx) -CRC32_VX_FINUP(crc32be, crc32_be_vx) -CRC32_VX_FINUP(crc32c, crc32c_le_vx) - -#define CRC32_VX_DIGEST(alg, func) \ - static int alg ## _vx_digest(struct shash_desc *desc, const u8 *data, \ - unsigned int len, u8 *out) \ - { \ - return __ ## alg ## _vx_finup(crypto_shash_ctx(desc->tfm), \ - data, len, out); \ - } - -CRC32_VX_DIGEST(crc32le, crc32_le_vx) -CRC32_VX_DIGEST(crc32be, crc32_be_vx) -CRC32_VX_DIGEST(crc32c, crc32c_le_vx) - -#define CRC32_VX_UPDATE(alg, func) \ - static int alg ## _vx_update(struct shash_desc *desc, const u8 *data, \ - unsigned int datalen) \ - { \ - struct crc_desc_ctx *ctx = shash_desc_ctx(desc); \ - ctx->crc = func(ctx->crc, data, datalen); \ - return 0; \ - } - -CRC32_VX_UPDATE(crc32le, crc32_le_vx) -CRC32_VX_UPDATE(crc32be, crc32_be_vx) -CRC32_VX_UPDATE(crc32c, crc32c_le_vx) - - -static struct shash_alg crc32_vx_algs[] = { - /* CRC-32 LE */ - { - .init = crc32_vx_init, - .setkey = crc32_vx_setkey, - .update = crc32le_vx_update, - .final = crc32le_vx_final, - .finup = crc32le_vx_finup, - .digest = crc32le_vx_digest, - .descsize = sizeof(struct crc_desc_ctx), - .digestsize = CRC32_DIGEST_SIZE, - .base = { - .cra_name = "crc32", - .cra_driver_name = "crc32-vx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize = CRC32_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crc_ctx), - .cra_module = THIS_MODULE, - .cra_init = crc32_vx_cra_init_zero, - }, - }, - /* CRC-32 BE */ - { - .init = crc32_vx_init, - .setkey = crc32be_vx_setkey, - .update = crc32be_vx_update, - .final = crc32be_vx_final, - .finup = crc32be_vx_finup, - .digest = crc32be_vx_digest, - .descsize = sizeof(struct crc_desc_ctx), - .digestsize = CRC32_DIGEST_SIZE, - .base = { - .cra_name = "crc32be", - .cra_driver_name = "crc32be-vx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize = CRC32_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crc_ctx), - .cra_module = THIS_MODULE, - .cra_init = crc32_vx_cra_init_zero, - }, - }, - /* CRC-32C LE */ - { - .init = crc32_vx_init, - .setkey = crc32_vx_setkey, - .update = crc32c_vx_update, - .final = crc32c_vx_final, - .finup = crc32c_vx_finup, - .digest = crc32c_vx_digest, - .descsize = sizeof(struct crc_desc_ctx), - .digestsize = CRC32_DIGEST_SIZE, - .base = { - .cra_name = "crc32c", - .cra_driver_name = "crc32c-vx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize = CRC32_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crc_ctx), - .cra_module = THIS_MODULE, - .cra_init = crc32_vx_cra_init_invert, - }, - }, -}; - - -static int __init crc_vx_mod_init(void) -{ - return crypto_register_shashes(crc32_vx_algs, - ARRAY_SIZE(crc32_vx_algs)); -} - -static void __exit crc_vx_mod_exit(void) -{ - crypto_unregister_shashes(crc32_vx_algs, ARRAY_SIZE(crc32_vx_algs)); -} - -module_cpu_feature_match(S390_CPU_FEATURE_VXRS, crc_vx_mod_init); -module_exit(crc_vx_mod_exit); - -MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>"); -MODULE_LICENSE("GPL"); - -MODULE_ALIAS_CRYPTO("crc32"); -MODULE_ALIAS_CRYPTO("crc32-vx"); -MODULE_ALIAS_CRYPTO("crc32c"); -MODULE_ALIAS_CRYPTO("crc32c-vx"); diff --git a/arch/s390/crypto/crc32be-vx.S b/arch/s390/crypto/crc32be-vx.S deleted file mode 100644 index 34ee47926891..000000000000 --- a/arch/s390/crypto/crc32be-vx.S +++ /dev/null @@ -1,213 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Hardware-accelerated CRC-32 variants for Linux on z Systems - * - * Use the z/Architecture Vector Extension Facility to accelerate the - * computing of CRC-32 checksums. - * - * This CRC-32 implementation algorithm processes the most-significant - * bit first (BE). - * - * Copyright IBM Corp. 2015 - * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> - */ - -#include <linux/linkage.h> -#include <asm/nospec-insn.h> -#include <asm/vx-insn.h> - -/* Vector register range containing CRC-32 constants */ -#define CONST_R1R2 %v9 -#define CONST_R3R4 %v10 -#define CONST_R5 %v11 -#define CONST_R6 %v12 -#define CONST_RU_POLY %v13 -#define CONST_CRC_POLY %v14 - - .data - .balign 8 - -/* - * The CRC-32 constant block contains reduction constants to fold and - * process particular chunks of the input data stream in parallel. - * - * For the CRC-32 variants, the constants are precomputed according to - * these definitions: - * - * R1 = x4*128+64 mod P(x) - * R2 = x4*128 mod P(x) - * R3 = x128+64 mod P(x) - * R4 = x128 mod P(x) - * R5 = x96 mod P(x) - * R6 = x64 mod P(x) - * - * Barret reduction constant, u, is defined as floor(x**64 / P(x)). - * - * where P(x) is the polynomial in the normal domain and the P'(x) is the - * polynomial in the reversed (bitreflected) domain. - * - * Note that the constant definitions below are extended in order to compute - * intermediate results with a single VECTOR GALOIS FIELD MULTIPLY instruction. - * The rightmost doubleword can be 0 to prevent contribution to the result or - * can be multiplied by 1 to perform an XOR without the need for a separate - * VECTOR EXCLUSIVE OR instruction. - * - * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials: - * - * P(x) = 0x04C11DB7 - * P'(x) = 0xEDB88320 - */ - -SYM_DATA_START_LOCAL(constants_CRC_32_BE) - .quad 0x08833794c, 0x0e6228b11 # R1, R2 - .quad 0x0c5b9cd4c, 0x0e8a45605 # R3, R4 - .quad 0x0f200aa66, 1 << 32 # R5, x32 - .quad 0x0490d678d, 1 # R6, 1 - .quad 0x104d101df, 0 # u - .quad 0x104C11DB7, 0 # P(x) -SYM_DATA_END(constants_CRC_32_BE) - - .previous - - GEN_BR_THUNK %r14 - - .text -/* - * The CRC-32 function(s) use these calling conventions: - * - * Parameters: - * - * %r2: Initial CRC value, typically ~0; and final CRC (return) value. - * %r3: Input buffer pointer, performance might be improved if the - * buffer is on a doubleword boundary. - * %r4: Length of the buffer, must be 64 bytes or greater. - * - * Register usage: - * - * %r5: CRC-32 constant pool base pointer. - * V0: Initial CRC value and intermediate constants and results. - * V1..V4: Data for CRC computation. - * V5..V8: Next data chunks that are fetched from the input buffer. - * - * V9..V14: CRC-32 constants. - */ -SYM_FUNC_START(crc32_be_vgfm_16) - /* Load CRC-32 constants */ - larl %r5,constants_CRC_32_BE - VLM CONST_R1R2,CONST_CRC_POLY,0,%r5 - - /* Load the initial CRC value into the leftmost word of V0. */ - VZERO %v0 - VLVGF %v0,%r2,0 - - /* Load a 64-byte data chunk and XOR with CRC */ - VLM %v1,%v4,0,%r3 /* 64-bytes into V1..V4 */ - VX %v1,%v0,%v1 /* V1 ^= CRC */ - aghi %r3,64 /* BUF = BUF + 64 */ - aghi %r4,-64 /* LEN = LEN - 64 */ - - /* Check remaining buffer size and jump to proper folding method */ - cghi %r4,64 - jl .Lless_than_64bytes - -.Lfold_64bytes_loop: - /* Load the next 64-byte data chunk into V5 to V8 */ - VLM %v5,%v8,0,%r3 - - /* - * Perform a GF(2) multiplication of the doublewords in V1 with - * the reduction constants in V0. The intermediate result is - * then folded (accumulated) with the next data chunk in V5 and - * stored in V1. Repeat this step for the register contents - * in V2, V3, and V4 respectively. - */ - VGFMAG %v1,CONST_R1R2,%v1,%v5 - VGFMAG %v2,CONST_R1R2,%v2,%v6 - VGFMAG %v3,CONST_R1R2,%v3,%v7 - VGFMAG %v4,CONST_R1R2,%v4,%v8 - - /* Adjust buffer pointer and length for next loop */ - aghi %r3,64 /* BUF = BUF + 64 */ - aghi %r4,-64 /* LEN = LEN - 64 */ - - cghi %r4,64 - jnl .Lfold_64bytes_loop - -.Lless_than_64bytes: - /* Fold V1 to V4 into a single 128-bit value in V1 */ - VGFMAG %v1,CONST_R3R4,%v1,%v2 - VGFMAG %v1,CONST_R3R4,%v1,%v3 - VGFMAG %v1,CONST_R3R4,%v1,%v4 - - /* Check whether to continue with 64-bit folding */ - cghi %r4,16 - jl .Lfinal_fold - -.Lfold_16bytes_loop: - - VL %v2,0,,%r3 /* Load next data chunk */ - VGFMAG %v1,CONST_R3R4,%v1,%v2 /* Fold next data chunk */ - - /* Adjust buffer pointer and size for folding next data chunk */ - aghi %r3,16 - aghi %r4,-16 - - /* Process remaining data chunks */ - cghi %r4,16 - jnl .Lfold_16bytes_loop - -.Lfinal_fold: - /* - * The R5 constant is used to fold a 128-bit value into an 96-bit value - * that is XORed with the next 96-bit input data chunk. To use a single - * VGFMG instruction, multiply the rightmost 64-bit with x^32 (1<<32) to - * form an intermediate 96-bit value (with appended zeros) which is then - * XORed with the intermediate reduction result. - */ - VGFMG %v1,CONST_R5,%v1 - - /* - * Further reduce the remaining 96-bit value to a 64-bit value using a - * single VGFMG, the rightmost doubleword is multiplied with 0x1. The - * intermediate result is then XORed with the product of the leftmost - * doubleword with R6. The result is a 64-bit value and is subject to - * the Barret reduction. - */ - VGFMG %v1,CONST_R6,%v1 - - /* - * The input values to the Barret reduction are the degree-63 polynomial - * in V1 (R(x)), degree-32 generator polynomial, and the reduction - * constant u. The Barret reduction result is the CRC value of R(x) mod - * P(x). - * - * The Barret reduction algorithm is defined as: - * - * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u - * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x) - * 3. C(x) = R(x) XOR T2(x) mod x^32 - * - * Note: To compensate the division by x^32, use the vector unpack - * instruction to move the leftmost word into the leftmost doubleword - * of the vector register. The rightmost doubleword is multiplied - * with zero to not contribute to the intermediate results. - */ - - /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */ - VUPLLF %v2,%v1 - VGFMG %v2,CONST_RU_POLY,%v2 - - /* - * Compute the GF(2) product of the CRC polynomial in VO with T1(x) in - * V2 and XOR the intermediate result, T2(x), with the value in V1. - * The final result is in the rightmost word of V2. - */ - VUPLLF %v2,%v2 - VGFMAG %v2,CONST_CRC_POLY,%v2,%v1 - -.Ldone: - VLGVF %r2,%v2,3 - BR_EX %r14 -SYM_FUNC_END(crc32_be_vgfm_16) - -.previous diff --git a/arch/s390/crypto/crc32le-vx.S b/arch/s390/crypto/crc32le-vx.S deleted file mode 100644 index 5a819ae09a0b..000000000000 --- a/arch/s390/crypto/crc32le-vx.S +++ /dev/null @@ -1,275 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Hardware-accelerated CRC-32 variants for Linux on z Systems - * - * Use the z/Architecture Vector Extension Facility to accelerate the - * computing of bitreflected CRC-32 checksums for IEEE 802.3 Ethernet - * and Castagnoli. - * - * This CRC-32 implementation algorithm is bitreflected and processes - * the least-significant bit first (Little-Endian). - * - * Copyright IBM Corp. 2015 - * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> - */ - -#include <linux/linkage.h> -#include <asm/nospec-insn.h> -#include <asm/vx-insn.h> - -/* Vector register range containing CRC-32 constants */ -#define CONST_PERM_LE2BE %v9 -#define CONST_R2R1 %v10 -#define CONST_R4R3 %v11 -#define CONST_R5 %v12 -#define CONST_RU_POLY %v13 -#define CONST_CRC_POLY %v14 - - .data - .balign 8 - -/* - * The CRC-32 constant block contains reduction constants to fold and - * process particular chunks of the input data stream in parallel. - * - * For the CRC-32 variants, the constants are precomputed according to - * these definitions: - * - * R1 = [(x4*128+32 mod P'(x) << 32)]' << 1 - * R2 = [(x4*128-32 mod P'(x) << 32)]' << 1 - * R3 = [(x128+32 mod P'(x) << 32)]' << 1 - * R4 = [(x128-32 mod P'(x) << 32)]' << 1 - * R5 = [(x64 mod P'(x) << 32)]' << 1 - * R6 = [(x32 mod P'(x) << 32)]' << 1 - * - * The bitreflected Barret reduction constant, u', is defined as - * the bit reversal of floor(x**64 / P(x)). - * - * where P(x) is the polynomial in the normal domain and the P'(x) is the - * polynomial in the reversed (bitreflected) domain. - * - * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials: - * - * P(x) = 0x04C11DB7 - * P'(x) = 0xEDB88320 - * - * CRC-32C (Castagnoli) polynomials: - * - * P(x) = 0x1EDC6F41 - * P'(x) = 0x82F63B78 - */ - -SYM_DATA_START_LOCAL(constants_CRC_32_LE) - .octa 0x0F0E0D0C0B0A09080706050403020100 # BE->LE mask - .quad 0x1c6e41596, 0x154442bd4 # R2, R1 - .quad 0x0ccaa009e, 0x1751997d0 # R4, R3 - .octa 0x163cd6124 # R5 - .octa 0x1F7011641 # u' - .octa 0x1DB710641 # P'(x) << 1 -SYM_DATA_END(constants_CRC_32_LE) - -SYM_DATA_START_LOCAL(constants_CRC_32C_LE) - .octa 0x0F0E0D0C0B0A09080706050403020100 # BE->LE mask - .quad 0x09e4addf8, 0x740eef02 # R2, R1 - .quad 0x14cd00bd6, 0xf20c0dfe # R4, R3 - .octa 0x0dd45aab8 # R5 - .octa 0x0dea713f1 # u' - .octa 0x105ec76f0 # P'(x) << 1 -SYM_DATA_END(constants_CRC_32C_LE) - - .previous - - GEN_BR_THUNK %r14 - - .text - -/* - * The CRC-32 functions use these calling conventions: - * - * Parameters: - * - * %r2: Initial CRC value, typically ~0; and final CRC (return) value. - * %r3: Input buffer pointer, performance might be improved if the - * buffer is on a doubleword boundary. - * %r4: Length of the buffer, must be 64 bytes or greater. - * - * Register usage: - * - * %r5: CRC-32 constant pool base pointer. - * V0: Initial CRC value and intermediate constants and results. - * V1..V4: Data for CRC computation. - * V5..V8: Next data chunks that are fetched from the input buffer. - * V9: Constant for BE->LE conversion and shift operations - * - * V10..V14: CRC-32 constants. - */ - -SYM_FUNC_START(crc32_le_vgfm_16) - larl %r5,constants_CRC_32_LE - j crc32_le_vgfm_generic -SYM_FUNC_END(crc32_le_vgfm_16) - -SYM_FUNC_START(crc32c_le_vgfm_16) - larl %r5,constants_CRC_32C_LE - j crc32_le_vgfm_generic -SYM_FUNC_END(crc32c_le_vgfm_16) - -SYM_FUNC_START(crc32_le_vgfm_generic) - /* Load CRC-32 constants */ - VLM CONST_PERM_LE2BE,CONST_CRC_POLY,0,%r5 - - /* - * Load the initial CRC value. - * - * The CRC value is loaded into the rightmost word of the - * vector register and is later XORed with the LSB portion - * of the loaded input data. - */ - VZERO %v0 /* Clear V0 */ - VLVGF %v0,%r2,3 /* Load CRC into rightmost word */ - - /* Load a 64-byte data chunk and XOR with CRC */ - VLM %v1,%v4,0,%r3 /* 64-bytes into V1..V4 */ - VPERM %v1,%v1,%v1,CONST_PERM_LE2BE - VPERM %v2,%v2,%v2,CONST_PERM_LE2BE - VPERM %v3,%v3,%v3,CONST_PERM_LE2BE - VPERM %v4,%v4,%v4,CONST_PERM_LE2BE - - VX %v1,%v0,%v1 /* V1 ^= CRC */ - aghi %r3,64 /* BUF = BUF + 64 */ - aghi %r4,-64 /* LEN = LEN - 64 */ - - cghi %r4,64 - jl .Lless_than_64bytes - -.Lfold_64bytes_loop: - /* Load the next 64-byte data chunk into V5 to V8 */ - VLM %v5,%v8,0,%r3 - VPERM %v5,%v5,%v5,CONST_PERM_LE2BE - VPERM %v6,%v6,%v6,CONST_PERM_LE2BE - VPERM %v7,%v7,%v7,CONST_PERM_LE2BE - VPERM %v8,%v8,%v8,CONST_PERM_LE2BE - - /* - * Perform a GF(2) multiplication of the doublewords in V1 with - * the R1 and R2 reduction constants in V0. The intermediate result - * is then folded (accumulated) with the next data chunk in V5 and - * stored in V1. Repeat this step for the register contents - * in V2, V3, and V4 respectively. - */ - VGFMAG %v1,CONST_R2R1,%v1,%v5 - VGFMAG %v2,CONST_R2R1,%v2,%v6 - VGFMAG %v3,CONST_R2R1,%v3,%v7 - VGFMAG %v4,CONST_R2R1,%v4,%v8 - - aghi %r3,64 /* BUF = BUF + 64 */ - aghi %r4,-64 /* LEN = LEN - 64 */ - - cghi %r4,64 - jnl .Lfold_64bytes_loop - -.Lless_than_64bytes: - /* - * Fold V1 to V4 into a single 128-bit value in V1. Multiply V1 with R3 - * and R4 and accumulating the next 128-bit chunk until a single 128-bit - * value remains. - */ - VGFMAG %v1,CONST_R4R3,%v1,%v2 - VGFMAG %v1,CONST_R4R3,%v1,%v3 - VGFMAG %v1,CONST_R4R3,%v1,%v4 - - cghi %r4,16 - jl .Lfinal_fold - -.Lfold_16bytes_loop: - - VL %v2,0,,%r3 /* Load next data chunk */ - VPERM %v2,%v2,%v2,CONST_PERM_LE2BE - VGFMAG %v1,CONST_R4R3,%v1,%v2 /* Fold next data chunk */ - - aghi %r3,16 - aghi %r4,-16 - - cghi %r4,16 - jnl .Lfold_16bytes_loop - -.Lfinal_fold: - /* - * Set up a vector register for byte shifts. The shift value must - * be loaded in bits 1-4 in byte element 7 of a vector register. - * Shift by 8 bytes: 0x40 - * Shift by 4 bytes: 0x20 - */ - VLEIB %v9,0x40,7 - - /* - * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes - * to move R4 into the rightmost doubleword and set the leftmost - * doubleword to 0x1. - */ - VSRLB %v0,CONST_R4R3,%v9 - VLEIG %v0,1,0 - - /* - * Compute GF(2) product of V1 and V0. The rightmost doubleword - * of V1 is multiplied with R4. The leftmost doubleword of V1 is - * multiplied by 0x1 and is then XORed with rightmost product. - * Implicitly, the intermediate leftmost product becomes padded - */ - VGFMG %v1,%v0,%v1 - - /* - * Now do the final 32-bit fold by multiplying the rightmost word - * in V1 with R5 and XOR the result with the remaining bits in V1. - * - * To achieve this by a single VGFMAG, right shift V1 by a word - * and store the result in V2 which is then accumulated. Use the - * vector unpack instruction to load the rightmost half of the - * doubleword into the rightmost doubleword element of V1; the other - * half is loaded in the leftmost doubleword. - * The vector register with CONST_R5 contains the R5 constant in the - * rightmost doubleword and the leftmost doubleword is zero to ignore - * the leftmost product of V1. - */ - VLEIB %v9,0x20,7 /* Shift by words */ - VSRLB %v2,%v1,%v9 /* Store remaining bits in V2 */ - VUPLLF %v1,%v1 /* Split rightmost doubleword */ - VGFMAG %v1,CONST_R5,%v1,%v2 /* V1 = (V1 * R5) XOR V2 */ - - /* - * Apply a Barret reduction to compute the final 32-bit CRC value. - * - * The input values to the Barret reduction are the degree-63 polynomial - * in V1 (R(x)), degree-32 generator polynomial, and the reduction - * constant u. The Barret reduction result is the CRC value of R(x) mod - * P(x). - * - * The Barret reduction algorithm is defined as: - * - * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u - * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x) - * 3. C(x) = R(x) XOR T2(x) mod x^32 - * - * Note: The leftmost doubleword of vector register containing - * CONST_RU_POLY is zero and, thus, the intermediate GF(2) product - * is zero and does not contribute to the final result. - */ - - /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */ - VUPLLF %v2,%v1 - VGFMG %v2,CONST_RU_POLY,%v2 - - /* - * Compute the GF(2) product of the CRC polynomial with T1(x) in - * V2 and XOR the intermediate result, T2(x), with the value in V1. - * The final result is stored in word element 2 of V2. - */ - VUPLLF %v2,%v2 - VGFMAG %v2,CONST_CRC_POLY,%v2,%v1 - -.Ldone: - VLGVF %r2,%v2,2 - BR_EX %r14 -SYM_FUNC_END(crc32_le_vgfm_generic) - -.previous diff --git a/arch/s390/crypto/hmac_s390.c b/arch/s390/crypto/hmac_s390.c new file mode 100644 index 000000000000..bba9a818dfdc --- /dev/null +++ b/arch/s390/crypto/hmac_s390.c @@ -0,0 +1,359 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright IBM Corp. 2024 + * + * s390 specific HMAC support. + */ + +#define KMSG_COMPONENT "hmac_s390" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <asm/cpacf.h> +#include <crypto/sha2.h> +#include <crypto/internal/hash.h> +#include <linux/cpufeature.h> +#include <linux/module.h> + +/* + * KMAC param block layout for sha2 function codes: + * The layout of the param block for the KMAC instruction depends on the + * blocksize of the used hashing sha2-algorithm function codes. The param block + * contains the hash chaining value (cv), the input message bit-length (imbl) + * and the hmac-secret (key). To prevent code duplication, the sizes of all + * these are calculated based on the blocksize. + * + * param-block: + * +-------+ + * | cv | + * +-------+ + * | imbl | + * +-------+ + * | key | + * +-------+ + * + * sizes: + * part | sh2-alg | calculation | size | type + * -----+---------+-------------+------+-------- + * cv | 224/256 | blocksize/2 | 32 | u64[8] + * | 384/512 | | 64 | u128[8] + * imbl | 224/256 | blocksize/8 | 8 | u64 + * | 384/512 | | 16 | u128 + * key | 224/256 | blocksize | 64 | u8[64] + * | 384/512 | | 128 | u8[128] + */ + +#define MAX_DIGEST_SIZE SHA512_DIGEST_SIZE +#define MAX_IMBL_SIZE sizeof(u128) +#define MAX_BLOCK_SIZE SHA512_BLOCK_SIZE + +#define SHA2_CV_SIZE(bs) ((bs) >> 1) +#define SHA2_IMBL_SIZE(bs) ((bs) >> 3) + +#define SHA2_IMBL_OFFSET(bs) (SHA2_CV_SIZE(bs)) +#define SHA2_KEY_OFFSET(bs) (SHA2_CV_SIZE(bs) + SHA2_IMBL_SIZE(bs)) + +struct s390_hmac_ctx { + u8 key[MAX_BLOCK_SIZE]; +}; + +union s390_kmac_gr0 { + unsigned long reg; + struct { + unsigned long : 48; + unsigned long ikp : 1; + unsigned long iimp : 1; + unsigned long ccup : 1; + unsigned long : 6; + unsigned long fc : 7; + }; +}; + +struct s390_kmac_sha2_ctx { + u8 param[MAX_DIGEST_SIZE + MAX_IMBL_SIZE + MAX_BLOCK_SIZE]; + union s390_kmac_gr0 gr0; + u8 buf[MAX_BLOCK_SIZE]; + unsigned int buflen; +}; + +/* + * kmac_sha2_set_imbl - sets the input message bit-length based on the blocksize + */ +static inline void kmac_sha2_set_imbl(u8 *param, unsigned int buflen, + unsigned int blocksize) +{ + u8 *imbl = param + SHA2_IMBL_OFFSET(blocksize); + + switch (blocksize) { + case SHA256_BLOCK_SIZE: + *(u64 *)imbl = (u64)buflen * BITS_PER_BYTE; + break; + case SHA512_BLOCK_SIZE: + *(u128 *)imbl = (u128)buflen * BITS_PER_BYTE; + break; + default: + break; + } +} + +static int hash_key(const u8 *in, unsigned int inlen, + u8 *digest, unsigned int digestsize) +{ + unsigned long func; + union { + struct sha256_paramblock { + u32 h[8]; + u64 mbl; + } sha256; + struct sha512_paramblock { + u64 h[8]; + u128 mbl; + } sha512; + } __packed param; + +#define PARAM_INIT(x, y, z) \ + param.sha##x.h[0] = SHA##y ## _H0; \ + param.sha##x.h[1] = SHA##y ## _H1; \ + param.sha##x.h[2] = SHA##y ## _H2; \ + param.sha##x.h[3] = SHA##y ## _H3; \ + param.sha##x.h[4] = SHA##y ## _H4; \ + param.sha##x.h[5] = SHA##y ## _H5; \ + param.sha##x.h[6] = SHA##y ## _H6; \ + param.sha##x.h[7] = SHA##y ## _H7; \ + param.sha##x.mbl = (z) + + switch (digestsize) { + case SHA224_DIGEST_SIZE: + func = CPACF_KLMD_SHA_256; + PARAM_INIT(256, 224, inlen * 8); + break; + case SHA256_DIGEST_SIZE: + func = CPACF_KLMD_SHA_256; + PARAM_INIT(256, 256, inlen * 8); + break; + case SHA384_DIGEST_SIZE: + func = CPACF_KLMD_SHA_512; + PARAM_INIT(512, 384, inlen * 8); + break; + case SHA512_DIGEST_SIZE: + func = CPACF_KLMD_SHA_512; + PARAM_INIT(512, 512, inlen * 8); + break; + default: + return -EINVAL; + } + +#undef PARAM_INIT + + cpacf_klmd(func, ¶m, in, inlen); + + memcpy(digest, ¶m, digestsize); + + return 0; +} + +static int s390_hmac_sha2_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen) +{ + struct s390_hmac_ctx *tfm_ctx = crypto_shash_ctx(tfm); + unsigned int ds = crypto_shash_digestsize(tfm); + unsigned int bs = crypto_shash_blocksize(tfm); + + memset(tfm_ctx, 0, sizeof(*tfm_ctx)); + + if (keylen > bs) + return hash_key(key, keylen, tfm_ctx->key, ds); + + memcpy(tfm_ctx->key, key, keylen); + return 0; +} + +static int s390_hmac_sha2_init(struct shash_desc *desc) +{ + struct s390_hmac_ctx *tfm_ctx = crypto_shash_ctx(desc->tfm); + struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc); + unsigned int bs = crypto_shash_blocksize(desc->tfm); + + memcpy(ctx->param + SHA2_KEY_OFFSET(bs), + tfm_ctx->key, bs); + + ctx->buflen = 0; + ctx->gr0.reg = 0; + switch (crypto_shash_digestsize(desc->tfm)) { + case SHA224_DIGEST_SIZE: + ctx->gr0.fc = CPACF_KMAC_HMAC_SHA_224; + break; + case SHA256_DIGEST_SIZE: + ctx->gr0.fc = CPACF_KMAC_HMAC_SHA_256; + break; + case SHA384_DIGEST_SIZE: + ctx->gr0.fc = CPACF_KMAC_HMAC_SHA_384; + break; + case SHA512_DIGEST_SIZE: + ctx->gr0.fc = CPACF_KMAC_HMAC_SHA_512; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int s390_hmac_sha2_update(struct shash_desc *desc, + const u8 *data, unsigned int len) +{ + struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc); + unsigned int bs = crypto_shash_blocksize(desc->tfm); + unsigned int offset, n; + + /* check current buffer */ + offset = ctx->buflen % bs; + ctx->buflen += len; + if (offset + len < bs) + goto store; + + /* process one stored block */ + if (offset) { + n = bs - offset; + memcpy(ctx->buf + offset, data, n); + ctx->gr0.iimp = 1; + _cpacf_kmac(&ctx->gr0.reg, ctx->param, ctx->buf, bs); + data += n; + len -= n; + offset = 0; + } + /* process as many blocks as possible */ + if (len >= bs) { + n = (len / bs) * bs; + ctx->gr0.iimp = 1; + _cpacf_kmac(&ctx->gr0.reg, ctx->param, data, n); + data += n; + len -= n; + } +store: + /* store incomplete block in buffer */ + if (len) + memcpy(ctx->buf + offset, data, len); + + return 0; +} + +static int s390_hmac_sha2_final(struct shash_desc *desc, u8 *out) +{ + struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc); + unsigned int bs = crypto_shash_blocksize(desc->tfm); + + ctx->gr0.iimp = 0; + kmac_sha2_set_imbl(ctx->param, ctx->buflen, bs); + _cpacf_kmac(&ctx->gr0.reg, ctx->param, ctx->buf, ctx->buflen % bs); + memcpy(out, ctx->param, crypto_shash_digestsize(desc->tfm)); + + return 0; +} + +static int s390_hmac_sha2_digest(struct shash_desc *desc, + const u8 *data, unsigned int len, u8 *out) +{ + struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc); + unsigned int ds = crypto_shash_digestsize(desc->tfm); + int rc; + + rc = s390_hmac_sha2_init(desc); + if (rc) + return rc; + + ctx->gr0.iimp = 0; + kmac_sha2_set_imbl(ctx->param, len, + crypto_shash_blocksize(desc->tfm)); + _cpacf_kmac(&ctx->gr0.reg, ctx->param, data, len); + memcpy(out, ctx->param, ds); + + return 0; +} + +#define S390_HMAC_SHA2_ALG(x) { \ + .fc = CPACF_KMAC_HMAC_SHA_##x, \ + .alg = { \ + .init = s390_hmac_sha2_init, \ + .update = s390_hmac_sha2_update, \ + .final = s390_hmac_sha2_final, \ + .digest = s390_hmac_sha2_digest, \ + .setkey = s390_hmac_sha2_setkey, \ + .descsize = sizeof(struct s390_kmac_sha2_ctx), \ + .halg = { \ + .digestsize = SHA##x##_DIGEST_SIZE, \ + .base = { \ + .cra_name = "hmac(sha" #x ")", \ + .cra_driver_name = "hmac_s390_sha" #x, \ + .cra_blocksize = SHA##x##_BLOCK_SIZE, \ + .cra_priority = 400, \ + .cra_ctxsize = sizeof(struct s390_hmac_ctx), \ + .cra_module = THIS_MODULE, \ + }, \ + }, \ + }, \ +} + +static struct s390_hmac_alg { + bool registered; + unsigned int fc; + struct shash_alg alg; +} s390_hmac_algs[] = { + S390_HMAC_SHA2_ALG(224), + S390_HMAC_SHA2_ALG(256), + S390_HMAC_SHA2_ALG(384), + S390_HMAC_SHA2_ALG(512), +}; + +static __always_inline void _s390_hmac_algs_unregister(void) +{ + struct s390_hmac_alg *hmac; + int i; + + for (i = ARRAY_SIZE(s390_hmac_algs) - 1; i >= 0; i--) { + hmac = &s390_hmac_algs[i]; + if (!hmac->registered) + continue; + crypto_unregister_shash(&hmac->alg); + } +} + +static int __init hmac_s390_init(void) +{ + struct s390_hmac_alg *hmac; + int i, rc = -ENODEV; + + if (!cpacf_query_func(CPACF_KLMD, CPACF_KLMD_SHA_256)) + return -ENODEV; + if (!cpacf_query_func(CPACF_KLMD, CPACF_KLMD_SHA_512)) + return -ENODEV; + + for (i = 0; i < ARRAY_SIZE(s390_hmac_algs); i++) { + hmac = &s390_hmac_algs[i]; + if (!cpacf_query_func(CPACF_KMAC, hmac->fc)) + continue; + + rc = crypto_register_shash(&hmac->alg); + if (rc) { + pr_err("unable to register %s\n", + hmac->alg.halg.base.cra_name); + goto out; + } + hmac->registered = true; + pr_debug("registered %s\n", hmac->alg.halg.base.cra_name); + } + return rc; +out: + _s390_hmac_algs_unregister(); + return rc; +} + +static void __exit hmac_s390_exit(void) +{ + _s390_hmac_algs_unregister(); +} + +module_cpu_feature_match(S390_CPU_FEATURE_MSA, hmac_s390_init); +module_exit(hmac_s390_exit); + +MODULE_DESCRIPTION("S390 HMAC driver"); +MODULE_LICENSE("GPL"); diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index 55ee5567a5ea..511093713a6f 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -34,14 +34,22 @@ * is called. As paes can handle different kinds of key blobs * and padding is also possible, the limits need to be generous. */ -#define PAES_MIN_KEYSIZE 16 -#define PAES_MAX_KEYSIZE MAXEP11AESKEYBLOBSIZE +#define PAES_MIN_KEYSIZE 16 +#define PAES_MAX_KEYSIZE MAXEP11AESKEYBLOBSIZE +#define PAES_256_PROTKEY_SIZE (32 + 32) /* key + verification pattern */ +#define PXTS_256_PROTKEY_SIZE (32 + 32 + 32) /* k1 + k2 + verification pattern */ static u8 *ctrblk; static DEFINE_MUTEX(ctrblk_lock); static cpacf_mask_t km_functions, kmc_functions, kmctr_functions; +struct paes_protkey { + u32 type; + u32 len; + u8 protkey[PXTS_256_PROTKEY_SIZE]; +}; + struct key_blob { /* * Small keys will be stored in the keybuf. Larger keys are @@ -55,31 +63,43 @@ struct key_blob { unsigned int keylen; }; -static inline int _key_to_kb(struct key_blob *kb, - const u8 *key, - unsigned int keylen) +/* + * make_clrkey_token() - wrap the raw key ck with pkey clearkey token + * information. + * @returns the size of the clearkey token + */ +static inline u32 make_clrkey_token(const u8 *ck, size_t cklen, u8 *dest) { - struct clearkey_header { + struct clrkey_token { u8 type; u8 res0[3]; u8 version; u8 res1[3]; u32 keytype; u32 len; - } __packed * h; + u8 key[]; + } __packed *token = (struct clrkey_token *)dest; + + token->type = 0x00; + token->version = 0x02; + token->keytype = (cklen - 8) >> 3; + token->len = cklen; + memcpy(token->key, ck, cklen); + + return sizeof(*token) + cklen; +} +static inline int _key_to_kb(struct key_blob *kb, + const u8 *key, + unsigned int keylen) +{ switch (keylen) { case 16: case 24: case 32: /* clear key value, prepare pkey clear key token in keybuf */ memset(kb->keybuf, 0, sizeof(kb->keybuf)); - h = (struct clearkey_header *) kb->keybuf; - h->version = 0x02; /* TOKVER_CLEAR_KEY */ - h->keytype = (keylen - 8) >> 3; - h->len = keylen; - memcpy(kb->keybuf + sizeof(*h), key, keylen); - kb->keylen = sizeof(*h) + keylen; + kb->keylen = make_clrkey_token(key, keylen, kb->keybuf); kb->key = kb->keybuf; break; default: @@ -99,6 +119,40 @@ static inline int _key_to_kb(struct key_blob *kb, return 0; } +static inline int _xts_key_to_kb(struct key_blob *kb, + const u8 *key, + unsigned int keylen) +{ + size_t cklen = keylen / 2; + + memset(kb->keybuf, 0, sizeof(kb->keybuf)); + + switch (keylen) { + case 32: + case 64: + /* clear key value, prepare pkey clear key tokens in keybuf */ + kb->key = kb->keybuf; + kb->keylen = make_clrkey_token(key, cklen, kb->key); + kb->keylen += make_clrkey_token(key + cklen, cklen, + kb->key + kb->keylen); + break; + default: + /* other key material, let pkey handle this */ + if (keylen <= sizeof(kb->keybuf)) { + kb->key = kb->keybuf; + } else { + kb->key = kmalloc(keylen, GFP_KERNEL); + if (!kb->key) + return -ENOMEM; + } + memcpy(kb->key, key, keylen); + kb->keylen = keylen; + break; + } + + return 0; +} + static inline void _free_kb_keybuf(struct key_blob *kb) { if (kb->key && kb->key != kb->keybuf @@ -106,53 +160,53 @@ static inline void _free_kb_keybuf(struct key_blob *kb) kfree_sensitive(kb->key); kb->key = NULL; } + memzero_explicit(kb->keybuf, sizeof(kb->keybuf)); } struct s390_paes_ctx { struct key_blob kb; - struct pkey_protkey pk; + struct paes_protkey pk; spinlock_t pk_lock; unsigned long fc; }; struct s390_pxts_ctx { - struct key_blob kb[2]; - struct pkey_protkey pk[2]; + struct key_blob kb; + struct paes_protkey pk[2]; spinlock_t pk_lock; unsigned long fc; }; -static inline int __paes_keyblob2pkey(struct key_blob *kb, - struct pkey_protkey *pk) +static inline int __paes_keyblob2pkey(const u8 *key, unsigned int keylen, + struct paes_protkey *pk) { - int i, ret; + int i, rc = -EIO; - /* try three times in case of failure */ - for (i = 0; i < 3; i++) { - if (i > 0 && ret == -EAGAIN && in_task()) + /* try three times in case of busy card */ + for (i = 0; rc && i < 3; i++) { + if (rc == -EBUSY && in_task()) { if (msleep_interruptible(1000)) return -EINTR; - ret = pkey_keyblob2pkey(kb->key, kb->keylen, - pk->protkey, &pk->len, &pk->type); - if (ret == 0) - break; + } + rc = pkey_key2protkey(key, keylen, pk->protkey, &pk->len, + &pk->type); } - return ret; + return rc; } static inline int __paes_convert_key(struct s390_paes_ctx *ctx) { - int ret; - struct pkey_protkey pkey; + struct paes_protkey pk; + int rc; - pkey.len = sizeof(pkey.protkey); - ret = __paes_keyblob2pkey(&ctx->kb, &pkey); - if (ret) - return ret; + pk.len = sizeof(pk.protkey); + rc = __paes_keyblob2pkey(ctx->kb.key, ctx->kb.keylen, &pk); + if (rc) + return rc; spin_lock_bh(&ctx->pk_lock); - memcpy(&ctx->pk, &pkey, sizeof(pkey)); + memcpy(&ctx->pk, &pk, sizeof(pk)); spin_unlock_bh(&ctx->pk_lock); return 0; @@ -177,8 +231,8 @@ static void ecb_paes_exit(struct crypto_skcipher *tfm) static inline int __ecb_paes_set_key(struct s390_paes_ctx *ctx) { - int rc; unsigned long fc; + int rc; rc = __paes_convert_key(ctx); if (rc) @@ -198,8 +252,8 @@ static inline int __ecb_paes_set_key(struct s390_paes_ctx *ctx) static int ecb_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - int rc; struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + int rc; _free_kb_keybuf(&ctx->kb); rc = _key_to_kb(&ctx->kb, in_key, key_len); @@ -213,19 +267,19 @@ static int ecb_paes_crypt(struct skcipher_request *req, unsigned long modifier) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - unsigned int nbytes, n, k; - int ret; struct { - u8 key[MAXPROTKEYSIZE]; + u8 key[PAES_256_PROTKEY_SIZE]; } param; + struct skcipher_walk walk; + unsigned int nbytes, n, k; + int rc; - ret = skcipher_walk_virt(&walk, req, false); - if (ret) - return ret; + rc = skcipher_walk_virt(&walk, req, false); + if (rc) + return rc; spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE); + memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); spin_unlock_bh(&ctx->pk_lock); while ((nbytes = walk.nbytes) != 0) { @@ -234,16 +288,16 @@ static int ecb_paes_crypt(struct skcipher_request *req, unsigned long modifier) k = cpacf_km(ctx->fc | modifier, ¶m, walk.dst.virt.addr, walk.src.virt.addr, n); if (k) - ret = skcipher_walk_done(&walk, nbytes - k); + rc = skcipher_walk_done(&walk, nbytes - k); if (k < n) { if (__paes_convert_key(ctx)) return skcipher_walk_done(&walk, -EIO); spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE); + memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); spin_unlock_bh(&ctx->pk_lock); } } - return ret; + return rc; } static int ecb_paes_encrypt(struct skcipher_request *req) @@ -292,8 +346,8 @@ static void cbc_paes_exit(struct crypto_skcipher *tfm) static inline int __cbc_paes_set_key(struct s390_paes_ctx *ctx) { - int rc; unsigned long fc; + int rc; rc = __paes_convert_key(ctx); if (rc) @@ -313,8 +367,8 @@ static inline int __cbc_paes_set_key(struct s390_paes_ctx *ctx) static int cbc_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - int rc; struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + int rc; _free_kb_keybuf(&ctx->kb); rc = _key_to_kb(&ctx->kb, in_key, key_len); @@ -328,21 +382,21 @@ static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - unsigned int nbytes, n, k; - int ret; struct { u8 iv[AES_BLOCK_SIZE]; - u8 key[MAXPROTKEYSIZE]; + u8 key[PAES_256_PROTKEY_SIZE]; } param; + struct skcipher_walk walk; + unsigned int nbytes, n, k; + int rc; - ret = skcipher_walk_virt(&walk, req, false); - if (ret) - return ret; + rc = skcipher_walk_virt(&walk, req, false); + if (rc) + return rc; memcpy(param.iv, walk.iv, AES_BLOCK_SIZE); spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE); + memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); spin_unlock_bh(&ctx->pk_lock); while ((nbytes = walk.nbytes) != 0) { @@ -352,17 +406,17 @@ static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier) walk.dst.virt.addr, walk.src.virt.addr, n); if (k) { memcpy(walk.iv, param.iv, AES_BLOCK_SIZE); - ret = skcipher_walk_done(&walk, nbytes - k); + rc = skcipher_walk_done(&walk, nbytes - k); } if (k < n) { if (__paes_convert_key(ctx)) return skcipher_walk_done(&walk, -EIO); spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE); + memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); spin_unlock_bh(&ctx->pk_lock); } } - return ret; + return rc; } static int cbc_paes_encrypt(struct skcipher_request *req) @@ -397,8 +451,7 @@ static int xts_paes_init(struct crypto_skcipher *tfm) { struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); - ctx->kb[0].key = NULL; - ctx->kb[1].key = NULL; + ctx->kb.key = NULL; spin_lock_init(&ctx->pk_lock); return 0; @@ -408,24 +461,51 @@ static void xts_paes_exit(struct crypto_skcipher *tfm) { struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); - _free_kb_keybuf(&ctx->kb[0]); - _free_kb_keybuf(&ctx->kb[1]); + _free_kb_keybuf(&ctx->kb); } static inline int __xts_paes_convert_key(struct s390_pxts_ctx *ctx) { - struct pkey_protkey pkey0, pkey1; + struct paes_protkey pk0, pk1; + size_t split_keylen; + int rc; - pkey0.len = sizeof(pkey0.protkey); - pkey1.len = sizeof(pkey1.protkey); + pk0.len = sizeof(pk0.protkey); + pk1.len = sizeof(pk1.protkey); - if (__paes_keyblob2pkey(&ctx->kb[0], &pkey0) || - __paes_keyblob2pkey(&ctx->kb[1], &pkey1)) + rc = __paes_keyblob2pkey(ctx->kb.key, ctx->kb.keylen, &pk0); + if (rc) + return rc; + + switch (pk0.type) { + case PKEY_KEYTYPE_AES_128: + case PKEY_KEYTYPE_AES_256: + /* second keytoken required */ + if (ctx->kb.keylen % 2) + return -EINVAL; + split_keylen = ctx->kb.keylen / 2; + + rc = __paes_keyblob2pkey(ctx->kb.key + split_keylen, + split_keylen, &pk1); + if (rc) + return rc; + + if (pk0.type != pk1.type) + return -EINVAL; + break; + case PKEY_KEYTYPE_AES_XTS_128: + case PKEY_KEYTYPE_AES_XTS_256: + /* single key */ + pk1.type = 0; + break; + default: + /* unsupported protected keytype */ return -EINVAL; + } spin_lock_bh(&ctx->pk_lock); - memcpy(&ctx->pk[0], &pkey0, sizeof(pkey0)); - memcpy(&ctx->pk[1], &pkey1, sizeof(pkey1)); + ctx->pk[0] = pk0; + ctx->pk[1] = pk1; spin_unlock_bh(&ctx->pk_lock); return 0; @@ -434,17 +514,30 @@ static inline int __xts_paes_convert_key(struct s390_pxts_ctx *ctx) static inline int __xts_paes_set_key(struct s390_pxts_ctx *ctx) { unsigned long fc; + int rc; - if (__xts_paes_convert_key(ctx)) - return -EINVAL; - - if (ctx->pk[0].type != ctx->pk[1].type) - return -EINVAL; + rc = __xts_paes_convert_key(ctx); + if (rc) + return rc; /* Pick the correct function code based on the protected key type */ - fc = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? CPACF_KM_PXTS_128 : - (ctx->pk[0].type == PKEY_KEYTYPE_AES_256) ? - CPACF_KM_PXTS_256 : 0; + switch (ctx->pk[0].type) { + case PKEY_KEYTYPE_AES_128: + fc = CPACF_KM_PXTS_128; + break; + case PKEY_KEYTYPE_AES_256: + fc = CPACF_KM_PXTS_256; + break; + case PKEY_KEYTYPE_AES_XTS_128: + fc = CPACF_KM_PXTS_128_FULL; + break; + case PKEY_KEYTYPE_AES_XTS_256: + fc = CPACF_KM_PXTS_256_FULL; + break; + default: + fc = 0; + break; + } /* Check if the function code is available */ ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0; @@ -453,24 +546,19 @@ static inline int __xts_paes_set_key(struct s390_pxts_ctx *ctx) } static int xts_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int xts_key_len) + unsigned int in_keylen) { - int rc; struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); u8 ckey[2 * AES_MAX_KEY_SIZE]; - unsigned int ckey_len, key_len; + unsigned int ckey_len; + int rc; - if (xts_key_len % 2) + if ((in_keylen == 32 || in_keylen == 64) && + xts_verify_key(tfm, in_key, in_keylen)) return -EINVAL; - key_len = xts_key_len / 2; - - _free_kb_keybuf(&ctx->kb[0]); - _free_kb_keybuf(&ctx->kb[1]); - rc = _key_to_kb(&ctx->kb[0], in_key, key_len); - if (rc) - return rc; - rc = _key_to_kb(&ctx->kb[1], in_key + key_len, key_len); + _free_kb_keybuf(&ctx->kb); + rc = _xts_key_to_kb(&ctx->kb, in_key, in_keylen); if (rc) return rc; @@ -479,6 +567,13 @@ static int xts_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, return rc; /* + * It is not possible on a single protected key (e.g. full AES-XTS) to + * check, if k1 and k2 are the same. + */ + if (ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_128 || + ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_256) + return 0; + /* * xts_verify_key verifies the key length is not odd and makes * sure that the two keys are not the same. This can be done * on the two protected keys as well @@ -490,28 +585,82 @@ static int xts_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, return xts_verify_key(tfm, ckey, 2*ckey_len); } -static int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier) +static int paes_xts_crypt_full(struct skcipher_request *req, + unsigned long modifier) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + unsigned int keylen, offset, nbytes, n, k; + struct { + u8 key[64]; + u8 tweak[16]; + u8 nap[16]; + u8 wkvp[32]; + } fxts_param = { + .nap = {0}, + }; struct skcipher_walk walk; + int rc; + + rc = skcipher_walk_virt(&walk, req, false); + if (rc) + return rc; + + keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_128) ? 32 : 64; + offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_128) ? 32 : 0; + + spin_lock_bh(&ctx->pk_lock); + memcpy(fxts_param.key + offset, ctx->pk[0].protkey, keylen); + memcpy(fxts_param.wkvp, ctx->pk[0].protkey + keylen, + sizeof(fxts_param.wkvp)); + spin_unlock_bh(&ctx->pk_lock); + memcpy(fxts_param.tweak, walk.iv, sizeof(fxts_param.tweak)); + fxts_param.nap[0] = 0x01; /* initial alpha power (1, little-endian) */ + + while ((nbytes = walk.nbytes) != 0) { + /* only use complete blocks */ + n = nbytes & ~(AES_BLOCK_SIZE - 1); + k = cpacf_km(ctx->fc | modifier, fxts_param.key + offset, + walk.dst.virt.addr, walk.src.virt.addr, n); + if (k) + rc = skcipher_walk_done(&walk, nbytes - k); + if (k < n) { + if (__xts_paes_convert_key(ctx)) + return skcipher_walk_done(&walk, -EIO); + spin_lock_bh(&ctx->pk_lock); + memcpy(fxts_param.key + offset, ctx->pk[0].protkey, + keylen); + memcpy(fxts_param.wkvp, ctx->pk[0].protkey + keylen, + sizeof(fxts_param.wkvp)); + spin_unlock_bh(&ctx->pk_lock); + } + } + + return rc; +} + +static int paes_xts_crypt(struct skcipher_request *req, unsigned long modifier) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); unsigned int keylen, offset, nbytes, n, k; - int ret; struct { - u8 key[MAXPROTKEYSIZE]; /* key + verification pattern */ + u8 key[PAES_256_PROTKEY_SIZE]; u8 tweak[16]; u8 block[16]; u8 bit[16]; u8 xts[16]; } pcc_param; struct { - u8 key[MAXPROTKEYSIZE]; /* key + verification pattern */ + u8 key[PAES_256_PROTKEY_SIZE]; u8 init[16]; } xts_param; + struct skcipher_walk walk; + int rc; - ret = skcipher_walk_virt(&walk, req, false); - if (ret) - return ret; + rc = skcipher_walk_virt(&walk, req, false); + if (rc) + return rc; keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 48 : 64; offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 16 : 0; @@ -531,7 +680,7 @@ static int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier) k = cpacf_km(ctx->fc | modifier, xts_param.key + offset, walk.dst.virt.addr, walk.src.virt.addr, n); if (k) - ret = skcipher_walk_done(&walk, nbytes - k); + rc = skcipher_walk_done(&walk, nbytes - k); if (k < n) { if (__xts_paes_convert_key(ctx)) return skcipher_walk_done(&walk, -EIO); @@ -542,7 +691,24 @@ static int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier) } } - return ret; + return rc; +} + +static inline int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + + switch (ctx->fc) { + case CPACF_KM_PXTS_128: + case CPACF_KM_PXTS_256: + return paes_xts_crypt(req, modifier); + case CPACF_KM_PXTS_128_FULL: + case CPACF_KM_PXTS_256_FULL: + return paes_xts_crypt_full(req, modifier); + default: + return -EINVAL; + } } static int xts_paes_encrypt(struct skcipher_request *req) @@ -592,8 +758,8 @@ static void ctr_paes_exit(struct crypto_skcipher *tfm) static inline int __ctr_paes_set_key(struct s390_paes_ctx *ctx) { - int rc; unsigned long fc; + int rc; rc = __paes_convert_key(ctx); if (rc) @@ -614,8 +780,8 @@ static inline int __ctr_paes_set_key(struct s390_paes_ctx *ctx) static int ctr_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - int rc; struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + int rc; _free_kb_keybuf(&ctx->kb); rc = _key_to_kb(&ctx->kb, in_key, key_len); @@ -645,19 +811,19 @@ static int ctr_paes_crypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); u8 buf[AES_BLOCK_SIZE], *ctrptr; - struct skcipher_walk walk; - unsigned int nbytes, n, k; - int ret, locked; struct { - u8 key[MAXPROTKEYSIZE]; + u8 key[PAES_256_PROTKEY_SIZE]; } param; + struct skcipher_walk walk; + unsigned int nbytes, n, k; + int rc, locked; - ret = skcipher_walk_virt(&walk, req, false); - if (ret) - return ret; + rc = skcipher_walk_virt(&walk, req, false); + if (rc) + return rc; spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE); + memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); spin_unlock_bh(&ctx->pk_lock); locked = mutex_trylock(&ctrblk_lock); @@ -674,7 +840,7 @@ static int ctr_paes_crypt(struct skcipher_request *req) memcpy(walk.iv, ctrptr + k - AES_BLOCK_SIZE, AES_BLOCK_SIZE); crypto_inc(walk.iv, AES_BLOCK_SIZE); - ret = skcipher_walk_done(&walk, nbytes - k); + rc = skcipher_walk_done(&walk, nbytes - k); } if (k < n) { if (__paes_convert_key(ctx)) { @@ -683,7 +849,7 @@ static int ctr_paes_crypt(struct skcipher_request *req) return skcipher_walk_done(&walk, -EIO); } spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE); + memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); spin_unlock_bh(&ctx->pk_lock); } } @@ -703,15 +869,15 @@ static int ctr_paes_crypt(struct skcipher_request *req) if (__paes_convert_key(ctx)) return skcipher_walk_done(&walk, -EIO); spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE); + memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); spin_unlock_bh(&ctx->pk_lock); } memcpy(walk.dst.virt.addr, buf, nbytes); crypto_inc(walk.iv, AES_BLOCK_SIZE); - ret = skcipher_walk_done(&walk, nbytes); + rc = skcipher_walk_done(&walk, nbytes); } - return ret; + return rc; } static struct skcipher_alg ctr_paes_alg = { @@ -751,7 +917,7 @@ static void paes_s390_fini(void) static int __init paes_s390_init(void) { - int ret; + int rc; /* Query available functions for KM, KMC and KMCTR */ cpacf_query(CPACF_KM, &km_functions); @@ -761,23 +927,23 @@ static int __init paes_s390_init(void) if (cpacf_test_func(&km_functions, CPACF_KM_PAES_128) || cpacf_test_func(&km_functions, CPACF_KM_PAES_192) || cpacf_test_func(&km_functions, CPACF_KM_PAES_256)) { - ret = crypto_register_skcipher(&ecb_paes_alg); - if (ret) + rc = crypto_register_skcipher(&ecb_paes_alg); + if (rc) goto out_err; } if (cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) || cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) || cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256)) { - ret = crypto_register_skcipher(&cbc_paes_alg); - if (ret) + rc = crypto_register_skcipher(&cbc_paes_alg); + if (rc) goto out_err; } if (cpacf_test_func(&km_functions, CPACF_KM_PXTS_128) || cpacf_test_func(&km_functions, CPACF_KM_PXTS_256)) { - ret = crypto_register_skcipher(&xts_paes_alg); - if (ret) + rc = crypto_register_skcipher(&xts_paes_alg); + if (rc) goto out_err; } @@ -786,24 +952,27 @@ static int __init paes_s390_init(void) cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_256)) { ctrblk = (u8 *) __get_free_page(GFP_KERNEL); if (!ctrblk) { - ret = -ENOMEM; + rc = -ENOMEM; goto out_err; } - ret = crypto_register_skcipher(&ctr_paes_alg); - if (ret) + rc = crypto_register_skcipher(&ctr_paes_alg); + if (rc) goto out_err; } return 0; out_err: paes_s390_fini(); - return ret; + return rc; } module_init(paes_s390_init); module_exit(paes_s390_fini); -MODULE_ALIAS_CRYPTO("paes"); +MODULE_ALIAS_CRYPTO("ecb(paes)"); +MODULE_ALIAS_CRYPTO("cbc(paes)"); +MODULE_ALIAS_CRYPTO("ctr(paes)"); +MODULE_ALIAS_CRYPTO("xts(paes)"); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm with protected keys"); MODULE_LICENSE("GPL"); diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index a077087bc6cc..2becd77df741 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -679,7 +679,7 @@ static ssize_t prng_chunksize_show(struct device *dev, struct device_attribute *attr, char *buf) { - return scnprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size); + return sysfs_emit(buf, "%u\n", prng_chunk_size); } static DEVICE_ATTR(chunksize, 0444, prng_chunksize_show, NULL); @@ -698,7 +698,7 @@ static ssize_t prng_counter_show(struct device *dev, counter = prng_data->prngws.byte_counter; mutex_unlock(&prng_data->mutex); - return scnprintf(buf, PAGE_SIZE, "%llu\n", counter); + return sysfs_emit(buf, "%llu\n", counter); } static DEVICE_ATTR(byte_counter, 0444, prng_counter_show, NULL); @@ -707,7 +707,7 @@ static ssize_t prng_errorflag_show(struct device *dev, struct device_attribute *attr, char *buf) { - return scnprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag); + return sysfs_emit(buf, "%d\n", prng_errorflag); } static DEVICE_ATTR(errorflag, 0444, prng_errorflag_show, NULL); @@ -717,9 +717,9 @@ static ssize_t prng_mode_show(struct device *dev, char *buf) { if (prng_mode == PRNG_MODE_TDES) - return scnprintf(buf, PAGE_SIZE, "TDES\n"); + return sysfs_emit(buf, "TDES\n"); else - return scnprintf(buf, PAGE_SIZE, "SHA512\n"); + return sysfs_emit(buf, "SHA512\n"); } static DEVICE_ATTR(mode, 0444, prng_mode_show, NULL); @@ -742,7 +742,7 @@ static ssize_t prng_reseed_limit_show(struct device *dev, struct device_attribute *attr, char *buf) { - return scnprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit); + return sysfs_emit(buf, "%u\n", prng_reseed_limit); } static ssize_t prng_reseed_limit_store(struct device *dev, struct device_attribute *attr, @@ -773,7 +773,7 @@ static ssize_t prng_strength_show(struct device *dev, struct device_attribute *attr, char *buf) { - return scnprintf(buf, PAGE_SIZE, "256\n"); + return sysfs_emit(buf, "256\n"); } static DEVICE_ATTR(strength, 0444, prng_strength_show, NULL); diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h index 65ea12fc87a1..2bb22db54c31 100644 --- a/arch/s390/crypto/sha.h +++ b/arch/s390/crypto/sha.h @@ -25,6 +25,7 @@ struct s390_sha_ctx { u32 state[CPACF_MAX_PARMBLOCK_SIZE / sizeof(u32)]; u8 buf[SHA_MAX_BLOCK_SIZE]; int func; /* KIMD function to use */ + int first_message_part; }; struct shash_desc; diff --git a/arch/s390/crypto/sha3_256_s390.c b/arch/s390/crypto/sha3_256_s390.c index e1350e033a32..a84ef692f572 100644 --- a/arch/s390/crypto/sha3_256_s390.c +++ b/arch/s390/crypto/sha3_256_s390.c @@ -21,9 +21,11 @@ static int sha3_256_init(struct shash_desc *desc) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - memset(sctx->state, 0, sizeof(sctx->state)); + if (!test_facility(86)) /* msa 12 */ + memset(sctx->state, 0, sizeof(sctx->state)); sctx->count = 0; sctx->func = CPACF_KIMD_SHA3_256; + sctx->first_message_part = 1; return 0; } @@ -36,6 +38,7 @@ static int sha3_256_export(struct shash_desc *desc, void *out) octx->rsiz = sctx->count; memcpy(octx->st, sctx->state, sizeof(octx->st)); memcpy(octx->buf, sctx->buf, sizeof(octx->buf)); + octx->partial = sctx->first_message_part; return 0; } @@ -48,6 +51,7 @@ static int sha3_256_import(struct shash_desc *desc, const void *in) sctx->count = ictx->rsiz; memcpy(sctx->state, ictx->st, sizeof(ictx->st)); memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); + sctx->first_message_part = ictx->partial; sctx->func = CPACF_KIMD_SHA3_256; return 0; @@ -61,6 +65,7 @@ static int sha3_224_import(struct shash_desc *desc, const void *in) sctx->count = ictx->rsiz; memcpy(sctx->state, ictx->st, sizeof(ictx->st)); memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); + sctx->first_message_part = ictx->partial; sctx->func = CPACF_KIMD_SHA3_224; return 0; @@ -88,9 +93,11 @@ static int sha3_224_init(struct shash_desc *desc) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - memset(sctx->state, 0, sizeof(sctx->state)); + if (!test_facility(86)) /* msa 12 */ + memset(sctx->state, 0, sizeof(sctx->state)); sctx->count = 0; sctx->func = CPACF_KIMD_SHA3_224; + sctx->first_message_part = 1; return 0; } diff --git a/arch/s390/crypto/sha3_512_s390.c b/arch/s390/crypto/sha3_512_s390.c index 06c142ed9bb1..07528fc98ff7 100644 --- a/arch/s390/crypto/sha3_512_s390.c +++ b/arch/s390/crypto/sha3_512_s390.c @@ -20,9 +20,11 @@ static int sha3_512_init(struct shash_desc *desc) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - memset(sctx->state, 0, sizeof(sctx->state)); + if (!test_facility(86)) /* msa 12 */ + memset(sctx->state, 0, sizeof(sctx->state)); sctx->count = 0; sctx->func = CPACF_KIMD_SHA3_512; + sctx->first_message_part = 1; return 0; } @@ -37,6 +39,7 @@ static int sha3_512_export(struct shash_desc *desc, void *out) memcpy(octx->st, sctx->state, sizeof(octx->st)); memcpy(octx->buf, sctx->buf, sizeof(octx->buf)); + octx->partial = sctx->first_message_part; return 0; } @@ -52,6 +55,7 @@ static int sha3_512_import(struct shash_desc *desc, const void *in) memcpy(sctx->state, ictx->st, sizeof(ictx->st)); memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); + sctx->first_message_part = ictx->partial; sctx->func = CPACF_KIMD_SHA3_512; return 0; @@ -68,6 +72,7 @@ static int sha3_384_import(struct shash_desc *desc, const void *in) memcpy(sctx->state, ictx->st, sizeof(ictx->st)); memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); + sctx->first_message_part = ictx->partial; sctx->func = CPACF_KIMD_SHA3_384; return 0; @@ -97,9 +102,11 @@ static int sha3_384_init(struct shash_desc *desc) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - memset(sctx->state, 0, sizeof(sctx->state)); + if (!test_facility(86)) /* msa 12 */ + memset(sctx->state, 0, sizeof(sctx->state)); sctx->count = 0; sctx->func = CPACF_KIMD_SHA3_384; + sctx->first_message_part = 1; return 0; } diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c index 686fe7aa192f..961d7d522af1 100644 --- a/arch/s390/crypto/sha_common.c +++ b/arch/s390/crypto/sha_common.c @@ -18,6 +18,7 @@ int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len) struct s390_sha_ctx *ctx = shash_desc_ctx(desc); unsigned int bsize = crypto_shash_blocksize(desc->tfm); unsigned int index, n; + int fc; /* how much is already in the buffer? */ index = ctx->count % bsize; @@ -26,10 +27,16 @@ int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len) if ((index + len) < bsize) goto store; + fc = ctx->func; + if (ctx->first_message_part) + fc |= test_facility(86) ? CPACF_KIMD_NIP : 0; + /* process one stored block */ if (index) { memcpy(ctx->buf + index, data, bsize - index); - cpacf_kimd(ctx->func, ctx->state, ctx->buf, bsize); + cpacf_kimd(fc, ctx->state, ctx->buf, bsize); + ctx->first_message_part = 0; + fc &= ~CPACF_KIMD_NIP; data += bsize - index; len -= bsize - index; index = 0; @@ -38,7 +45,8 @@ int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len) /* process as many blocks as possible */ if (len >= bsize) { n = (len / bsize) * bsize; - cpacf_kimd(ctx->func, ctx->state, data, n); + cpacf_kimd(fc, ctx->state, data, n); + ctx->first_message_part = 0; data += n; len -= n; } @@ -75,7 +83,7 @@ int s390_sha_final(struct shash_desc *desc, u8 *out) unsigned int bsize = crypto_shash_blocksize(desc->tfm); u64 bits; unsigned int n; - int mbl_offset; + int mbl_offset, fc; n = ctx->count % bsize; bits = ctx->count * 8; @@ -109,7 +117,11 @@ int s390_sha_final(struct shash_desc *desc, u8 *out) return -EINVAL; } - cpacf_klmd(ctx->func, ctx->state, ctx->buf, n); + fc = ctx->func; + fc |= test_facility(86) ? CPACF_KLMD_DUFOP : 0; + if (ctx->first_message_part) + fc |= CPACF_KLMD_NIP; + cpacf_klmd(fc, ctx->state, ctx->buf, n); /* copy digest to out */ memcpy(out, ctx->state, crypto_shash_digestsize(desc->tfm)); |