Diffstat (limited to 'arch/riscv/lib')
-rw-r--r-- | arch/riscv/lib/Makefile | 7
-rw-r--r-- | arch/riscv/lib/crc-clmul-consts.h | 122
-rw-r--r-- | arch/riscv/lib/crc-clmul-template.h | 265
-rw-r--r-- | arch/riscv/lib/crc-clmul.h | 23
-rw-r--r-- | arch/riscv/lib/crc-t10dif.c | 24
-rw-r--r-- | arch/riscv/lib/crc16_msb.c | 18
-rw-r--r-- | arch/riscv/lib/crc32.c | 53
-rw-r--r-- | arch/riscv/lib/crc32_lsb.c | 18
-rw-r--r-- | arch/riscv/lib/crc32_msb.c | 18
-rw-r--r-- | arch/riscv/lib/crc64.c | 34
-rw-r--r-- | arch/riscv/lib/crc64_lsb.c | 18
-rw-r--r-- | arch/riscv/lib/crc64_msb.c | 18
-rw-r--r-- | arch/riscv/lib/crypto/Kconfig | 16
-rw-r--r-- | arch/riscv/lib/crypto/Makefile | 7
-rw-r--r-- | arch/riscv/lib/crypto/chacha-riscv64-glue.c | 75
-rw-r--r-- | arch/riscv/lib/crypto/chacha-riscv64-zvkb.S | 297
-rw-r--r-- | arch/riscv/lib/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S | 225
-rw-r--r-- | arch/riscv/lib/crypto/sha256.c | 67
-rw-r--r-- | arch/riscv/lib/riscv_v_helpers.c | 11
-rw-r--r-- | arch/riscv/lib/uaccess.S | 50
-rw-r--r-- | arch/riscv/lib/uaccess_vector.S | 15
21 files changed, 54 insertions, 1327 deletions
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 0baec92d2f55..bbc031124974 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += crypto/ lib-y += delay.o lib-y += memcpy.o lib-y += memset.o @@ -16,12 +15,6 @@ endif lib-$(CONFIG_MMU) += uaccess.o lib-$(CONFIG_64BIT) += tishift.o lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o -obj-$(CONFIG_CRC32_ARCH) += crc32-riscv.o -crc32-riscv-y := crc32.o crc32_msb.o crc32_lsb.o -obj-$(CONFIG_CRC64_ARCH) += crc64-riscv.o -crc64-riscv-y := crc64.o crc64_msb.o crc64_lsb.o -obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-riscv.o -crc-t10dif-riscv-y := crc-t10dif.o crc16_msb.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o lib-$(CONFIG_RISCV_ISA_V) += xor.o lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o diff --git a/arch/riscv/lib/crc-clmul-consts.h b/arch/riscv/lib/crc-clmul-consts.h deleted file mode 100644 index 8d73449235ef..000000000000 --- a/arch/riscv/lib/crc-clmul-consts.h +++ /dev/null @@ -1,122 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * CRC constants generated by: - * - * ./scripts/gen-crc-consts.py riscv_clmul crc16_msb_0x8bb7,crc32_msb_0x04c11db7,crc32_lsb_0xedb88320,crc32_lsb_0x82f63b78,crc64_msb_0x42f0e1eba9ea3693,crc64_lsb_0x9a6c9329ac4bc9b5 - * - * Do not edit manually. - */ - -struct crc_clmul_consts { - unsigned long fold_across_2_longs_const_hi; - unsigned long fold_across_2_longs_const_lo; - unsigned long barrett_reduction_const_1; - unsigned long barrett_reduction_const_2; -}; - -/* - * Constants generated for most-significant-bit-first CRC-16 using - * G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0 - */ -static const struct crc_clmul_consts crc16_msb_0x8bb7_consts __maybe_unused = { -#ifdef CONFIG_64BIT - .fold_across_2_longs_const_hi = 0x0000000000001faa, /* x^192 mod G */ - .fold_across_2_longs_const_lo = 0x000000000000a010, /* x^128 mod G */ - .barrett_reduction_const_1 = 0xfb2d2bfc0e99d245, /* floor(x^79 / G) */ - .barrett_reduction_const_2 = 0x0000000000008bb7, /* G - x^16 */ -#else - .fold_across_2_longs_const_hi = 0x00005890, /* x^96 mod G */ - .fold_across_2_longs_const_lo = 0x0000f249, /* x^64 mod G */ - .barrett_reduction_const_1 = 0xfb2d2bfc, /* floor(x^47 / G) */ - .barrett_reduction_const_2 = 0x00008bb7, /* G - x^16 */ -#endif -}; - -/* - * Constants generated for most-significant-bit-first CRC-32 using - * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 + - * x^5 + x^4 + x^2 + x^1 + x^0 - */ -static const struct crc_clmul_consts crc32_msb_0x04c11db7_consts __maybe_unused = { -#ifdef CONFIG_64BIT - .fold_across_2_longs_const_hi = 0x00000000c5b9cd4c, /* x^192 mod G */ - .fold_across_2_longs_const_lo = 0x00000000e8a45605, /* x^128 mod G */ - .barrett_reduction_const_1 = 0x826880efa40da72d, /* floor(x^95 / G) */ - .barrett_reduction_const_2 = 0x0000000004c11db7, /* G - x^32 */ -#else - .fold_across_2_longs_const_hi = 0xf200aa66, /* x^96 mod G */ - .fold_across_2_longs_const_lo = 0x490d678d, /* x^64 mod G */ - .barrett_reduction_const_1 = 0x826880ef, /* floor(x^63 / G) */ - .barrett_reduction_const_2 = 0x04c11db7, /* G - x^32 */ -#endif -}; - -/* - * Constants generated for least-significant-bit-first CRC-32 using - * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 + - * x^5 + x^4 + x^2 + x^1 + x^0 - */ -static const struct crc_clmul_consts crc32_lsb_0xedb88320_consts __maybe_unused = { -#ifdef CONFIG_64BIT - 
.fold_across_2_longs_const_hi = 0x65673b4600000000, /* x^191 mod G */ - .fold_across_2_longs_const_lo = 0x9ba54c6f00000000, /* x^127 mod G */ - .barrett_reduction_const_1 = 0xb4e5b025f7011641, /* floor(x^95 / G) */ - .barrett_reduction_const_2 = 0x00000000edb88320, /* (G - x^32) * x^32 */ -#else - .fold_across_2_longs_const_hi = 0xccaa009e, /* x^95 mod G */ - .fold_across_2_longs_const_lo = 0xb8bc6765, /* x^63 mod G */ - .barrett_reduction_const_1 = 0xf7011641, /* floor(x^63 / G) */ - .barrett_reduction_const_2 = 0xedb88320, /* (G - x^32) * x^0 */ -#endif -}; - -/* - * Constants generated for least-significant-bit-first CRC-32 using - * G(x) = x^32 + x^28 + x^27 + x^26 + x^25 + x^23 + x^22 + x^20 + x^19 + x^18 + - * x^14 + x^13 + x^11 + x^10 + x^9 + x^8 + x^6 + x^0 - */ -static const struct crc_clmul_consts crc32_lsb_0x82f63b78_consts __maybe_unused = { -#ifdef CONFIG_64BIT - .fold_across_2_longs_const_hi = 0x3743f7bd00000000, /* x^191 mod G */ - .fold_across_2_longs_const_lo = 0x3171d43000000000, /* x^127 mod G */ - .barrett_reduction_const_1 = 0x4869ec38dea713f1, /* floor(x^95 / G) */ - .barrett_reduction_const_2 = 0x0000000082f63b78, /* (G - x^32) * x^32 */ -#else - .fold_across_2_longs_const_hi = 0x493c7d27, /* x^95 mod G */ - .fold_across_2_longs_const_lo = 0xdd45aab8, /* x^63 mod G */ - .barrett_reduction_const_1 = 0xdea713f1, /* floor(x^63 / G) */ - .barrett_reduction_const_2 = 0x82f63b78, /* (G - x^32) * x^0 */ -#endif -}; - -/* - * Constants generated for most-significant-bit-first CRC-64 using - * G(x) = x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 + - * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 + - * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 + - * x^7 + x^4 + x^1 + x^0 - */ -#ifdef CONFIG_64BIT -static const struct crc_clmul_consts crc64_msb_0x42f0e1eba9ea3693_consts __maybe_unused = { - .fold_across_2_longs_const_hi = 0x4eb938a7d257740e, /* x^192 mod G */ - .fold_across_2_longs_const_lo = 0x05f5c3c7eb52fab6, /* x^128 mod G */ - .barrett_reduction_const_1 = 0xabc694e836627c39, /* floor(x^127 / G) */ - .barrett_reduction_const_2 = 0x42f0e1eba9ea3693, /* G - x^64 */ -}; -#endif - -/* - * Constants generated for least-significant-bit-first CRC-64 using - * G(x) = x^64 + x^63 + x^61 + x^59 + x^58 + x^56 + x^55 + x^52 + x^49 + x^48 + - * x^47 + x^46 + x^44 + x^41 + x^37 + x^36 + x^34 + x^32 + x^31 + x^28 + - * x^26 + x^23 + x^22 + x^19 + x^16 + x^13 + x^12 + x^10 + x^9 + x^6 + - * x^4 + x^3 + x^0 - */ -#ifdef CONFIG_64BIT -static const struct crc_clmul_consts crc64_lsb_0x9a6c9329ac4bc9b5_consts __maybe_unused = { - .fold_across_2_longs_const_hi = 0xeadc41fd2ba3d420, /* x^191 mod G */ - .fold_across_2_longs_const_lo = 0x21e9761e252621ac, /* x^127 mod G */ - .barrett_reduction_const_1 = 0x27ecfa329aef9f77, /* floor(x^127 / G) */ - .barrett_reduction_const_2 = 0x9a6c9329ac4bc9b5, /* (G - x^64) * x^0 */ -}; -#endif diff --git a/arch/riscv/lib/crc-clmul-template.h b/arch/riscv/lib/crc-clmul-template.h deleted file mode 100644 index 77187e7f1762..000000000000 --- a/arch/riscv/lib/crc-clmul-template.h +++ /dev/null @@ -1,265 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* Copyright 2025 Google LLC */ - -/* - * This file is a "template" that generates a CRC function optimized using the - * RISC-V Zbc (scalar carryless multiplication) extension. The includer of this - * file must define the following parameters to specify the type of CRC: - * - * crc_t: the data type of the CRC, e.g. 
u32 for a 32-bit CRC - * LSB_CRC: 0 for a msb (most-significant-bit) first CRC, i.e. natural - * mapping between bits and polynomial coefficients - * 1 for a lsb (least-significant-bit) first CRC, i.e. reflected - * mapping between bits and polynomial coefficients - */ - -#include <asm/byteorder.h> -#include <linux/minmax.h> - -#define CRC_BITS (8 * sizeof(crc_t)) /* a.k.a. 'n' */ - -static inline unsigned long clmul(unsigned long a, unsigned long b) -{ - unsigned long res; - - asm(".option push\n" - ".option arch,+zbc\n" - "clmul %0, %1, %2\n" - ".option pop\n" - : "=r" (res) : "r" (a), "r" (b)); - return res; -} - -static inline unsigned long clmulh(unsigned long a, unsigned long b) -{ - unsigned long res; - - asm(".option push\n" - ".option arch,+zbc\n" - "clmulh %0, %1, %2\n" - ".option pop\n" - : "=r" (res) : "r" (a), "r" (b)); - return res; -} - -static inline unsigned long clmulr(unsigned long a, unsigned long b) -{ - unsigned long res; - - asm(".option push\n" - ".option arch,+zbc\n" - "clmulr %0, %1, %2\n" - ".option pop\n" - : "=r" (res) : "r" (a), "r" (b)); - return res; -} - -/* - * crc_load_long() loads one "unsigned long" of aligned data bytes, producing a - * polynomial whose bit order matches the CRC's bit order. - */ -#ifdef CONFIG_64BIT -# if LSB_CRC -# define crc_load_long(x) le64_to_cpup(x) -# else -# define crc_load_long(x) be64_to_cpup(x) -# endif -#else -# if LSB_CRC -# define crc_load_long(x) le32_to_cpup(x) -# else -# define crc_load_long(x) be32_to_cpup(x) -# endif -#endif - -/* XOR @crc into the end of @msgpoly that represents the high-order terms. */ -static inline unsigned long -crc_clmul_prep(crc_t crc, unsigned long msgpoly) -{ -#if LSB_CRC - return msgpoly ^ crc; -#else - return msgpoly ^ ((unsigned long)crc << (BITS_PER_LONG - CRC_BITS)); -#endif -} - -/* - * Multiply the long-sized @msgpoly by x^n (a.k.a. x^CRC_BITS) and reduce it - * modulo the generator polynomial G. This gives the CRC of @msgpoly. - */ -static inline crc_t -crc_clmul_long(unsigned long msgpoly, const struct crc_clmul_consts *consts) -{ - unsigned long tmp; - - /* - * First step of Barrett reduction with integrated multiplication by - * x^n: calculate floor((msgpoly * x^n) / G). This is the value by - * which G needs to be multiplied to cancel out the x^n and higher terms - * of msgpoly * x^n. Do it using the following formula: - * - * msb-first: - * floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G)) / x^(BITS_PER_LONG-1)) - * lsb-first: - * floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G) * x) / x^BITS_PER_LONG) - * - * barrett_reduction_const_1 contains floor(x^(BITS_PER_LONG-1+n) / G), - * which fits a long exactly. Using any lower power of x there would - * not carry enough precision through the calculation, while using any - * higher power of x would require extra instructions to handle a wider - * multiplication. In the msb-first case, using this power of x results - * in needing a floored division by x^(BITS_PER_LONG-1), which matches - * what clmulr produces. In the lsb-first case, a factor of x gets - * implicitly introduced by each carryless multiplication (shown as - * '* x' above), and the floored division instead needs to be by - * x^BITS_PER_LONG which matches what clmul produces. 
- */ -#if LSB_CRC - tmp = clmul(msgpoly, consts->barrett_reduction_const_1); -#else - tmp = clmulr(msgpoly, consts->barrett_reduction_const_1); -#endif - - /* - * Second step of Barrett reduction: - * - * crc := (msgpoly * x^n) + (G * floor((msgpoly * x^n) / G)) - * - * This reduces (msgpoly * x^n) modulo G by adding the appropriate - * multiple of G to it. The result uses only the x^0..x^(n-1) terms. - * HOWEVER, since the unreduced value (msgpoly * x^n) is zero in those - * terms in the first place, it is more efficient to do the equivalent: - * - * crc := ((G - x^n) * floor((msgpoly * x^n) / G)) mod x^n - * - * In the lsb-first case further modify it to the following which avoids - * a shift, as the crc ends up in the physically low n bits from clmulr: - * - * product := ((G - x^n) * x^(BITS_PER_LONG - n)) * floor((msgpoly * x^n) / G) * x - * crc := floor(product / x^(BITS_PER_LONG + 1 - n)) mod x^n - * - * barrett_reduction_const_2 contains the constant multiplier (G - x^n) - * or (G - x^n) * x^(BITS_PER_LONG - n) from the formulas above. The - * cast of the result to crc_t is essential, as it applies the mod x^n! - */ -#if LSB_CRC - return clmulr(tmp, consts->barrett_reduction_const_2); -#else - return clmul(tmp, consts->barrett_reduction_const_2); -#endif -} - -/* Update @crc with the data from @msgpoly. */ -static inline crc_t -crc_clmul_update_long(crc_t crc, unsigned long msgpoly, - const struct crc_clmul_consts *consts) -{ - return crc_clmul_long(crc_clmul_prep(crc, msgpoly), consts); -} - -/* Update @crc with 1 <= @len < sizeof(unsigned long) bytes of data. */ -static inline crc_t -crc_clmul_update_partial(crc_t crc, const u8 *p, size_t len, - const struct crc_clmul_consts *consts) -{ - unsigned long msgpoly; - size_t i; - -#if LSB_CRC - msgpoly = (unsigned long)p[0] << (BITS_PER_LONG - 8); - for (i = 1; i < len; i++) - msgpoly = (msgpoly >> 8) ^ ((unsigned long)p[i] << (BITS_PER_LONG - 8)); -#else - msgpoly = p[0]; - for (i = 1; i < len; i++) - msgpoly = (msgpoly << 8) ^ p[i]; -#endif - - if (len >= sizeof(crc_t)) { - #if LSB_CRC - msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len); - #else - msgpoly ^= (unsigned long)crc << (8*len - CRC_BITS); - #endif - return crc_clmul_long(msgpoly, consts); - } -#if LSB_CRC - msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len); - return crc_clmul_long(msgpoly, consts) ^ (crc >> (8*len)); -#else - msgpoly ^= crc >> (CRC_BITS - 8*len); - return crc_clmul_long(msgpoly, consts) ^ (crc << (8*len)); -#endif -} - -static inline crc_t -crc_clmul(crc_t crc, const void *p, size_t len, - const struct crc_clmul_consts *consts) -{ - size_t align; - - /* This implementation assumes that the CRC fits in an unsigned long. */ - BUILD_BUG_ON(sizeof(crc_t) > sizeof(unsigned long)); - - /* If the buffer is not long-aligned, align it. */ - align = (unsigned long)p % sizeof(unsigned long); - if (align && len) { - align = min(sizeof(unsigned long) - align, len); - crc = crc_clmul_update_partial(crc, p, align, consts); - p += align; - len -= align; - } - - if (len >= 4 * sizeof(unsigned long)) { - unsigned long m0, m1; - - m0 = crc_clmul_prep(crc, crc_load_long(p)); - m1 = crc_load_long(p + sizeof(unsigned long)); - p += 2 * sizeof(unsigned long); - len -= 2 * sizeof(unsigned long); - /* - * Main loop. 
Each iteration starts with a message polynomial - * (x^BITS_PER_LONG)*m0 + m1, then logically extends it by two - * more longs of data to form x^(3*BITS_PER_LONG)*m0 + - * x^(2*BITS_PER_LONG)*m1 + x^BITS_PER_LONG*m2 + m3, then - * "folds" that back into a congruent (modulo G) value that uses - * just m0 and m1 again. This is done by multiplying m0 by the - * precomputed constant (x^(3*BITS_PER_LONG) mod G) and m1 by - * the precomputed constant (x^(2*BITS_PER_LONG) mod G), then - * adding the results to m2 and m3 as appropriate. Each such - * multiplication produces a result twice the length of a long, - * which in RISC-V is two instructions clmul and clmulh. - * - * This could be changed to fold across more than 2 longs at a - * time if there is a CPU that can take advantage of it. - */ - do { - unsigned long p0, p1, p2, p3; - - p0 = clmulh(m0, consts->fold_across_2_longs_const_hi); - p1 = clmul(m0, consts->fold_across_2_longs_const_hi); - p2 = clmulh(m1, consts->fold_across_2_longs_const_lo); - p3 = clmul(m1, consts->fold_across_2_longs_const_lo); - m0 = (LSB_CRC ? p1 ^ p3 : p0 ^ p2) ^ crc_load_long(p); - m1 = (LSB_CRC ? p0 ^ p2 : p1 ^ p3) ^ - crc_load_long(p + sizeof(unsigned long)); - - p += 2 * sizeof(unsigned long); - len -= 2 * sizeof(unsigned long); - } while (len >= 2 * sizeof(unsigned long)); - - crc = crc_clmul_long(m0, consts); - crc = crc_clmul_update_long(crc, m1, consts); - } - - while (len >= sizeof(unsigned long)) { - crc = crc_clmul_update_long(crc, crc_load_long(p), consts); - p += sizeof(unsigned long); - len -= sizeof(unsigned long); - } - - if (len) - crc = crc_clmul_update_partial(crc, p, len, consts); - - return crc; -} diff --git a/arch/riscv/lib/crc-clmul.h b/arch/riscv/lib/crc-clmul.h deleted file mode 100644 index dd1736245815..000000000000 --- a/arch/riscv/lib/crc-clmul.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* Copyright 2025 Google LLC */ - -#ifndef _RISCV_CRC_CLMUL_H -#define _RISCV_CRC_CLMUL_H - -#include <linux/types.h> -#include "crc-clmul-consts.h" - -u16 crc16_msb_clmul(u16 crc, const void *p, size_t len, - const struct crc_clmul_consts *consts); -u32 crc32_msb_clmul(u32 crc, const void *p, size_t len, - const struct crc_clmul_consts *consts); -u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len, - const struct crc_clmul_consts *consts); -#ifdef CONFIG_64BIT -u64 crc64_msb_clmul(u64 crc, const void *p, size_t len, - const struct crc_clmul_consts *consts); -u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len, - const struct crc_clmul_consts *consts); -#endif - -#endif /* _RISCV_CRC_CLMUL_H */ diff --git a/arch/riscv/lib/crc-t10dif.c b/arch/riscv/lib/crc-t10dif.c deleted file mode 100644 index e6b0051ccd86..000000000000 --- a/arch/riscv/lib/crc-t10dif.c +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * RISC-V optimized CRC-T10DIF function - * - * Copyright 2025 Google LLC - */ - -#include <asm/hwcap.h> -#include <asm/alternative-macros.h> -#include <linux/crc-t10dif.h> -#include <linux/module.h> - -#include "crc-clmul.h" - -u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len) -{ - if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) - return crc16_msb_clmul(crc, p, len, &crc16_msb_0x8bb7_consts); - return crc_t10dif_generic(crc, p, len); -} -EXPORT_SYMBOL(crc_t10dif_arch); - -MODULE_DESCRIPTION("RISC-V optimized CRC-T10DIF function"); -MODULE_LICENSE("GPL"); diff --git a/arch/riscv/lib/crc16_msb.c b/arch/riscv/lib/crc16_msb.c deleted file mode 100644 index 
554d295e95f5..000000000000 --- a/arch/riscv/lib/crc16_msb.c +++ /dev/null @@ -1,18 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * RISC-V optimized most-significant-bit-first CRC16 - * - * Copyright 2025 Google LLC - */ - -#include "crc-clmul.h" - -typedef u16 crc_t; -#define LSB_CRC 0 -#include "crc-clmul-template.h" - -u16 crc16_msb_clmul(u16 crc, const void *p, size_t len, - const struct crc_clmul_consts *consts) -{ - return crc_clmul(crc, p, len, consts); -} diff --git a/arch/riscv/lib/crc32.c b/arch/riscv/lib/crc32.c deleted file mode 100644 index a3188b7d9c40..000000000000 --- a/arch/riscv/lib/crc32.c +++ /dev/null @@ -1,53 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * RISC-V optimized CRC32 functions - * - * Copyright 2025 Google LLC - */ - -#include <asm/hwcap.h> -#include <asm/alternative-macros.h> -#include <linux/crc32.h> -#include <linux/module.h> - -#include "crc-clmul.h" - -u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) -{ - if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) - return crc32_lsb_clmul(crc, p, len, - &crc32_lsb_0xedb88320_consts); - return crc32_le_base(crc, p, len); -} -EXPORT_SYMBOL(crc32_le_arch); - -u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) -{ - if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) - return crc32_msb_clmul(crc, p, len, - &crc32_msb_0x04c11db7_consts); - return crc32_be_base(crc, p, len); -} -EXPORT_SYMBOL(crc32_be_arch); - -u32 crc32c_arch(u32 crc, const u8 *p, size_t len) -{ - if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) - return crc32_lsb_clmul(crc, p, len, - &crc32_lsb_0x82f63b78_consts); - return crc32c_base(crc, p, len); -} -EXPORT_SYMBOL(crc32c_arch); - -u32 crc32_optimizations(void) -{ - if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) - return CRC32_LE_OPTIMIZATION | - CRC32_BE_OPTIMIZATION | - CRC32C_OPTIMIZATION; - return 0; -} -EXPORT_SYMBOL(crc32_optimizations); - -MODULE_DESCRIPTION("RISC-V optimized CRC32 functions"); -MODULE_LICENSE("GPL"); diff --git a/arch/riscv/lib/crc32_lsb.c b/arch/riscv/lib/crc32_lsb.c deleted file mode 100644 index 72fd67e7470c..000000000000 --- a/arch/riscv/lib/crc32_lsb.c +++ /dev/null @@ -1,18 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * RISC-V optimized least-significant-bit-first CRC32 - * - * Copyright 2025 Google LLC - */ - -#include "crc-clmul.h" - -typedef u32 crc_t; -#define LSB_CRC 1 -#include "crc-clmul-template.h" - -u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len, - const struct crc_clmul_consts *consts) -{ - return crc_clmul(crc, p, len, consts); -} diff --git a/arch/riscv/lib/crc32_msb.c b/arch/riscv/lib/crc32_msb.c deleted file mode 100644 index fdbeaccc369f..000000000000 --- a/arch/riscv/lib/crc32_msb.c +++ /dev/null @@ -1,18 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * RISC-V optimized most-significant-bit-first CRC32 - * - * Copyright 2025 Google LLC - */ - -#include "crc-clmul.h" - -typedef u32 crc_t; -#define LSB_CRC 0 -#include "crc-clmul-template.h" - -u32 crc32_msb_clmul(u32 crc, const void *p, size_t len, - const struct crc_clmul_consts *consts) -{ - return crc_clmul(crc, p, len, consts); -} diff --git a/arch/riscv/lib/crc64.c b/arch/riscv/lib/crc64.c deleted file mode 100644 index f0015a27836a..000000000000 --- a/arch/riscv/lib/crc64.c +++ /dev/null @@ -1,34 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * RISC-V optimized CRC64 functions - * - * Copyright 2025 Google LLC - */ - -#include <asm/hwcap.h> -#include <asm/alternative-macros.h> -#include <linux/crc64.h> -#include 
<linux/module.h> - -#include "crc-clmul.h" - -u64 crc64_be_arch(u64 crc, const u8 *p, size_t len) -{ - if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) - return crc64_msb_clmul(crc, p, len, - &crc64_msb_0x42f0e1eba9ea3693_consts); - return crc64_be_generic(crc, p, len); -} -EXPORT_SYMBOL(crc64_be_arch); - -u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len) -{ - if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) - return crc64_lsb_clmul(crc, p, len, - &crc64_lsb_0x9a6c9329ac4bc9b5_consts); - return crc64_nvme_generic(crc, p, len); -} -EXPORT_SYMBOL(crc64_nvme_arch); - -MODULE_DESCRIPTION("RISC-V optimized CRC64 functions"); -MODULE_LICENSE("GPL"); diff --git a/arch/riscv/lib/crc64_lsb.c b/arch/riscv/lib/crc64_lsb.c deleted file mode 100644 index c5371bb85d90..000000000000 --- a/arch/riscv/lib/crc64_lsb.c +++ /dev/null @@ -1,18 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * RISC-V optimized least-significant-bit-first CRC64 - * - * Copyright 2025 Google LLC - */ - -#include "crc-clmul.h" - -typedef u64 crc_t; -#define LSB_CRC 1 -#include "crc-clmul-template.h" - -u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len, - const struct crc_clmul_consts *consts) -{ - return crc_clmul(crc, p, len, consts); -} diff --git a/arch/riscv/lib/crc64_msb.c b/arch/riscv/lib/crc64_msb.c deleted file mode 100644 index 1925d1dbe225..000000000000 --- a/arch/riscv/lib/crc64_msb.c +++ /dev/null @@ -1,18 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * RISC-V optimized most-significant-bit-first CRC64 - * - * Copyright 2025 Google LLC - */ - -#include "crc-clmul.h" - -typedef u64 crc_t; -#define LSB_CRC 0 -#include "crc-clmul-template.h" - -u64 crc64_msb_clmul(u64 crc, const void *p, size_t len, - const struct crc_clmul_consts *consts) -{ - return crc_clmul(crc, p, len, consts); -} diff --git a/arch/riscv/lib/crypto/Kconfig b/arch/riscv/lib/crypto/Kconfig deleted file mode 100644 index 47c99ea97ce2..000000000000 --- a/arch/riscv/lib/crypto/Kconfig +++ /dev/null @@ -1,16 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -config CRYPTO_CHACHA_RISCV64 - tristate - depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO - default CRYPTO_LIB_CHACHA - select CRYPTO_ARCH_HAVE_LIB_CHACHA - select CRYPTO_LIB_CHACHA_GENERIC - -config CRYPTO_SHA256_RISCV64 - tristate - depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO - default CRYPTO_LIB_SHA256 - select CRYPTO_ARCH_HAVE_LIB_SHA256 - select CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD - select CRYPTO_LIB_SHA256_GENERIC diff --git a/arch/riscv/lib/crypto/Makefile b/arch/riscv/lib/crypto/Makefile deleted file mode 100644 index b7cb877a2c07..000000000000 --- a/arch/riscv/lib/crypto/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -obj-$(CONFIG_CRYPTO_CHACHA_RISCV64) += chacha-riscv64.o -chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o - -obj-$(CONFIG_CRYPTO_SHA256_RISCV64) += sha256-riscv64.o -sha256-riscv64-y := sha256.o sha256-riscv64-zvknha_or_zvknhb-zvkb.o diff --git a/arch/riscv/lib/crypto/chacha-riscv64-glue.c b/arch/riscv/lib/crypto/chacha-riscv64-glue.c deleted file mode 100644 index 8c3f11d79be3..000000000000 --- a/arch/riscv/lib/crypto/chacha-riscv64-glue.c +++ /dev/null @@ -1,75 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * ChaCha stream cipher (RISC-V optimized) - * - * Copyright (C) 2023 SiFive, Inc. 
- * Author: Jerry Shih <jerry.shih@sifive.com> - */ - -#include <asm/simd.h> -#include <asm/vector.h> -#include <crypto/chacha.h> -#include <crypto/internal/simd.h> -#include <linux/linkage.h> -#include <linux/module.h> - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_zvkb); - -asmlinkage void chacha_zvkb(struct chacha_state *state, const u8 *in, u8 *out, - size_t nblocks, int nrounds); - -void hchacha_block_arch(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds) -{ - hchacha_block_generic(state, out, nrounds); -} -EXPORT_SYMBOL(hchacha_block_arch); - -void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - u8 block_buffer[CHACHA_BLOCK_SIZE]; - unsigned int full_blocks = bytes / CHACHA_BLOCK_SIZE; - unsigned int tail_bytes = bytes % CHACHA_BLOCK_SIZE; - - if (!static_branch_likely(&use_zvkb) || !crypto_simd_usable()) - return chacha_crypt_generic(state, dst, src, bytes, nrounds); - - kernel_vector_begin(); - if (full_blocks) { - chacha_zvkb(state, src, dst, full_blocks, nrounds); - src += full_blocks * CHACHA_BLOCK_SIZE; - dst += full_blocks * CHACHA_BLOCK_SIZE; - } - if (tail_bytes) { - memcpy(block_buffer, src, tail_bytes); - chacha_zvkb(state, block_buffer, block_buffer, 1, nrounds); - memcpy(dst, block_buffer, tail_bytes); - } - kernel_vector_end(); -} -EXPORT_SYMBOL(chacha_crypt_arch); - -bool chacha_is_arch_optimized(void) -{ - return static_key_enabled(&use_zvkb); -} -EXPORT_SYMBOL(chacha_is_arch_optimized); - -static int __init riscv64_chacha_mod_init(void) -{ - if (riscv_isa_extension_available(NULL, ZVKB) && - riscv_vector_vlen() >= 128) - static_branch_enable(&use_zvkb); - return 0; -} -subsys_initcall(riscv64_chacha_mod_init); - -static void __exit riscv64_chacha_mod_exit(void) -{ -} -module_exit(riscv64_chacha_mod_exit); - -MODULE_DESCRIPTION("ChaCha stream cipher (RISC-V optimized)"); -MODULE_AUTHOR("Jerry Shih <jerry.shih@sifive.com>"); -MODULE_LICENSE("GPL"); diff --git a/arch/riscv/lib/crypto/chacha-riscv64-zvkb.S b/arch/riscv/lib/crypto/chacha-riscv64-zvkb.S deleted file mode 100644 index b777d0b4e379..000000000000 --- a/arch/riscv/lib/crypto/chacha-riscv64-zvkb.S +++ /dev/null @@ -1,297 +0,0 @@ -/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */ -// -// This file is dual-licensed, meaning that you can use it under your -// choice of either of the following two licenses: -// -// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved. -// -// Licensed under the Apache License 2.0 (the "License"). You can obtain -// a copy in the file LICENSE in the source distribution or at -// https://www.openssl.org/source/license.html -// -// or -// -// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com> -// Copyright 2024 Google LLC -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// The generated code of this file depends on the following RISC-V extensions: -// - RV64I -// - RISC-V Vector ('V') with VLEN >= 128 -// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb') - -#include <linux/linkage.h> - -.text -.option arch, +zvkb - -#define STATEP a0 -#define INP a1 -#define OUTP a2 -#define NBLOCKS a3 -#define NROUNDS a4 - -#define CONSTS0 a5 -#define CONSTS1 a6 -#define CONSTS2 a7 -#define CONSTS3 t0 -#define TMP t1 -#define VL t2 -#define STRIDE t3 -#define ROUND_CTR t4 -#define KEY0 s0 -#define KEY1 s1 -#define KEY2 s2 -#define KEY3 s3 -#define KEY4 s4 -#define KEY5 s5 -#define KEY6 s6 -#define KEY7 s7 -#define COUNTER s8 -#define NONCE0 s9 -#define NONCE1 s10 -#define NONCE2 s11 - -.macro chacha_round a0, b0, c0, d0, a1, b1, c1, d1, \ - a2, b2, c2, d2, a3, b3, c3, d3 - // a += b; d ^= a; d = rol(d, 16); - vadd.vv \a0, \a0, \b0 - vadd.vv \a1, \a1, \b1 - vadd.vv \a2, \a2, \b2 - vadd.vv \a3, \a3, \b3 - vxor.vv \d0, \d0, \a0 - vxor.vv \d1, \d1, \a1 - vxor.vv \d2, \d2, \a2 - vxor.vv \d3, \d3, \a3 - vror.vi \d0, \d0, 32 - 16 - vror.vi \d1, \d1, 32 - 16 - vror.vi \d2, \d2, 32 - 16 - vror.vi \d3, \d3, 32 - 16 - - // c += d; b ^= c; b = rol(b, 12); - vadd.vv \c0, \c0, \d0 - vadd.vv \c1, \c1, \d1 - vadd.vv \c2, \c2, \d2 - vadd.vv \c3, \c3, \d3 - vxor.vv \b0, \b0, \c0 - vxor.vv \b1, \b1, \c1 - vxor.vv \b2, \b2, \c2 - vxor.vv \b3, \b3, \c3 - vror.vi \b0, \b0, 32 - 12 - vror.vi \b1, \b1, 32 - 12 - vror.vi \b2, \b2, 32 - 12 - vror.vi \b3, \b3, 32 - 12 - - // a += b; d ^= a; d = rol(d, 8); - vadd.vv \a0, \a0, \b0 - vadd.vv \a1, \a1, \b1 - vadd.vv \a2, \a2, \b2 - vadd.vv \a3, \a3, \b3 - vxor.vv \d0, \d0, \a0 - vxor.vv \d1, \d1, \a1 - vxor.vv \d2, \d2, \a2 - vxor.vv \d3, \d3, \a3 - vror.vi \d0, \d0, 32 - 8 - vror.vi \d1, \d1, 32 - 8 - vror.vi \d2, \d2, 32 - 8 - vror.vi \d3, \d3, 32 - 8 - - // c += d; b ^= c; b = rol(b, 7); - vadd.vv \c0, \c0, \d0 - vadd.vv \c1, \c1, \d1 - vadd.vv \c2, \c2, \d2 - vadd.vv \c3, \c3, \d3 - vxor.vv \b0, \b0, \c0 - vxor.vv \b1, \b1, \c1 - vxor.vv \b2, \b2, \c2 - vxor.vv \b3, \b3, \c3 - vror.vi \b0, \b0, 32 - 7 - vror.vi \b1, \b1, 32 - 7 - vror.vi \b2, \b2, 32 - 7 - vror.vi \b3, \b3, 32 - 7 -.endm - -// void chacha_zvkb(struct chacha_state *state, const u8 *in, u8 *out, -// size_t nblocks, int nrounds); -// -// |nblocks| is the number of 64-byte blocks to process, and must be nonzero. -// -// |state| gives the ChaCha state matrix, including the 32-bit counter in -// state->x[12] following the RFC7539 convention; note that this differs from -// the original Salsa20 paper which uses a 64-bit counter in state->x[12..13]. -// The updated 32-bit counter is written back to state->x[12] before returning. 
-SYM_FUNC_START(chacha_zvkb) - addi sp, sp, -96 - sd s0, 0(sp) - sd s1, 8(sp) - sd s2, 16(sp) - sd s3, 24(sp) - sd s4, 32(sp) - sd s5, 40(sp) - sd s6, 48(sp) - sd s7, 56(sp) - sd s8, 64(sp) - sd s9, 72(sp) - sd s10, 80(sp) - sd s11, 88(sp) - - li STRIDE, 64 - - // Set up the initial state matrix in scalar registers. - lw CONSTS0, 0(STATEP) - lw CONSTS1, 4(STATEP) - lw CONSTS2, 8(STATEP) - lw CONSTS3, 12(STATEP) - lw KEY0, 16(STATEP) - lw KEY1, 20(STATEP) - lw KEY2, 24(STATEP) - lw KEY3, 28(STATEP) - lw KEY4, 32(STATEP) - lw KEY5, 36(STATEP) - lw KEY6, 40(STATEP) - lw KEY7, 44(STATEP) - lw COUNTER, 48(STATEP) - lw NONCE0, 52(STATEP) - lw NONCE1, 56(STATEP) - lw NONCE2, 60(STATEP) - -.Lblock_loop: - // Set vl to the number of blocks to process in this iteration. - vsetvli VL, NBLOCKS, e32, m1, ta, ma - - // Set up the initial state matrix for the next VL blocks in v0-v15. - // v{i} holds the i'th 32-bit word of the state matrix for all blocks. - // Note that only the counter word, at index 12, differs across blocks. - vmv.v.x v0, CONSTS0 - vmv.v.x v1, CONSTS1 - vmv.v.x v2, CONSTS2 - vmv.v.x v3, CONSTS3 - vmv.v.x v4, KEY0 - vmv.v.x v5, KEY1 - vmv.v.x v6, KEY2 - vmv.v.x v7, KEY3 - vmv.v.x v8, KEY4 - vmv.v.x v9, KEY5 - vmv.v.x v10, KEY6 - vmv.v.x v11, KEY7 - vid.v v12 - vadd.vx v12, v12, COUNTER - vmv.v.x v13, NONCE0 - vmv.v.x v14, NONCE1 - vmv.v.x v15, NONCE2 - - // Load the first half of the input data for each block into v16-v23. - // v{16+i} holds the i'th 32-bit word for all blocks. - vlsseg8e32.v v16, (INP), STRIDE - - mv ROUND_CTR, NROUNDS -.Lnext_doubleround: - addi ROUND_CTR, ROUND_CTR, -2 - // column round - chacha_round v0, v4, v8, v12, v1, v5, v9, v13, \ - v2, v6, v10, v14, v3, v7, v11, v15 - // diagonal round - chacha_round v0, v5, v10, v15, v1, v6, v11, v12, \ - v2, v7, v8, v13, v3, v4, v9, v14 - bnez ROUND_CTR, .Lnext_doubleround - - // Load the second half of the input data for each block into v24-v31. - // v{24+i} holds the {8+i}'th 32-bit word for all blocks. - addi TMP, INP, 32 - vlsseg8e32.v v24, (TMP), STRIDE - - // Finalize the first half of the keystream for each block. - vadd.vx v0, v0, CONSTS0 - vadd.vx v1, v1, CONSTS1 - vadd.vx v2, v2, CONSTS2 - vadd.vx v3, v3, CONSTS3 - vadd.vx v4, v4, KEY0 - vadd.vx v5, v5, KEY1 - vadd.vx v6, v6, KEY2 - vadd.vx v7, v7, KEY3 - - // Encrypt/decrypt the first half of the data for each block. - vxor.vv v16, v16, v0 - vxor.vv v17, v17, v1 - vxor.vv v18, v18, v2 - vxor.vv v19, v19, v3 - vxor.vv v20, v20, v4 - vxor.vv v21, v21, v5 - vxor.vv v22, v22, v6 - vxor.vv v23, v23, v7 - - // Store the first half of the output data for each block. - vssseg8e32.v v16, (OUTP), STRIDE - - // Finalize the second half of the keystream for each block. - vadd.vx v8, v8, KEY4 - vadd.vx v9, v9, KEY5 - vadd.vx v10, v10, KEY6 - vadd.vx v11, v11, KEY7 - vid.v v0 - vadd.vx v12, v12, COUNTER - vadd.vx v13, v13, NONCE0 - vadd.vx v14, v14, NONCE1 - vadd.vx v15, v15, NONCE2 - vadd.vv v12, v12, v0 - - // Encrypt/decrypt the second half of the data for each block. - vxor.vv v24, v24, v8 - vxor.vv v25, v25, v9 - vxor.vv v26, v26, v10 - vxor.vv v27, v27, v11 - vxor.vv v29, v29, v13 - vxor.vv v28, v28, v12 - vxor.vv v30, v30, v14 - vxor.vv v31, v31, v15 - - // Store the second half of the output data for each block. - addi TMP, OUTP, 32 - vssseg8e32.v v24, (TMP), STRIDE - - // Update the counter, the remaining number of blocks, and the input and - // output pointers according to the number of blocks processed (VL). 
- add COUNTER, COUNTER, VL - sub NBLOCKS, NBLOCKS, VL - slli TMP, VL, 6 - add OUTP, OUTP, TMP - add INP, INP, TMP - bnez NBLOCKS, .Lblock_loop - - sw COUNTER, 48(STATEP) - ld s0, 0(sp) - ld s1, 8(sp) - ld s2, 16(sp) - ld s3, 24(sp) - ld s4, 32(sp) - ld s5, 40(sp) - ld s6, 48(sp) - ld s7, 56(sp) - ld s8, 64(sp) - ld s9, 72(sp) - ld s10, 80(sp) - ld s11, 88(sp) - addi sp, sp, 96 - ret -SYM_FUNC_END(chacha_zvkb) diff --git a/arch/riscv/lib/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S b/arch/riscv/lib/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S deleted file mode 100644 index fad501ad0617..000000000000 --- a/arch/riscv/lib/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S +++ /dev/null @@ -1,225 +0,0 @@ -/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */ -// -// This file is dual-licensed, meaning that you can use it under your -// choice of either of the following two licenses: -// -// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved. -// -// Licensed under the Apache License 2.0 (the "License"). You can obtain -// a copy in the file LICENSE in the source distribution or at -// https://www.openssl.org/source/license.html -// -// or -// -// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu> -// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com> -// Copyright 2024 Google LLC -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -// The generated code of this file depends on the following RISC-V extensions: -// - RV64I -// - RISC-V Vector ('V') with VLEN >= 128 -// - RISC-V Vector SHA-2 Secure Hash extension ('Zvknha' or 'Zvknhb') -// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb') - -#include <linux/linkage.h> - -.text -.option arch, +zvknha, +zvkb - -#define STATEP a0 -#define DATA a1 -#define NUM_BLOCKS a2 - -#define STATEP_C a3 - -#define MASK v0 -#define INDICES v1 -#define W0 v2 -#define W1 v3 -#define W2 v4 -#define W3 v5 -#define VTMP v6 -#define FEBA v7 -#define HGDC v8 -#define K0 v10 -#define K1 v11 -#define K2 v12 -#define K3 v13 -#define K4 v14 -#define K5 v15 -#define K6 v16 -#define K7 v17 -#define K8 v18 -#define K9 v19 -#define K10 v20 -#define K11 v21 -#define K12 v22 -#define K13 v23 -#define K14 v24 -#define K15 v25 -#define PREV_FEBA v26 -#define PREV_HGDC v27 - -// Do 4 rounds of SHA-256. w0 contains the current 4 message schedule words. -// -// If not all the message schedule words have been computed yet, then this also -// computes 4 more message schedule words. w1-w3 contain the next 3 groups of 4 -// message schedule words; this macro computes the group after w3 and writes it -// to w0. This means that the next (w0, w1, w2, w3) is the current (w1, w2, w3, -// w0), so the caller must cycle through the registers accordingly. -.macro sha256_4rounds last, k, w0, w1, w2, w3 - vadd.vv VTMP, \k, \w0 - vsha2cl.vv HGDC, FEBA, VTMP - vsha2ch.vv FEBA, HGDC, VTMP -.if !\last - vmerge.vvm VTMP, \w2, \w1, MASK - vsha2ms.vv \w0, VTMP, \w3 -.endif -.endm - -.macro sha256_16rounds last, k0, k1, k2, k3 - sha256_4rounds \last, \k0, W0, W1, W2, W3 - sha256_4rounds \last, \k1, W1, W2, W3, W0 - sha256_4rounds \last, \k2, W2, W3, W0, W1 - sha256_4rounds \last, \k3, W3, W0, W1, W2 -.endm - -// void sha256_transform_zvknha_or_zvknhb_zvkb(u32 state[SHA256_STATE_WORDS], -// const u8 *data, size_t nblocks); -SYM_FUNC_START(sha256_transform_zvknha_or_zvknhb_zvkb) - - // Load the round constants into K0-K15. - vsetivli zero, 4, e32, m1, ta, ma - la t0, K256 - vle32.v K0, (t0) - addi t0, t0, 16 - vle32.v K1, (t0) - addi t0, t0, 16 - vle32.v K2, (t0) - addi t0, t0, 16 - vle32.v K3, (t0) - addi t0, t0, 16 - vle32.v K4, (t0) - addi t0, t0, 16 - vle32.v K5, (t0) - addi t0, t0, 16 - vle32.v K6, (t0) - addi t0, t0, 16 - vle32.v K7, (t0) - addi t0, t0, 16 - vle32.v K8, (t0) - addi t0, t0, 16 - vle32.v K9, (t0) - addi t0, t0, 16 - vle32.v K10, (t0) - addi t0, t0, 16 - vle32.v K11, (t0) - addi t0, t0, 16 - vle32.v K12, (t0) - addi t0, t0, 16 - vle32.v K13, (t0) - addi t0, t0, 16 - vle32.v K14, (t0) - addi t0, t0, 16 - vle32.v K15, (t0) - - // Setup mask for the vmerge to replace the first word (idx==0) in - // message scheduling. There are 4 words, so an 8-bit mask suffices. - vsetivli zero, 1, e8, m1, ta, ma - vmv.v.i MASK, 0x01 - - // Load the state. The state is stored as {a,b,c,d,e,f,g,h}, but we - // need {f,e,b,a},{h,g,d,c}. The dst vtype is e32m1 and the index vtype - // is e8mf4. We use index-load with the i8 indices {20, 16, 4, 0}, - // loaded using the 32-bit little endian value 0x00041014. - li t0, 0x00041014 - vsetivli zero, 1, e32, m1, ta, ma - vmv.v.x INDICES, t0 - addi STATEP_C, STATEP, 8 - vsetivli zero, 4, e32, m1, ta, ma - vluxei8.v FEBA, (STATEP), INDICES - vluxei8.v HGDC, (STATEP_C), INDICES - -.Lnext_block: - addi NUM_BLOCKS, NUM_BLOCKS, -1 - - // Save the previous state, as it's needed later. 
- vmv.v.v PREV_FEBA, FEBA - vmv.v.v PREV_HGDC, HGDC - - // Load the next 512-bit message block and endian-swap each 32-bit word. - vle32.v W0, (DATA) - vrev8.v W0, W0 - addi DATA, DATA, 16 - vle32.v W1, (DATA) - vrev8.v W1, W1 - addi DATA, DATA, 16 - vle32.v W2, (DATA) - vrev8.v W2, W2 - addi DATA, DATA, 16 - vle32.v W3, (DATA) - vrev8.v W3, W3 - addi DATA, DATA, 16 - - // Do the 64 rounds of SHA-256. - sha256_16rounds 0, K0, K1, K2, K3 - sha256_16rounds 0, K4, K5, K6, K7 - sha256_16rounds 0, K8, K9, K10, K11 - sha256_16rounds 1, K12, K13, K14, K15 - - // Add the previous state. - vadd.vv FEBA, FEBA, PREV_FEBA - vadd.vv HGDC, HGDC, PREV_HGDC - - // Repeat if more blocks remain. - bnez NUM_BLOCKS, .Lnext_block - - // Store the new state and return. - vsuxei8.v FEBA, (STATEP), INDICES - vsuxei8.v HGDC, (STATEP_C), INDICES - ret -SYM_FUNC_END(sha256_transform_zvknha_or_zvknhb_zvkb) - -.section ".rodata" -.p2align 2 -.type K256, @object -K256: - .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 - .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 - .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 - .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 - .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc - .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da - .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 - .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 - .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 - .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 - .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 - .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 - .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 - .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 - .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 - .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -.size K256, . - K256 diff --git a/arch/riscv/lib/crypto/sha256.c b/arch/riscv/lib/crypto/sha256.c deleted file mode 100644 index 71808397dff4..000000000000 --- a/arch/riscv/lib/crypto/sha256.c +++ /dev/null @@ -1,67 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * SHA-256 (RISC-V accelerated) - * - * Copyright (C) 2022 VRULL GmbH - * Author: Heiko Stuebner <heiko.stuebner@vrull.eu> - * - * Copyright (C) 2023 SiFive, Inc. - * Author: Jerry Shih <jerry.shih@sifive.com> - */ - -#include <asm/vector.h> -#include <crypto/internal/sha2.h> -#include <linux/kernel.h> -#include <linux/module.h> - -asmlinkage void sha256_transform_zvknha_or_zvknhb_zvkb( - u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_extensions); - -void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS], - const u8 *data, size_t nblocks) -{ - if (static_branch_likely(&have_extensions)) { - kernel_vector_begin(); - sha256_transform_zvknha_or_zvknhb_zvkb(state, data, nblocks); - kernel_vector_end(); - } else { - sha256_blocks_generic(state, data, nblocks); - } -} -EXPORT_SYMBOL_GPL(sha256_blocks_simd); - -void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS], - const u8 *data, size_t nblocks) -{ - sha256_blocks_generic(state, data, nblocks); -} -EXPORT_SYMBOL_GPL(sha256_blocks_arch); - -bool sha256_is_arch_optimized(void) -{ - return static_key_enabled(&have_extensions); -} -EXPORT_SYMBOL_GPL(sha256_is_arch_optimized); - -static int __init riscv64_sha256_mod_init(void) -{ - /* Both zvknha and zvknhb provide the SHA-256 instructions. 
*/ - if ((riscv_isa_extension_available(NULL, ZVKNHA) || - riscv_isa_extension_available(NULL, ZVKNHB)) && - riscv_isa_extension_available(NULL, ZVKB) && - riscv_vector_vlen() >= 128) - static_branch_enable(&have_extensions); - return 0; -} -subsys_initcall(riscv64_sha256_mod_init); - -static void __exit riscv64_sha256_mod_exit(void) -{ -} -module_exit(riscv64_sha256_mod_exit); - -MODULE_DESCRIPTION("SHA-256 (RISC-V accelerated)"); -MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>"); -MODULE_LICENSE("GPL"); diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c index be38a93cedae..7bbdfc6d4552 100644 --- a/arch/riscv/lib/riscv_v_helpers.c +++ b/arch/riscv/lib/riscv_v_helpers.c @@ -16,8 +16,11 @@ #ifdef CONFIG_MMU size_t riscv_v_usercopy_threshold = CONFIG_RISCV_ISA_V_UCOPY_THRESHOLD; int __asm_vector_usercopy(void *dst, void *src, size_t n); +int __asm_vector_usercopy_sum_enabled(void *dst, void *src, size_t n); int fallback_scalar_usercopy(void *dst, void *src, size_t n); -asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n) +int fallback_scalar_usercopy_sum_enabled(void *dst, void *src, size_t n); +asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n, + bool enable_sum) { size_t remain, copied; @@ -26,7 +29,8 @@ asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n) goto fallback; kernel_vector_begin(); - remain = __asm_vector_usercopy(dst, src, n); + remain = enable_sum ? __asm_vector_usercopy(dst, src, n) : + __asm_vector_usercopy_sum_enabled(dst, src, n); kernel_vector_end(); if (remain) { @@ -40,6 +44,7 @@ asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n) return remain; fallback: - return fallback_scalar_usercopy(dst, src, n); + return enable_sum ? 
fallback_scalar_usercopy(dst, src, n) : + fallback_scalar_usercopy_sum_enabled(dst, src, n); } #endif diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index 6a9f116bb545..4efea1b3326c 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -17,14 +17,43 @@ SYM_FUNC_START(__asm_copy_to_user) ALTERNATIVE("j fallback_scalar_usercopy", "nop", 0, RISCV_ISA_EXT_ZVE32X, CONFIG_RISCV_ISA_V) REG_L t0, riscv_v_usercopy_threshold bltu a2, t0, fallback_scalar_usercopy - tail enter_vector_usercopy + li a3, 1 + tail enter_vector_usercopy #endif -SYM_FUNC_START(fallback_scalar_usercopy) +SYM_FUNC_END(__asm_copy_to_user) +EXPORT_SYMBOL(__asm_copy_to_user) +SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user) +EXPORT_SYMBOL(__asm_copy_from_user) +SYM_FUNC_START(fallback_scalar_usercopy) /* Enable access to user memory */ - li t6, SR_SUM - csrs CSR_STATUS, t6 + li t6, SR_SUM + csrs CSR_STATUS, t6 + mv t6, ra + call fallback_scalar_usercopy_sum_enabled + + /* Disable access to user memory */ + mv ra, t6 + li t6, SR_SUM + csrc CSR_STATUS, t6 + ret +SYM_FUNC_END(fallback_scalar_usercopy) + +SYM_FUNC_START(__asm_copy_to_user_sum_enabled) +#ifdef CONFIG_RISCV_ISA_V + ALTERNATIVE("j fallback_scalar_usercopy_sum_enabled", "nop", 0, RISCV_ISA_EXT_ZVE32X, CONFIG_RISCV_ISA_V) + REG_L t0, riscv_v_usercopy_threshold + bltu a2, t0, fallback_scalar_usercopy_sum_enabled + li a3, 0 + tail enter_vector_usercopy +#endif +SYM_FUNC_END(__asm_copy_to_user_sum_enabled) +SYM_FUNC_ALIAS(__asm_copy_from_user_sum_enabled, __asm_copy_to_user_sum_enabled) +EXPORT_SYMBOL(__asm_copy_from_user_sum_enabled) +EXPORT_SYMBOL(__asm_copy_to_user_sum_enabled) + +SYM_FUNC_START(fallback_scalar_usercopy_sum_enabled) /* * Save the terminal address which will be used to compute the number * of bytes copied in case of a fixup exception. @@ -178,23 +207,12 @@ SYM_FUNC_START(fallback_scalar_usercopy) bltu a0, t0, 4b /* t0 - end of dst */ .Lout_copy_user: - /* Disable access to user memory */ - csrc CSR_STATUS, t6 li a0, 0 ret - - /* Exception fixup code */ 10: - /* Disable access to user memory */ - csrc CSR_STATUS, t6 sub a0, t5, a0 ret -SYM_FUNC_END(__asm_copy_to_user) -SYM_FUNC_END(fallback_scalar_usercopy) -EXPORT_SYMBOL(__asm_copy_to_user) -SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user) -EXPORT_SYMBOL(__asm_copy_from_user) - +SYM_FUNC_END(fallback_scalar_usercopy_sum_enabled) SYM_FUNC_START(__clear_user) diff --git a/arch/riscv/lib/uaccess_vector.S b/arch/riscv/lib/uaccess_vector.S index 7c45f26de4f7..03b5560609a2 100644 --- a/arch/riscv/lib/uaccess_vector.S +++ b/arch/riscv/lib/uaccess_vector.S @@ -24,7 +24,18 @@ SYM_FUNC_START(__asm_vector_usercopy) /* Enable access to user memory */ li t6, SR_SUM csrs CSR_STATUS, t6 + mv t6, ra + call __asm_vector_usercopy_sum_enabled + + /* Disable access to user memory */ + mv ra, t6 + li t6, SR_SUM + csrc CSR_STATUS, t6 + ret +SYM_FUNC_END(__asm_vector_usercopy) + +SYM_FUNC_START(__asm_vector_usercopy_sum_enabled) loop: vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma fixup vle8.v vData, (pSrc), 10f @@ -36,8 +47,6 @@ loop: /* Exception fixup for vector load is shared with normal exit */ 10: - /* Disable access to user memory */ - csrc CSR_STATUS, t6 mv a0, iNum ret @@ -49,4 +58,4 @@ loop: csrr t2, CSR_VSTART sub iNum, iNum, t2 j 10b -SYM_FUNC_END(__asm_vector_usercopy) +SYM_FUNC_END(__asm_vector_usercopy_sum_enabled) |
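
For reference, since this diff deletes the Zbc-based CRC helpers from arch/riscv/lib, below is a minimal, self-contained sketch of the single-word Barrett reduction that the removed crc-clmul-template.h performed for a least-significant-bit-first CRC-32. It is a sketch, not the kernel code: the function name crc32_lsb_update_one_word is illustrative, the two constants are the crc32_lsb_0xedb88320 values from the removed crc-clmul-consts.h, and it assumes a 64-bit RISC-V toolchain that accepts ".option arch,+zbc". Seeding and final inversion are left to the caller, as with the kernel's crc32_le.

#include <stdint.h>

/* Carryless multiply, low half of the product (Zbc). */
static inline uint64_t clmul(uint64_t a, uint64_t b)
{
	uint64_t res;

	asm(".option push\n"
	    ".option arch,+zbc\n"
	    "clmul %0, %1, %2\n"
	    ".option pop\n"
	    : "=r" (res) : "r" (a), "r" (b));
	return res;
}

/* Carryless multiply, bit-reversed ("clmulr") variant (Zbc). */
static inline uint64_t clmulr(uint64_t a, uint64_t b)
{
	uint64_t res;

	asm(".option push\n"
	    ".option arch,+zbc\n"
	    "clmulr %0, %1, %2\n"
	    ".option pop\n"
	    : "=r" (res) : "r" (a), "r" (b));
	return res;
}

/* floor(x^95 / G) and (G - x^32) * x^32 for the CRC-32 polynomial, per the
 * removed crc-clmul-consts.h (generated by scripts/gen-crc-consts.py). */
#define CRC32_LSB_BARRETT_1	0xb4e5b025f7011641ULL
#define CRC32_LSB_BARRETT_2	0x00000000edb88320ULL

/* Fold one little-endian-loaded 64-bit message word into a running CRC-32. */
static uint32_t crc32_lsb_update_one_word(uint32_t crc, uint64_t data_le)
{
	/* For an lsb-first CRC the low bits hold the highest-degree terms,
	 * so the running CRC is XORed into the low end of the word. */
	uint64_t msgpoly = data_le ^ crc;
	/* Step 1 of Barrett reduction: floor((msgpoly * x^32) / G). */
	uint64_t quotient = clmul(msgpoly, CRC32_LSB_BARRETT_1);
	/* Step 2: multiply by (G - x^32) * x^32; truncating to 32 bits
	 * applies the final mod x^32. */
	return (uint32_t)clmulr(quotient, CRC32_LSB_BARRETT_2);
}

Looping this helper over aligned words, with a byte-at-a-time tail, reproduces the basic shape of the removed crc_clmul() minus its fold-across-two-longs fast path; the removed kernel wrapper additionally fell back to crc32_le_base() when the Zbc extension is not available.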