Diffstat (limited to 'arch/riscv/lib')
-rw-r--r--  arch/riscv/lib/Makefile                                          7
-rw-r--r--  arch/riscv/lib/crc-clmul-consts.h                              122
-rw-r--r--  arch/riscv/lib/crc-clmul-template.h                            265
-rw-r--r--  arch/riscv/lib/crc-clmul.h                                      23
-rw-r--r--  arch/riscv/lib/crc-t10dif.c                                     24
-rw-r--r--  arch/riscv/lib/crc16_msb.c                                      18
-rw-r--r--  arch/riscv/lib/crc32.c                                          53
-rw-r--r--  arch/riscv/lib/crc32_lsb.c                                      18
-rw-r--r--  arch/riscv/lib/crc32_msb.c                                      18
-rw-r--r--  arch/riscv/lib/crc64.c                                          34
-rw-r--r--  arch/riscv/lib/crc64_lsb.c                                      18
-rw-r--r--  arch/riscv/lib/crc64_msb.c                                      18
-rw-r--r--  arch/riscv/lib/crypto/Kconfig                                   16
-rw-r--r--  arch/riscv/lib/crypto/Makefile                                   7
-rw-r--r--  arch/riscv/lib/crypto/chacha-riscv64-glue.c                     75
-rw-r--r--  arch/riscv/lib/crypto/chacha-riscv64-zvkb.S                    297
-rw-r--r--  arch/riscv/lib/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S   225
-rw-r--r--  arch/riscv/lib/crypto/sha256.c                                  67
-rw-r--r--  arch/riscv/lib/riscv_v_helpers.c                                11
-rw-r--r--  arch/riscv/lib/uaccess.S                                        50
-rw-r--r--  arch/riscv/lib/uaccess_vector.S                                 15
21 files changed, 54 insertions, 1327 deletions
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 0baec92d2f55..bbc031124974 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -1,5 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-y += crypto/
lib-y += delay.o
lib-y += memcpy.o
lib-y += memset.o
@@ -16,12 +15,6 @@ endif
lib-$(CONFIG_MMU) += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o
lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
-obj-$(CONFIG_CRC32_ARCH) += crc32-riscv.o
-crc32-riscv-y := crc32.o crc32_msb.o crc32_lsb.o
-obj-$(CONFIG_CRC64_ARCH) += crc64-riscv.o
-crc64-riscv-y := crc64.o crc64_msb.o crc64_lsb.o
-obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-riscv.o
-crc-t10dif-riscv-y := crc-t10dif.o crc16_msb.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_RISCV_ISA_V) += xor.o
lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o
diff --git a/arch/riscv/lib/crc-clmul-consts.h b/arch/riscv/lib/crc-clmul-consts.h
deleted file mode 100644
index 8d73449235ef..000000000000
--- a/arch/riscv/lib/crc-clmul-consts.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * CRC constants generated by:
- *
- * ./scripts/gen-crc-consts.py riscv_clmul crc16_msb_0x8bb7,crc32_msb_0x04c11db7,crc32_lsb_0xedb88320,crc32_lsb_0x82f63b78,crc64_msb_0x42f0e1eba9ea3693,crc64_lsb_0x9a6c9329ac4bc9b5
- *
- * Do not edit manually.
- */
-
-struct crc_clmul_consts {
- unsigned long fold_across_2_longs_const_hi;
- unsigned long fold_across_2_longs_const_lo;
- unsigned long barrett_reduction_const_1;
- unsigned long barrett_reduction_const_2;
-};
-
-/*
- * Constants generated for most-significant-bit-first CRC-16 using
- * G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
- */
-static const struct crc_clmul_consts crc16_msb_0x8bb7_consts __maybe_unused = {
-#ifdef CONFIG_64BIT
- .fold_across_2_longs_const_hi = 0x0000000000001faa, /* x^192 mod G */
- .fold_across_2_longs_const_lo = 0x000000000000a010, /* x^128 mod G */
- .barrett_reduction_const_1 = 0xfb2d2bfc0e99d245, /* floor(x^79 / G) */
- .barrett_reduction_const_2 = 0x0000000000008bb7, /* G - x^16 */
-#else
- .fold_across_2_longs_const_hi = 0x00005890, /* x^96 mod G */
- .fold_across_2_longs_const_lo = 0x0000f249, /* x^64 mod G */
- .barrett_reduction_const_1 = 0xfb2d2bfc, /* floor(x^47 / G) */
- .barrett_reduction_const_2 = 0x00008bb7, /* G - x^16 */
-#endif
-};
-
-/*
- * Constants generated for most-significant-bit-first CRC-32 using
- * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 +
- * x^5 + x^4 + x^2 + x^1 + x^0
- */
-static const struct crc_clmul_consts crc32_msb_0x04c11db7_consts __maybe_unused = {
-#ifdef CONFIG_64BIT
- .fold_across_2_longs_const_hi = 0x00000000c5b9cd4c, /* x^192 mod G */
- .fold_across_2_longs_const_lo = 0x00000000e8a45605, /* x^128 mod G */
- .barrett_reduction_const_1 = 0x826880efa40da72d, /* floor(x^95 / G) */
- .barrett_reduction_const_2 = 0x0000000004c11db7, /* G - x^32 */
-#else
- .fold_across_2_longs_const_hi = 0xf200aa66, /* x^96 mod G */
- .fold_across_2_longs_const_lo = 0x490d678d, /* x^64 mod G */
- .barrett_reduction_const_1 = 0x826880ef, /* floor(x^63 / G) */
- .barrett_reduction_const_2 = 0x04c11db7, /* G - x^32 */
-#endif
-};
-
-/*
- * Constants generated for least-significant-bit-first CRC-32 using
- * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 +
- * x^5 + x^4 + x^2 + x^1 + x^0
- */
-static const struct crc_clmul_consts crc32_lsb_0xedb88320_consts __maybe_unused = {
-#ifdef CONFIG_64BIT
- .fold_across_2_longs_const_hi = 0x65673b4600000000, /* x^191 mod G */
- .fold_across_2_longs_const_lo = 0x9ba54c6f00000000, /* x^127 mod G */
- .barrett_reduction_const_1 = 0xb4e5b025f7011641, /* floor(x^95 / G) */
- .barrett_reduction_const_2 = 0x00000000edb88320, /* (G - x^32) * x^32 */
-#else
- .fold_across_2_longs_const_hi = 0xccaa009e, /* x^95 mod G */
- .fold_across_2_longs_const_lo = 0xb8bc6765, /* x^63 mod G */
- .barrett_reduction_const_1 = 0xf7011641, /* floor(x^63 / G) */
- .barrett_reduction_const_2 = 0xedb88320, /* (G - x^32) * x^0 */
-#endif
-};
-
-/*
- * Constants generated for least-significant-bit-first CRC-32 using
- * G(x) = x^32 + x^28 + x^27 + x^26 + x^25 + x^23 + x^22 + x^20 + x^19 + x^18 +
- * x^14 + x^13 + x^11 + x^10 + x^9 + x^8 + x^6 + x^0
- */
-static const struct crc_clmul_consts crc32_lsb_0x82f63b78_consts __maybe_unused = {
-#ifdef CONFIG_64BIT
- .fold_across_2_longs_const_hi = 0x3743f7bd00000000, /* x^191 mod G */
- .fold_across_2_longs_const_lo = 0x3171d43000000000, /* x^127 mod G */
- .barrett_reduction_const_1 = 0x4869ec38dea713f1, /* floor(x^95 / G) */
- .barrett_reduction_const_2 = 0x0000000082f63b78, /* (G - x^32) * x^32 */
-#else
- .fold_across_2_longs_const_hi = 0x493c7d27, /* x^95 mod G */
- .fold_across_2_longs_const_lo = 0xdd45aab8, /* x^63 mod G */
- .barrett_reduction_const_1 = 0xdea713f1, /* floor(x^63 / G) */
- .barrett_reduction_const_2 = 0x82f63b78, /* (G - x^32) * x^0 */
-#endif
-};
-
-/*
- * Constants generated for most-significant-bit-first CRC-64 using
- * G(x) = x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
- * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
- * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
- * x^7 + x^4 + x^1 + x^0
- */
-#ifdef CONFIG_64BIT
-static const struct crc_clmul_consts crc64_msb_0x42f0e1eba9ea3693_consts __maybe_unused = {
- .fold_across_2_longs_const_hi = 0x4eb938a7d257740e, /* x^192 mod G */
- .fold_across_2_longs_const_lo = 0x05f5c3c7eb52fab6, /* x^128 mod G */
- .barrett_reduction_const_1 = 0xabc694e836627c39, /* floor(x^127 / G) */
- .barrett_reduction_const_2 = 0x42f0e1eba9ea3693, /* G - x^64 */
-};
-#endif
-
-/*
- * Constants generated for least-significant-bit-first CRC-64 using
- * G(x) = x^64 + x^63 + x^61 + x^59 + x^58 + x^56 + x^55 + x^52 + x^49 + x^48 +
- * x^47 + x^46 + x^44 + x^41 + x^37 + x^36 + x^34 + x^32 + x^31 + x^28 +
- * x^26 + x^23 + x^22 + x^19 + x^16 + x^13 + x^12 + x^10 + x^9 + x^6 +
- * x^4 + x^3 + x^0
- */
-#ifdef CONFIG_64BIT
-static const struct crc_clmul_consts crc64_lsb_0x9a6c9329ac4bc9b5_consts __maybe_unused = {
- .fold_across_2_longs_const_hi = 0xeadc41fd2ba3d420, /* x^191 mod G */
- .fold_across_2_longs_const_lo = 0x21e9761e252621ac, /* x^127 mod G */
- .barrett_reduction_const_1 = 0x27ecfa329aef9f77, /* floor(x^127 / G) */
- .barrett_reduction_const_2 = 0x9a6c9329ac4bc9b5, /* (G - x^64) * x^0 */
-};
-#endif
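
The "x^N mod G" annotations in the msb-first CRC-16 table above can be reproduced with ordinary shift-and-XOR polynomial arithmetic over GF(2). The following stand-alone C sketch is an illustration only (it is neither kernel code nor the gen-crc-consts.py script named in the header); it assumes the full 17-bit generator 0x18bb7 for the G(x) listed above. x^16 mod G comes out as G - x^16 = 0x8bb7, matching barrett_reduction_const_2, and the other two results are expected to match the 64-bit fold constants.

#include <stdio.h>

#define G_FULL 0x18bb7u  /* CRC-16/T10DIF generator, including the x^16 term */

/* Compute x^n mod G(x) over GF(2), with bit i holding the x^i coefficient. */
static unsigned int xpow_mod_g(unsigned int n)
{
        unsigned int r = 1;  /* the polynomial x^0 */

        while (n--) {
                r <<= 1;                /* multiply by x */
                if (r & 0x10000)        /* degree 16 reached: subtract (XOR) G */
                        r ^= G_FULL;
        }
        return r;
}

int main(void)
{
        printf("x^16  mod G = 0x%04x\n", xpow_mod_g(16));   /* G - x^16, i.e. 0x8bb7 */
        printf("x^128 mod G = 0x%04x\n", xpow_mod_g(128));  /* fold_across_2_longs_const_lo */
        printf("x^192 mod G = 0x%04x\n", xpow_mod_g(192));  /* fold_across_2_longs_const_hi */
        return 0;
}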
diff --git a/arch/riscv/lib/crc-clmul-template.h b/arch/riscv/lib/crc-clmul-template.h
deleted file mode 100644
index 77187e7f1762..000000000000
--- a/arch/riscv/lib/crc-clmul-template.h
+++ /dev/null
@@ -1,265 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* Copyright 2025 Google LLC */
-
-/*
- * This file is a "template" that generates a CRC function optimized using the
- * RISC-V Zbc (scalar carryless multiplication) extension. The includer of this
- * file must define the following parameters to specify the type of CRC:
- *
- * crc_t: the data type of the CRC, e.g. u32 for a 32-bit CRC
- * LSB_CRC: 0 for a msb (most-significant-bit) first CRC, i.e. natural
- * mapping between bits and polynomial coefficients
- * 1 for a lsb (least-significant-bit) first CRC, i.e. reflected
- * mapping between bits and polynomial coefficients
- */
-
-#include <asm/byteorder.h>
-#include <linux/minmax.h>
-
-#define CRC_BITS (8 * sizeof(crc_t)) /* a.k.a. 'n' */
-
-static inline unsigned long clmul(unsigned long a, unsigned long b)
-{
- unsigned long res;
-
- asm(".option push\n"
- ".option arch,+zbc\n"
- "clmul %0, %1, %2\n"
- ".option pop\n"
- : "=r" (res) : "r" (a), "r" (b));
- return res;
-}
-
-static inline unsigned long clmulh(unsigned long a, unsigned long b)
-{
- unsigned long res;
-
- asm(".option push\n"
- ".option arch,+zbc\n"
- "clmulh %0, %1, %2\n"
- ".option pop\n"
- : "=r" (res) : "r" (a), "r" (b));
- return res;
-}
-
-static inline unsigned long clmulr(unsigned long a, unsigned long b)
-{
- unsigned long res;
-
- asm(".option push\n"
- ".option arch,+zbc\n"
- "clmulr %0, %1, %2\n"
- ".option pop\n"
- : "=r" (res) : "r" (a), "r" (b));
- return res;
-}
-
-/*
- * crc_load_long() loads one "unsigned long" of aligned data bytes, producing a
- * polynomial whose bit order matches the CRC's bit order.
- */
-#ifdef CONFIG_64BIT
-# if LSB_CRC
-# define crc_load_long(x) le64_to_cpup(x)
-# else
-# define crc_load_long(x) be64_to_cpup(x)
-# endif
-#else
-# if LSB_CRC
-# define crc_load_long(x) le32_to_cpup(x)
-# else
-# define crc_load_long(x) be32_to_cpup(x)
-# endif
-#endif
-
-/* XOR @crc into the end of @msgpoly that represents the high-order terms. */
-static inline unsigned long
-crc_clmul_prep(crc_t crc, unsigned long msgpoly)
-{
-#if LSB_CRC
- return msgpoly ^ crc;
-#else
- return msgpoly ^ ((unsigned long)crc << (BITS_PER_LONG - CRC_BITS));
-#endif
-}
-
-/*
- * Multiply the long-sized @msgpoly by x^n (a.k.a. x^CRC_BITS) and reduce it
- * modulo the generator polynomial G. This gives the CRC of @msgpoly.
- */
-static inline crc_t
-crc_clmul_long(unsigned long msgpoly, const struct crc_clmul_consts *consts)
-{
- unsigned long tmp;
-
- /*
- * First step of Barrett reduction with integrated multiplication by
- * x^n: calculate floor((msgpoly * x^n) / G). This is the value by
- * which G needs to be multiplied to cancel out the x^n and higher terms
- * of msgpoly * x^n. Do it using the following formula:
- *
- * msb-first:
- * floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G)) / x^(BITS_PER_LONG-1))
- * lsb-first:
- * floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G) * x) / x^BITS_PER_LONG)
- *
- * barrett_reduction_const_1 contains floor(x^(BITS_PER_LONG-1+n) / G),
- * which fits a long exactly. Using any lower power of x there would
- * not carry enough precision through the calculation, while using any
- * higher power of x would require extra instructions to handle a wider
- * multiplication. In the msb-first case, using this power of x results
- * in needing a floored division by x^(BITS_PER_LONG-1), which matches
- * what clmulr produces. In the lsb-first case, a factor of x gets
- * implicitly introduced by each carryless multiplication (shown as
- * '* x' above), and the floored division instead needs to be by
- * x^BITS_PER_LONG which matches what clmul produces.
- */
-#if LSB_CRC
- tmp = clmul(msgpoly, consts->barrett_reduction_const_1);
-#else
- tmp = clmulr(msgpoly, consts->barrett_reduction_const_1);
-#endif
-
- /*
- * Second step of Barrett reduction:
- *
- * crc := (msgpoly * x^n) + (G * floor((msgpoly * x^n) / G))
- *
- * This reduces (msgpoly * x^n) modulo G by adding the appropriate
- * multiple of G to it. The result uses only the x^0..x^(n-1) terms.
- * HOWEVER, since the unreduced value (msgpoly * x^n) is zero in those
- * terms in the first place, it is more efficient to do the equivalent:
- *
- * crc := ((G - x^n) * floor((msgpoly * x^n) / G)) mod x^n
- *
- * In the lsb-first case further modify it to the following which avoids
- * a shift, as the crc ends up in the physically low n bits from clmulr:
- *
- * product := ((G - x^n) * x^(BITS_PER_LONG - n)) * floor((msgpoly * x^n) / G) * x
- * crc := floor(product / x^(BITS_PER_LONG + 1 - n)) mod x^n
- *
- * barrett_reduction_const_2 contains the constant multiplier (G - x^n)
- * or (G - x^n) * x^(BITS_PER_LONG - n) from the formulas above. The
- * cast of the result to crc_t is essential, as it applies the mod x^n!
- */
-#if LSB_CRC
- return clmulr(tmp, consts->barrett_reduction_const_2);
-#else
- return clmul(tmp, consts->barrett_reduction_const_2);
-#endif
-}
-
-/* Update @crc with the data from @msgpoly. */
-static inline crc_t
-crc_clmul_update_long(crc_t crc, unsigned long msgpoly,
- const struct crc_clmul_consts *consts)
-{
- return crc_clmul_long(crc_clmul_prep(crc, msgpoly), consts);
-}
-
-/* Update @crc with 1 <= @len < sizeof(unsigned long) bytes of data. */
-static inline crc_t
-crc_clmul_update_partial(crc_t crc, const u8 *p, size_t len,
- const struct crc_clmul_consts *consts)
-{
- unsigned long msgpoly;
- size_t i;
-
-#if LSB_CRC
- msgpoly = (unsigned long)p[0] << (BITS_PER_LONG - 8);
- for (i = 1; i < len; i++)
- msgpoly = (msgpoly >> 8) ^ ((unsigned long)p[i] << (BITS_PER_LONG - 8));
-#else
- msgpoly = p[0];
- for (i = 1; i < len; i++)
- msgpoly = (msgpoly << 8) ^ p[i];
-#endif
-
- if (len >= sizeof(crc_t)) {
- #if LSB_CRC
- msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len);
- #else
- msgpoly ^= (unsigned long)crc << (8*len - CRC_BITS);
- #endif
- return crc_clmul_long(msgpoly, consts);
- }
-#if LSB_CRC
- msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len);
- return crc_clmul_long(msgpoly, consts) ^ (crc >> (8*len));
-#else
- msgpoly ^= crc >> (CRC_BITS - 8*len);
- return crc_clmul_long(msgpoly, consts) ^ (crc << (8*len));
-#endif
-}
-
-static inline crc_t
-crc_clmul(crc_t crc, const void *p, size_t len,
- const struct crc_clmul_consts *consts)
-{
- size_t align;
-
- /* This implementation assumes that the CRC fits in an unsigned long. */
- BUILD_BUG_ON(sizeof(crc_t) > sizeof(unsigned long));
-
- /* If the buffer is not long-aligned, align it. */
- align = (unsigned long)p % sizeof(unsigned long);
- if (align && len) {
- align = min(sizeof(unsigned long) - align, len);
- crc = crc_clmul_update_partial(crc, p, align, consts);
- p += align;
- len -= align;
- }
-
- if (len >= 4 * sizeof(unsigned long)) {
- unsigned long m0, m1;
-
- m0 = crc_clmul_prep(crc, crc_load_long(p));
- m1 = crc_load_long(p + sizeof(unsigned long));
- p += 2 * sizeof(unsigned long);
- len -= 2 * sizeof(unsigned long);
- /*
- * Main loop. Each iteration starts with a message polynomial
- * (x^BITS_PER_LONG)*m0 + m1, then logically extends it by two
- * more longs of data to form x^(3*BITS_PER_LONG)*m0 +
- * x^(2*BITS_PER_LONG)*m1 + x^BITS_PER_LONG*m2 + m3, then
- * "folds" that back into a congruent (modulo G) value that uses
- * just m0 and m1 again. This is done by multiplying m0 by the
- * precomputed constant (x^(3*BITS_PER_LONG) mod G) and m1 by
- * the precomputed constant (x^(2*BITS_PER_LONG) mod G), then
- * adding the results to m2 and m3 as appropriate. Each such
- * multiplication produces a result twice the length of a long,
- * which in RISC-V is two instructions clmul and clmulh.
- *
- * This could be changed to fold across more than 2 longs at a
- * time if there is a CPU that can take advantage of it.
- */
- do {
- unsigned long p0, p1, p2, p3;
-
- p0 = clmulh(m0, consts->fold_across_2_longs_const_hi);
- p1 = clmul(m0, consts->fold_across_2_longs_const_hi);
- p2 = clmulh(m1, consts->fold_across_2_longs_const_lo);
- p3 = clmul(m1, consts->fold_across_2_longs_const_lo);
- m0 = (LSB_CRC ? p1 ^ p3 : p0 ^ p2) ^ crc_load_long(p);
- m1 = (LSB_CRC ? p0 ^ p2 : p1 ^ p3) ^
- crc_load_long(p + sizeof(unsigned long));
-
- p += 2 * sizeof(unsigned long);
- len -= 2 * sizeof(unsigned long);
- } while (len >= 2 * sizeof(unsigned long));
-
- crc = crc_clmul_long(m0, consts);
- crc = crc_clmul_update_long(crc, m1, consts);
- }
-
- while (len >= sizeof(unsigned long)) {
- crc = crc_clmul_update_long(crc, crc_load_long(p), consts);
- p += sizeof(unsigned long);
- len -= sizeof(unsigned long);
- }
-
- if (len)
- crc = crc_clmul_update_partial(crc, p, len, consts);
-
- return crc;
-}
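
The template above relies on the Zbc carryless-multiply instructions wrapped by clmul(), clmulh() and clmulr(). As a stand-alone reference sketch (not kernel code), the models below restate their semantics from the RISC-V Bitmanip specification: clmul and clmulh return the low and high halves of the 128-bit GF(2) product, and clmulr, used by the msb-first Barrett step, returns bits 126..63 of that product.

#include <stdio.h>

/* Low 64 bits of the carryless product (Zbc clmul). */
static unsigned long clmul_model(unsigned long a, unsigned long b)
{
        unsigned long res = 0;

        for (int i = 0; i < 64; i++)
                if ((b >> i) & 1)
                        res ^= a << i;
        return res;
}

/* High 64 bits of the carryless product (Zbc clmulh). */
static unsigned long clmulh_model(unsigned long a, unsigned long b)
{
        unsigned long res = 0;

        for (int i = 1; i < 64; i++)
                if ((b >> i) & 1)
                        res ^= a >> (64 - i);
        return res;
}

/* Bits 126..63 of the carryless product (Zbc clmulr). */
static unsigned long clmulr_model(unsigned long a, unsigned long b)
{
        unsigned long res = 0;

        for (int i = 0; i < 64; i++)
                if ((b >> i) & 1)
                        res ^= a >> (63 - i);
        return res;
}

int main(void)
{
        /* (x^63 + 1) * (x + 1) = x^64 + x^63 + x + 1 over GF(2) */
        unsigned long a = 0x8000000000000001UL, b = 3;

        printf("lo = %016lx\n", clmul_model(a, b));   /* 8000000000000003 */
        printf("hi = %016lx\n", clmulh_model(a, b));  /* 0000000000000001 */
        printf("r  = %016lx\n", clmulr_model(a, b));  /* 0000000000000003 */
        return 0;
}

For the worked example, the product x^64 + x^63 + x + 1 is split the three ways described above, which is exactly the split the fold loop (clmul/clmulh) and the Barrett steps (clmul/clmulr) depend on.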
diff --git a/arch/riscv/lib/crc-clmul.h b/arch/riscv/lib/crc-clmul.h
deleted file mode 100644
index dd1736245815..000000000000
--- a/arch/riscv/lib/crc-clmul.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* Copyright 2025 Google LLC */
-
-#ifndef _RISCV_CRC_CLMUL_H
-#define _RISCV_CRC_CLMUL_H
-
-#include <linux/types.h>
-#include "crc-clmul-consts.h"
-
-u16 crc16_msb_clmul(u16 crc, const void *p, size_t len,
- const struct crc_clmul_consts *consts);
-u32 crc32_msb_clmul(u32 crc, const void *p, size_t len,
- const struct crc_clmul_consts *consts);
-u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len,
- const struct crc_clmul_consts *consts);
-#ifdef CONFIG_64BIT
-u64 crc64_msb_clmul(u64 crc, const void *p, size_t len,
- const struct crc_clmul_consts *consts);
-u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len,
- const struct crc_clmul_consts *consts);
-#endif
-
-#endif /* _RISCV_CRC_CLMUL_H */
diff --git a/arch/riscv/lib/crc-t10dif.c b/arch/riscv/lib/crc-t10dif.c
deleted file mode 100644
index e6b0051ccd86..000000000000
--- a/arch/riscv/lib/crc-t10dif.c
+++ /dev/null
@@ -1,24 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized CRC-T10DIF function
- *
- * Copyright 2025 Google LLC
- */
-
-#include <asm/hwcap.h>
-#include <asm/alternative-macros.h>
-#include <linux/crc-t10dif.h>
-#include <linux/module.h>
-
-#include "crc-clmul.h"
-
-u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len)
-{
- if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
- return crc16_msb_clmul(crc, p, len, &crc16_msb_0x8bb7_consts);
- return crc_t10dif_generic(crc, p, len);
-}
-EXPORT_SYMBOL(crc_t10dif_arch);
-
-MODULE_DESCRIPTION("RISC-V optimized CRC-T10DIF function");
-MODULE_LICENSE("GPL");
diff --git a/arch/riscv/lib/crc16_msb.c b/arch/riscv/lib/crc16_msb.c
deleted file mode 100644
index 554d295e95f5..000000000000
--- a/arch/riscv/lib/crc16_msb.c
+++ /dev/null
@@ -1,18 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized most-significant-bit-first CRC16
- *
- * Copyright 2025 Google LLC
- */
-
-#include "crc-clmul.h"
-
-typedef u16 crc_t;
-#define LSB_CRC 0
-#include "crc-clmul-template.h"
-
-u16 crc16_msb_clmul(u16 crc, const void *p, size_t len,
- const struct crc_clmul_consts *consts)
-{
- return crc_clmul(crc, p, len, consts);
-}
diff --git a/arch/riscv/lib/crc32.c b/arch/riscv/lib/crc32.c
deleted file mode 100644
index a3188b7d9c40..000000000000
--- a/arch/riscv/lib/crc32.c
+++ /dev/null
@@ -1,53 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized CRC32 functions
- *
- * Copyright 2025 Google LLC
- */
-
-#include <asm/hwcap.h>
-#include <asm/alternative-macros.h>
-#include <linux/crc32.h>
-#include <linux/module.h>
-
-#include "crc-clmul.h"
-
-u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
-{
- if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
- return crc32_lsb_clmul(crc, p, len,
- &crc32_lsb_0xedb88320_consts);
- return crc32_le_base(crc, p, len);
-}
-EXPORT_SYMBOL(crc32_le_arch);
-
-u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
-{
- if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
- return crc32_msb_clmul(crc, p, len,
- &crc32_msb_0x04c11db7_consts);
- return crc32_be_base(crc, p, len);
-}
-EXPORT_SYMBOL(crc32_be_arch);
-
-u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
-{
- if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
- return crc32_lsb_clmul(crc, p, len,
- &crc32_lsb_0x82f63b78_consts);
- return crc32c_base(crc, p, len);
-}
-EXPORT_SYMBOL(crc32c_arch);
-
-u32 crc32_optimizations(void)
-{
- if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
- return CRC32_LE_OPTIMIZATION |
- CRC32_BE_OPTIMIZATION |
- CRC32C_OPTIMIZATION;
- return 0;
-}
-EXPORT_SYMBOL(crc32_optimizations);
-
-MODULE_DESCRIPTION("RISC-V optimized CRC32 functions");
-MODULE_LICENSE("GPL");
diff --git a/arch/riscv/lib/crc32_lsb.c b/arch/riscv/lib/crc32_lsb.c
deleted file mode 100644
index 72fd67e7470c..000000000000
--- a/arch/riscv/lib/crc32_lsb.c
+++ /dev/null
@@ -1,18 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized least-significant-bit-first CRC32
- *
- * Copyright 2025 Google LLC
- */
-
-#include "crc-clmul.h"
-
-typedef u32 crc_t;
-#define LSB_CRC 1
-#include "crc-clmul-template.h"
-
-u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len,
- const struct crc_clmul_consts *consts)
-{
- return crc_clmul(crc, p, len, consts);
-}
diff --git a/arch/riscv/lib/crc32_msb.c b/arch/riscv/lib/crc32_msb.c
deleted file mode 100644
index fdbeaccc369f..000000000000
--- a/arch/riscv/lib/crc32_msb.c
+++ /dev/null
@@ -1,18 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized most-significant-bit-first CRC32
- *
- * Copyright 2025 Google LLC
- */
-
-#include "crc-clmul.h"
-
-typedef u32 crc_t;
-#define LSB_CRC 0
-#include "crc-clmul-template.h"
-
-u32 crc32_msb_clmul(u32 crc, const void *p, size_t len,
- const struct crc_clmul_consts *consts)
-{
- return crc_clmul(crc, p, len, consts);
-}
diff --git a/arch/riscv/lib/crc64.c b/arch/riscv/lib/crc64.c
deleted file mode 100644
index f0015a27836a..000000000000
--- a/arch/riscv/lib/crc64.c
+++ /dev/null
@@ -1,34 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized CRC64 functions
- *
- * Copyright 2025 Google LLC
- */
-
-#include <asm/hwcap.h>
-#include <asm/alternative-macros.h>
-#include <linux/crc64.h>
-#include <linux/module.h>
-
-#include "crc-clmul.h"
-
-u64 crc64_be_arch(u64 crc, const u8 *p, size_t len)
-{
- if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
- return crc64_msb_clmul(crc, p, len,
- &crc64_msb_0x42f0e1eba9ea3693_consts);
- return crc64_be_generic(crc, p, len);
-}
-EXPORT_SYMBOL(crc64_be_arch);
-
-u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len)
-{
- if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
- return crc64_lsb_clmul(crc, p, len,
- &crc64_lsb_0x9a6c9329ac4bc9b5_consts);
- return crc64_nvme_generic(crc, p, len);
-}
-EXPORT_SYMBOL(crc64_nvme_arch);
-
-MODULE_DESCRIPTION("RISC-V optimized CRC64 functions");
-MODULE_LICENSE("GPL");
diff --git a/arch/riscv/lib/crc64_lsb.c b/arch/riscv/lib/crc64_lsb.c
deleted file mode 100644
index c5371bb85d90..000000000000
--- a/arch/riscv/lib/crc64_lsb.c
+++ /dev/null
@@ -1,18 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized least-significant-bit-first CRC64
- *
- * Copyright 2025 Google LLC
- */
-
-#include "crc-clmul.h"
-
-typedef u64 crc_t;
-#define LSB_CRC 1
-#include "crc-clmul-template.h"
-
-u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len,
- const struct crc_clmul_consts *consts)
-{
- return crc_clmul(crc, p, len, consts);
-}
diff --git a/arch/riscv/lib/crc64_msb.c b/arch/riscv/lib/crc64_msb.c
deleted file mode 100644
index 1925d1dbe225..000000000000
--- a/arch/riscv/lib/crc64_msb.c
+++ /dev/null
@@ -1,18 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized most-significant-bit-first CRC64
- *
- * Copyright 2025 Google LLC
- */
-
-#include "crc-clmul.h"
-
-typedef u64 crc_t;
-#define LSB_CRC 0
-#include "crc-clmul-template.h"
-
-u64 crc64_msb_clmul(u64 crc, const void *p, size_t len,
- const struct crc_clmul_consts *consts)
-{
- return crc_clmul(crc, p, len, consts);
-}
diff --git a/arch/riscv/lib/crypto/Kconfig b/arch/riscv/lib/crypto/Kconfig
deleted file mode 100644
index 47c99ea97ce2..000000000000
--- a/arch/riscv/lib/crypto/Kconfig
+++ /dev/null
@@ -1,16 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-
-config CRYPTO_CHACHA_RISCV64
- tristate
- depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
- default CRYPTO_LIB_CHACHA
- select CRYPTO_ARCH_HAVE_LIB_CHACHA
- select CRYPTO_LIB_CHACHA_GENERIC
-
-config CRYPTO_SHA256_RISCV64
- tristate
- depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
- default CRYPTO_LIB_SHA256
- select CRYPTO_ARCH_HAVE_LIB_SHA256
- select CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD
- select CRYPTO_LIB_SHA256_GENERIC
diff --git a/arch/riscv/lib/crypto/Makefile b/arch/riscv/lib/crypto/Makefile
deleted file mode 100644
index b7cb877a2c07..000000000000
--- a/arch/riscv/lib/crypto/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-
-obj-$(CONFIG_CRYPTO_CHACHA_RISCV64) += chacha-riscv64.o
-chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o
-
-obj-$(CONFIG_CRYPTO_SHA256_RISCV64) += sha256-riscv64.o
-sha256-riscv64-y := sha256.o sha256-riscv64-zvknha_or_zvknhb-zvkb.o
diff --git a/arch/riscv/lib/crypto/chacha-riscv64-glue.c b/arch/riscv/lib/crypto/chacha-riscv64-glue.c
deleted file mode 100644
index 8c3f11d79be3..000000000000
--- a/arch/riscv/lib/crypto/chacha-riscv64-glue.c
+++ /dev/null
@@ -1,75 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * ChaCha stream cipher (RISC-V optimized)
- *
- * Copyright (C) 2023 SiFive, Inc.
- * Author: Jerry Shih <jerry.shih@sifive.com>
- */
-
-#include <asm/simd.h>
-#include <asm/vector.h>
-#include <crypto/chacha.h>
-#include <crypto/internal/simd.h>
-#include <linux/linkage.h>
-#include <linux/module.h>
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_zvkb);
-
-asmlinkage void chacha_zvkb(struct chacha_state *state, const u8 *in, u8 *out,
- size_t nblocks, int nrounds);
-
-void hchacha_block_arch(const struct chacha_state *state,
- u32 out[HCHACHA_OUT_WORDS], int nrounds)
-{
- hchacha_block_generic(state, out, nrounds);
-}
-EXPORT_SYMBOL(hchacha_block_arch);
-
-void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src,
- unsigned int bytes, int nrounds)
-{
- u8 block_buffer[CHACHA_BLOCK_SIZE];
- unsigned int full_blocks = bytes / CHACHA_BLOCK_SIZE;
- unsigned int tail_bytes = bytes % CHACHA_BLOCK_SIZE;
-
- if (!static_branch_likely(&use_zvkb) || !crypto_simd_usable())
- return chacha_crypt_generic(state, dst, src, bytes, nrounds);
-
- kernel_vector_begin();
- if (full_blocks) {
- chacha_zvkb(state, src, dst, full_blocks, nrounds);
- src += full_blocks * CHACHA_BLOCK_SIZE;
- dst += full_blocks * CHACHA_BLOCK_SIZE;
- }
- if (tail_bytes) {
- memcpy(block_buffer, src, tail_bytes);
- chacha_zvkb(state, block_buffer, block_buffer, 1, nrounds);
- memcpy(dst, block_buffer, tail_bytes);
- }
- kernel_vector_end();
-}
-EXPORT_SYMBOL(chacha_crypt_arch);
-
-bool chacha_is_arch_optimized(void)
-{
- return static_key_enabled(&use_zvkb);
-}
-EXPORT_SYMBOL(chacha_is_arch_optimized);
-
-static int __init riscv64_chacha_mod_init(void)
-{
- if (riscv_isa_extension_available(NULL, ZVKB) &&
- riscv_vector_vlen() >= 128)
- static_branch_enable(&use_zvkb);
- return 0;
-}
-subsys_initcall(riscv64_chacha_mod_init);
-
-static void __exit riscv64_chacha_mod_exit(void)
-{
-}
-module_exit(riscv64_chacha_mod_exit);
-
-MODULE_DESCRIPTION("ChaCha stream cipher (RISC-V optimized)");
-MODULE_AUTHOR("Jerry Shih <jerry.shih@sifive.com>");
-MODULE_LICENSE("GPL");
diff --git a/arch/riscv/lib/crypto/chacha-riscv64-zvkb.S b/arch/riscv/lib/crypto/chacha-riscv64-zvkb.S
deleted file mode 100644
index b777d0b4e379..000000000000
--- a/arch/riscv/lib/crypto/chacha-riscv64-zvkb.S
+++ /dev/null
@@ -1,297 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
-//
-// This file is dual-licensed, meaning that you can use it under your
-// choice of either of the following two licenses:
-//
-// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
-//
-// Licensed under the Apache License 2.0 (the "License"). You can obtain
-// a copy in the file LICENSE in the source distribution or at
-// https://www.openssl.org/source/license.html
-//
-// or
-//
-// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
-// Copyright 2024 Google LLC
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// The generated code of this file depends on the following RISC-V extensions:
-// - RV64I
-// - RISC-V Vector ('V') with VLEN >= 128
-// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
-
-#include <linux/linkage.h>
-
-.text
-.option arch, +zvkb
-
-#define STATEP a0
-#define INP a1
-#define OUTP a2
-#define NBLOCKS a3
-#define NROUNDS a4
-
-#define CONSTS0 a5
-#define CONSTS1 a6
-#define CONSTS2 a7
-#define CONSTS3 t0
-#define TMP t1
-#define VL t2
-#define STRIDE t3
-#define ROUND_CTR t4
-#define KEY0 s0
-#define KEY1 s1
-#define KEY2 s2
-#define KEY3 s3
-#define KEY4 s4
-#define KEY5 s5
-#define KEY6 s6
-#define KEY7 s7
-#define COUNTER s8
-#define NONCE0 s9
-#define NONCE1 s10
-#define NONCE2 s11
-
-.macro chacha_round a0, b0, c0, d0, a1, b1, c1, d1, \
- a2, b2, c2, d2, a3, b3, c3, d3
- // a += b; d ^= a; d = rol(d, 16);
- vadd.vv \a0, \a0, \b0
- vadd.vv \a1, \a1, \b1
- vadd.vv \a2, \a2, \b2
- vadd.vv \a3, \a3, \b3
- vxor.vv \d0, \d0, \a0
- vxor.vv \d1, \d1, \a1
- vxor.vv \d2, \d2, \a2
- vxor.vv \d3, \d3, \a3
- vror.vi \d0, \d0, 32 - 16
- vror.vi \d1, \d1, 32 - 16
- vror.vi \d2, \d2, 32 - 16
- vror.vi \d3, \d3, 32 - 16
-
- // c += d; b ^= c; b = rol(b, 12);
- vadd.vv \c0, \c0, \d0
- vadd.vv \c1, \c1, \d1
- vadd.vv \c2, \c2, \d2
- vadd.vv \c3, \c3, \d3
- vxor.vv \b0, \b0, \c0
- vxor.vv \b1, \b1, \c1
- vxor.vv \b2, \b2, \c2
- vxor.vv \b3, \b3, \c3
- vror.vi \b0, \b0, 32 - 12
- vror.vi \b1, \b1, 32 - 12
- vror.vi \b2, \b2, 32 - 12
- vror.vi \b3, \b3, 32 - 12
-
- // a += b; d ^= a; d = rol(d, 8);
- vadd.vv \a0, \a0, \b0
- vadd.vv \a1, \a1, \b1
- vadd.vv \a2, \a2, \b2
- vadd.vv \a3, \a3, \b3
- vxor.vv \d0, \d0, \a0
- vxor.vv \d1, \d1, \a1
- vxor.vv \d2, \d2, \a2
- vxor.vv \d3, \d3, \a3
- vror.vi \d0, \d0, 32 - 8
- vror.vi \d1, \d1, 32 - 8
- vror.vi \d2, \d2, 32 - 8
- vror.vi \d3, \d3, 32 - 8
-
- // c += d; b ^= c; b = rol(b, 7);
- vadd.vv \c0, \c0, \d0
- vadd.vv \c1, \c1, \d1
- vadd.vv \c2, \c2, \d2
- vadd.vv \c3, \c3, \d3
- vxor.vv \b0, \b0, \c0
- vxor.vv \b1, \b1, \c1
- vxor.vv \b2, \b2, \c2
- vxor.vv \b3, \b3, \c3
- vror.vi \b0, \b0, 32 - 7
- vror.vi \b1, \b1, 32 - 7
- vror.vi \b2, \b2, 32 - 7
- vror.vi \b3, \b3, 32 - 7
-.endm
-
-// void chacha_zvkb(struct chacha_state *state, const u8 *in, u8 *out,
-// size_t nblocks, int nrounds);
-//
-// |nblocks| is the number of 64-byte blocks to process, and must be nonzero.
-//
-// |state| gives the ChaCha state matrix, including the 32-bit counter in
-// state->x[12] following the RFC7539 convention; note that this differs from
-// the original ChaCha paper which uses a 64-bit counter in state->x[12..13].
-// The updated 32-bit counter is written back to state->x[12] before returning.
-SYM_FUNC_START(chacha_zvkb)
- addi sp, sp, -96
- sd s0, 0(sp)
- sd s1, 8(sp)
- sd s2, 16(sp)
- sd s3, 24(sp)
- sd s4, 32(sp)
- sd s5, 40(sp)
- sd s6, 48(sp)
- sd s7, 56(sp)
- sd s8, 64(sp)
- sd s9, 72(sp)
- sd s10, 80(sp)
- sd s11, 88(sp)
-
- li STRIDE, 64
-
- // Set up the initial state matrix in scalar registers.
- lw CONSTS0, 0(STATEP)
- lw CONSTS1, 4(STATEP)
- lw CONSTS2, 8(STATEP)
- lw CONSTS3, 12(STATEP)
- lw KEY0, 16(STATEP)
- lw KEY1, 20(STATEP)
- lw KEY2, 24(STATEP)
- lw KEY3, 28(STATEP)
- lw KEY4, 32(STATEP)
- lw KEY5, 36(STATEP)
- lw KEY6, 40(STATEP)
- lw KEY7, 44(STATEP)
- lw COUNTER, 48(STATEP)
- lw NONCE0, 52(STATEP)
- lw NONCE1, 56(STATEP)
- lw NONCE2, 60(STATEP)
-
-.Lblock_loop:
- // Set vl to the number of blocks to process in this iteration.
- vsetvli VL, NBLOCKS, e32, m1, ta, ma
-
- // Set up the initial state matrix for the next VL blocks in v0-v15.
- // v{i} holds the i'th 32-bit word of the state matrix for all blocks.
- // Note that only the counter word, at index 12, differs across blocks.
- vmv.v.x v0, CONSTS0
- vmv.v.x v1, CONSTS1
- vmv.v.x v2, CONSTS2
- vmv.v.x v3, CONSTS3
- vmv.v.x v4, KEY0
- vmv.v.x v5, KEY1
- vmv.v.x v6, KEY2
- vmv.v.x v7, KEY3
- vmv.v.x v8, KEY4
- vmv.v.x v9, KEY5
- vmv.v.x v10, KEY6
- vmv.v.x v11, KEY7
- vid.v v12
- vadd.vx v12, v12, COUNTER
- vmv.v.x v13, NONCE0
- vmv.v.x v14, NONCE1
- vmv.v.x v15, NONCE2
-
- // Load the first half of the input data for each block into v16-v23.
- // v{16+i} holds the i'th 32-bit word for all blocks.
- vlsseg8e32.v v16, (INP), STRIDE
-
- mv ROUND_CTR, NROUNDS
-.Lnext_doubleround:
- addi ROUND_CTR, ROUND_CTR, -2
- // column round
- chacha_round v0, v4, v8, v12, v1, v5, v9, v13, \
- v2, v6, v10, v14, v3, v7, v11, v15
- // diagonal round
- chacha_round v0, v5, v10, v15, v1, v6, v11, v12, \
- v2, v7, v8, v13, v3, v4, v9, v14
- bnez ROUND_CTR, .Lnext_doubleround
-
- // Load the second half of the input data for each block into v24-v31.
- // v{24+i} holds the {8+i}'th 32-bit word for all blocks.
- addi TMP, INP, 32
- vlsseg8e32.v v24, (TMP), STRIDE
-
- // Finalize the first half of the keystream for each block.
- vadd.vx v0, v0, CONSTS0
- vadd.vx v1, v1, CONSTS1
- vadd.vx v2, v2, CONSTS2
- vadd.vx v3, v3, CONSTS3
- vadd.vx v4, v4, KEY0
- vadd.vx v5, v5, KEY1
- vadd.vx v6, v6, KEY2
- vadd.vx v7, v7, KEY3
-
- // Encrypt/decrypt the first half of the data for each block.
- vxor.vv v16, v16, v0
- vxor.vv v17, v17, v1
- vxor.vv v18, v18, v2
- vxor.vv v19, v19, v3
- vxor.vv v20, v20, v4
- vxor.vv v21, v21, v5
- vxor.vv v22, v22, v6
- vxor.vv v23, v23, v7
-
- // Store the first half of the output data for each block.
- vssseg8e32.v v16, (OUTP), STRIDE
-
- // Finalize the second half of the keystream for each block.
- vadd.vx v8, v8, KEY4
- vadd.vx v9, v9, KEY5
- vadd.vx v10, v10, KEY6
- vadd.vx v11, v11, KEY7
- vid.v v0
- vadd.vx v12, v12, COUNTER
- vadd.vx v13, v13, NONCE0
- vadd.vx v14, v14, NONCE1
- vadd.vx v15, v15, NONCE2
- vadd.vv v12, v12, v0
-
- // Encrypt/decrypt the second half of the data for each block.
- vxor.vv v24, v24, v8
- vxor.vv v25, v25, v9
- vxor.vv v26, v26, v10
- vxor.vv v27, v27, v11
- vxor.vv v29, v29, v13
- vxor.vv v28, v28, v12
- vxor.vv v30, v30, v14
- vxor.vv v31, v31, v15
-
- // Store the second half of the output data for each block.
- addi TMP, OUTP, 32
- vssseg8e32.v v24, (TMP), STRIDE
-
- // Update the counter, the remaining number of blocks, and the input and
- // output pointers according to the number of blocks processed (VL).
- add COUNTER, COUNTER, VL
- sub NBLOCKS, NBLOCKS, VL
- slli TMP, VL, 6
- add OUTP, OUTP, TMP
- add INP, INP, TMP
- bnez NBLOCKS, .Lblock_loop
-
- sw COUNTER, 48(STATEP)
- ld s0, 0(sp)
- ld s1, 8(sp)
- ld s2, 16(sp)
- ld s3, 24(sp)
- ld s4, 32(sp)
- ld s5, 40(sp)
- ld s6, 48(sp)
- ld s7, 56(sp)
- ld s8, 64(sp)
- ld s9, 72(sp)
- ld s10, 80(sp)
- ld s11, 88(sp)
- addi sp, sp, 96
- ret
-SYM_FUNC_END(chacha_zvkb)
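
For reference, the scalar quarter round that the chacha_round macro above applies to four column or diagonal groups at once is the standard ChaCha operation sketched below (stand-alone C, not kernel code); each vror.vi d, d, 32 - s in the macro corresponds to a left-rotate by s here.

#include <stdint.h>
#include <stdio.h>

static inline uint32_t rol32(uint32_t v, int s)
{
        return (v << s) | (v >> (32 - s));
}

/* One ChaCha quarter round, matching the a/b/c/d steps in chacha_round. */
static void chacha_quarter_round(uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
{
        *a += *b; *d ^= *a; *d = rol32(*d, 16);
        *c += *d; *b ^= *c; *b = rol32(*b, 12);
        *a += *b; *d ^= *a; *d = rol32(*d, 8);
        *c += *d; *b ^= *c; *b = rol32(*b, 7);
}

int main(void)
{
        /* Quarter-round example inputs from RFC 7539, section 2.1.1. */
        uint32_t a = 0x11111111, b = 0x01020304, c = 0x9b8d6f43, d = 0x01234567;

        chacha_quarter_round(&a, &b, &c, &d);
        printf("%08x %08x %08x %08x\n", a, b, c, d);
        return 0;
}

A double round, as in .Lnext_doubleround, is this quarter round applied to the four columns of the 4x4 state and then to its four diagonals, which is exactly the register grouping the two chacha_round invocations use.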
diff --git a/arch/riscv/lib/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S b/arch/riscv/lib/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S
deleted file mode 100644
index fad501ad0617..000000000000
--- a/arch/riscv/lib/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S
+++ /dev/null
@@ -1,225 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
-//
-// This file is dual-licensed, meaning that you can use it under your
-// choice of either of the following two licenses:
-//
-// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
-//
-// Licensed under the Apache License 2.0 (the "License"). You can obtain
-// a copy in the file LICENSE in the source distribution or at
-// https://www.openssl.org/source/license.html
-//
-// or
-//
-// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
-// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
-// Copyright 2024 Google LLC
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// The generated code of this file depends on the following RISC-V extensions:
-// - RV64I
-// - RISC-V Vector ('V') with VLEN >= 128
-// - RISC-V Vector SHA-2 Secure Hash extension ('Zvknha' or 'Zvknhb')
-// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
-
-#include <linux/linkage.h>
-
-.text
-.option arch, +zvknha, +zvkb
-
-#define STATEP a0
-#define DATA a1
-#define NUM_BLOCKS a2
-
-#define STATEP_C a3
-
-#define MASK v0
-#define INDICES v1
-#define W0 v2
-#define W1 v3
-#define W2 v4
-#define W3 v5
-#define VTMP v6
-#define FEBA v7
-#define HGDC v8
-#define K0 v10
-#define K1 v11
-#define K2 v12
-#define K3 v13
-#define K4 v14
-#define K5 v15
-#define K6 v16
-#define K7 v17
-#define K8 v18
-#define K9 v19
-#define K10 v20
-#define K11 v21
-#define K12 v22
-#define K13 v23
-#define K14 v24
-#define K15 v25
-#define PREV_FEBA v26
-#define PREV_HGDC v27
-
-// Do 4 rounds of SHA-256. w0 contains the current 4 message schedule words.
-//
-// If not all the message schedule words have been computed yet, then this also
-// computes 4 more message schedule words. w1-w3 contain the next 3 groups of 4
-// message schedule words; this macro computes the group after w3 and writes it
-// to w0. This means that the next (w0, w1, w2, w3) is the current (w1, w2, w3,
-// w0), so the caller must cycle through the registers accordingly.
-.macro sha256_4rounds last, k, w0, w1, w2, w3
- vadd.vv VTMP, \k, \w0
- vsha2cl.vv HGDC, FEBA, VTMP
- vsha2ch.vv FEBA, HGDC, VTMP
-.if !\last
- vmerge.vvm VTMP, \w2, \w1, MASK
- vsha2ms.vv \w0, VTMP, \w3
-.endif
-.endm
-
-.macro sha256_16rounds last, k0, k1, k2, k3
- sha256_4rounds \last, \k0, W0, W1, W2, W3
- sha256_4rounds \last, \k1, W1, W2, W3, W0
- sha256_4rounds \last, \k2, W2, W3, W0, W1
- sha256_4rounds \last, \k3, W3, W0, W1, W2
-.endm
-
-// void sha256_transform_zvknha_or_zvknhb_zvkb(u32 state[SHA256_STATE_WORDS],
-// const u8 *data, size_t nblocks);
-SYM_FUNC_START(sha256_transform_zvknha_or_zvknhb_zvkb)
-
- // Load the round constants into K0-K15.
- vsetivli zero, 4, e32, m1, ta, ma
- la t0, K256
- vle32.v K0, (t0)
- addi t0, t0, 16
- vle32.v K1, (t0)
- addi t0, t0, 16
- vle32.v K2, (t0)
- addi t0, t0, 16
- vle32.v K3, (t0)
- addi t0, t0, 16
- vle32.v K4, (t0)
- addi t0, t0, 16
- vle32.v K5, (t0)
- addi t0, t0, 16
- vle32.v K6, (t0)
- addi t0, t0, 16
- vle32.v K7, (t0)
- addi t0, t0, 16
- vle32.v K8, (t0)
- addi t0, t0, 16
- vle32.v K9, (t0)
- addi t0, t0, 16
- vle32.v K10, (t0)
- addi t0, t0, 16
- vle32.v K11, (t0)
- addi t0, t0, 16
- vle32.v K12, (t0)
- addi t0, t0, 16
- vle32.v K13, (t0)
- addi t0, t0, 16
- vle32.v K14, (t0)
- addi t0, t0, 16
- vle32.v K15, (t0)
-
- // Setup mask for the vmerge to replace the first word (idx==0) in
- // message scheduling. There are 4 words, so an 8-bit mask suffices.
- vsetivli zero, 1, e8, m1, ta, ma
- vmv.v.i MASK, 0x01
-
- // Load the state. The state is stored as {a,b,c,d,e,f,g,h}, but we
- // need {f,e,b,a},{h,g,d,c}. The dst vtype is e32m1 and the index vtype
- // is e8mf4. We use index-load with the i8 indices {20, 16, 4, 0},
- // loaded using the 32-bit little endian value 0x00041014.
- li t0, 0x00041014
- vsetivli zero, 1, e32, m1, ta, ma
- vmv.v.x INDICES, t0
- addi STATEP_C, STATEP, 8
- vsetivli zero, 4, e32, m1, ta, ma
- vluxei8.v FEBA, (STATEP), INDICES
- vluxei8.v HGDC, (STATEP_C), INDICES
-
-.Lnext_block:
- addi NUM_BLOCKS, NUM_BLOCKS, -1
-
- // Save the previous state, as it's needed later.
- vmv.v.v PREV_FEBA, FEBA
- vmv.v.v PREV_HGDC, HGDC
-
- // Load the next 512-bit message block and endian-swap each 32-bit word.
- vle32.v W0, (DATA)
- vrev8.v W0, W0
- addi DATA, DATA, 16
- vle32.v W1, (DATA)
- vrev8.v W1, W1
- addi DATA, DATA, 16
- vle32.v W2, (DATA)
- vrev8.v W2, W2
- addi DATA, DATA, 16
- vle32.v W3, (DATA)
- vrev8.v W3, W3
- addi DATA, DATA, 16
-
- // Do the 64 rounds of SHA-256.
- sha256_16rounds 0, K0, K1, K2, K3
- sha256_16rounds 0, K4, K5, K6, K7
- sha256_16rounds 0, K8, K9, K10, K11
- sha256_16rounds 1, K12, K13, K14, K15
-
- // Add the previous state.
- vadd.vv FEBA, FEBA, PREV_FEBA
- vadd.vv HGDC, HGDC, PREV_HGDC
-
- // Repeat if more blocks remain.
- bnez NUM_BLOCKS, .Lnext_block
-
- // Store the new state and return.
- vsuxei8.v FEBA, (STATEP), INDICES
- vsuxei8.v HGDC, (STATEP_C), INDICES
- ret
-SYM_FUNC_END(sha256_transform_zvknha_or_zvknhb_zvkb)
-
-.section ".rodata"
-.p2align 2
-.type K256, @object
-K256:
- .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
- .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
- .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
- .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
- .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
- .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
- .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
- .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
- .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
- .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
- .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
- .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
- .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
- .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
- .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
- .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-.size K256, . - K256
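
The index-load setup above is easy to misread, so here is a hypothetical stand-alone check (plain C, not kernel code) that decodes the little-endian constant 0x00041014 into the i8 byte offsets {20, 16, 4, 0} and shows which words they gather from a state stored as {a,b,c,d,e,f,g,h}: {f,e,b,a} from STATEP into FEBA and, with the base advanced by 8 bytes, {h,g,d,c} from STATEP_C into HGDC.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        static const char names[8] = { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h' };
        uint32_t indices = 0x00041014;  /* same value as loaded into INDICES */

        for (int lane = 0; lane < 4; lane++) {
                unsigned int off = (indices >> (8 * lane)) & 0xff;  /* i8 index for this lane */

                /* Each 32-bit state word is 4 bytes; STATEP_C = STATEP + 8. */
                printf("lane %d: offset %2u -> %c, offset %2u -> %c\n",
                       lane, off, names[off / 4], off + 8, names[(off + 8) / 4]);
        }
        return 0;
}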
diff --git a/arch/riscv/lib/crypto/sha256.c b/arch/riscv/lib/crypto/sha256.c
deleted file mode 100644
index 71808397dff4..000000000000
--- a/arch/riscv/lib/crypto/sha256.c
+++ /dev/null
@@ -1,67 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * SHA-256 (RISC-V accelerated)
- *
- * Copyright (C) 2022 VRULL GmbH
- * Author: Heiko Stuebner <heiko.stuebner@vrull.eu>
- *
- * Copyright (C) 2023 SiFive, Inc.
- * Author: Jerry Shih <jerry.shih@sifive.com>
- */
-
-#include <asm/vector.h>
-#include <crypto/internal/sha2.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-asmlinkage void sha256_transform_zvknha_or_zvknhb_zvkb(
- u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks);
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_extensions);
-
-void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS],
- const u8 *data, size_t nblocks)
-{
- if (static_branch_likely(&have_extensions)) {
- kernel_vector_begin();
- sha256_transform_zvknha_or_zvknhb_zvkb(state, data, nblocks);
- kernel_vector_end();
- } else {
- sha256_blocks_generic(state, data, nblocks);
- }
-}
-EXPORT_SYMBOL_GPL(sha256_blocks_simd);
-
-void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
- const u8 *data, size_t nblocks)
-{
- sha256_blocks_generic(state, data, nblocks);
-}
-EXPORT_SYMBOL_GPL(sha256_blocks_arch);
-
-bool sha256_is_arch_optimized(void)
-{
- return static_key_enabled(&have_extensions);
-}
-EXPORT_SYMBOL_GPL(sha256_is_arch_optimized);
-
-static int __init riscv64_sha256_mod_init(void)
-{
- /* Both zvknha and zvknhb provide the SHA-256 instructions. */
- if ((riscv_isa_extension_available(NULL, ZVKNHA) ||
- riscv_isa_extension_available(NULL, ZVKNHB)) &&
- riscv_isa_extension_available(NULL, ZVKB) &&
- riscv_vector_vlen() >= 128)
- static_branch_enable(&have_extensions);
- return 0;
-}
-subsys_initcall(riscv64_sha256_mod_init);
-
-static void __exit riscv64_sha256_mod_exit(void)
-{
-}
-module_exit(riscv64_sha256_mod_exit);
-
-MODULE_DESCRIPTION("SHA-256 (RISC-V accelerated)");
-MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>");
-MODULE_LICENSE("GPL");
diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c
index be38a93cedae..7bbdfc6d4552 100644
--- a/arch/riscv/lib/riscv_v_helpers.c
+++ b/arch/riscv/lib/riscv_v_helpers.c
@@ -16,8 +16,11 @@
#ifdef CONFIG_MMU
size_t riscv_v_usercopy_threshold = CONFIG_RISCV_ISA_V_UCOPY_THRESHOLD;
int __asm_vector_usercopy(void *dst, void *src, size_t n);
+int __asm_vector_usercopy_sum_enabled(void *dst, void *src, size_t n);
int fallback_scalar_usercopy(void *dst, void *src, size_t n);
-asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
+int fallback_scalar_usercopy_sum_enabled(void *dst, void *src, size_t n);
+asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n,
+ bool enable_sum)
{
size_t remain, copied;
@@ -26,7 +29,8 @@ asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
goto fallback;
kernel_vector_begin();
- remain = __asm_vector_usercopy(dst, src, n);
+ remain = enable_sum ? __asm_vector_usercopy(dst, src, n) :
+ __asm_vector_usercopy_sum_enabled(dst, src, n);
kernel_vector_end();
if (remain) {
@@ -40,6 +44,7 @@ asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
return remain;
fallback:
- return fallback_scalar_usercopy(dst, src, n);
+ return enable_sum ? fallback_scalar_usercopy(dst, src, n) :
+ fallback_scalar_usercopy_sum_enabled(dst, src, n);
}
#endif
diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index 6a9f116bb545..4efea1b3326c 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -17,14 +17,43 @@ SYM_FUNC_START(__asm_copy_to_user)
ALTERNATIVE("j fallback_scalar_usercopy", "nop", 0, RISCV_ISA_EXT_ZVE32X, CONFIG_RISCV_ISA_V)
REG_L t0, riscv_v_usercopy_threshold
bltu a2, t0, fallback_scalar_usercopy
- tail enter_vector_usercopy
+ li a3, 1
+ tail enter_vector_usercopy
#endif
-SYM_FUNC_START(fallback_scalar_usercopy)
+SYM_FUNC_END(__asm_copy_to_user)
+EXPORT_SYMBOL(__asm_copy_to_user)
+SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user)
+EXPORT_SYMBOL(__asm_copy_from_user)
+SYM_FUNC_START(fallback_scalar_usercopy)
/* Enable access to user memory */
- li t6, SR_SUM
- csrs CSR_STATUS, t6
+ li t6, SR_SUM
+ csrs CSR_STATUS, t6
+ mv t6, ra
+ call fallback_scalar_usercopy_sum_enabled
+
+ /* Disable access to user memory */
+ mv ra, t6
+ li t6, SR_SUM
+ csrc CSR_STATUS, t6
+ ret
+SYM_FUNC_END(fallback_scalar_usercopy)
+
+SYM_FUNC_START(__asm_copy_to_user_sum_enabled)
+#ifdef CONFIG_RISCV_ISA_V
+ ALTERNATIVE("j fallback_scalar_usercopy_sum_enabled", "nop", 0, RISCV_ISA_EXT_ZVE32X, CONFIG_RISCV_ISA_V)
+ REG_L t0, riscv_v_usercopy_threshold
+ bltu a2, t0, fallback_scalar_usercopy_sum_enabled
+ li a3, 0
+ tail enter_vector_usercopy
+#endif
+SYM_FUNC_END(__asm_copy_to_user_sum_enabled)
+SYM_FUNC_ALIAS(__asm_copy_from_user_sum_enabled, __asm_copy_to_user_sum_enabled)
+EXPORT_SYMBOL(__asm_copy_from_user_sum_enabled)
+EXPORT_SYMBOL(__asm_copy_to_user_sum_enabled)
+
+SYM_FUNC_START(fallback_scalar_usercopy_sum_enabled)
/*
* Save the terminal address which will be used to compute the number
* of bytes copied in case of a fixup exception.
@@ -178,23 +207,12 @@ SYM_FUNC_START(fallback_scalar_usercopy)
bltu a0, t0, 4b /* t0 - end of dst */
.Lout_copy_user:
- /* Disable access to user memory */
- csrc CSR_STATUS, t6
li a0, 0
ret
-
- /* Exception fixup code */
10:
- /* Disable access to user memory */
- csrc CSR_STATUS, t6
sub a0, t5, a0
ret
-SYM_FUNC_END(__asm_copy_to_user)
-SYM_FUNC_END(fallback_scalar_usercopy)
-EXPORT_SYMBOL(__asm_copy_to_user)
-SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user)
-EXPORT_SYMBOL(__asm_copy_from_user)
-
+SYM_FUNC_END(fallback_scalar_usercopy_sum_enabled)
SYM_FUNC_START(__clear_user)
diff --git a/arch/riscv/lib/uaccess_vector.S b/arch/riscv/lib/uaccess_vector.S
index 7c45f26de4f7..03b5560609a2 100644
--- a/arch/riscv/lib/uaccess_vector.S
+++ b/arch/riscv/lib/uaccess_vector.S
@@ -24,7 +24,18 @@ SYM_FUNC_START(__asm_vector_usercopy)
/* Enable access to user memory */
li t6, SR_SUM
csrs CSR_STATUS, t6
+ mv t6, ra
+ call __asm_vector_usercopy_sum_enabled
+
+ /* Disable access to user memory */
+ mv ra, t6
+ li t6, SR_SUM
+ csrc CSR_STATUS, t6
+ ret
+SYM_FUNC_END(__asm_vector_usercopy)
+
+SYM_FUNC_START(__asm_vector_usercopy_sum_enabled)
loop:
vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
fixup vle8.v vData, (pSrc), 10f
@@ -36,8 +47,6 @@ loop:
/* Exception fixup for vector load is shared with normal exit */
10:
- /* Disable access to user memory */
- csrc CSR_STATUS, t6
mv a0, iNum
ret
@@ -49,4 +58,4 @@ loop:
csrr t2, CSR_VSTART
sub iNum, iNum, t2
j 10b
-SYM_FUNC_END(__asm_vector_usercopy)
+SYM_FUNC_END(__asm_vector_usercopy_sum_enabled)