Diffstat (limited to 'arch/riscv/lib')
-rw-r--r--  arch/riscv/lib/Makefile          |   4
-rw-r--r--  arch/riscv/lib/crc32-riscv.c     | 311
-rw-r--r--  arch/riscv/lib/csum.c            |   7
-rw-r--r--  arch/riscv/lib/memset.S          |   2
-rw-r--r--  arch/riscv/lib/strcmp.S          |   2
-rw-r--r--  arch/riscv/lib/strlen.S          |   1
-rw-r--r--  arch/riscv/lib/strncmp.S         |   2
-rw-r--r--  arch/riscv/lib/uaccess.S         |   6
-rw-r--r--  arch/riscv/lib/uaccess_vector.S  |   1
9 files changed, 326 insertions(+), 10 deletions(-)
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index bd6e6c1b0497..79368a895fee 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -3,9 +3,11 @@ lib-y += delay.o
lib-y += memcpy.o
lib-y += memset.o
lib-y += memmove.o
+ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),)
lib-y += strcmp.o
lib-y += strlen.o
lib-y += strncmp.o
+endif
lib-y += csum.o
ifeq ($(CONFIG_MMU), y)
lib-$(CONFIG_RISCV_ISA_V) += uaccess_vector.o
@@ -13,7 +15,7 @@ endif
lib-$(CONFIG_MMU) += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o
lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
-
+obj-$(CONFIG_CRC32_ARCH) += crc32-riscv.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_RISCV_ISA_V) += xor.o
lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o
diff --git a/arch/riscv/lib/crc32-riscv.c b/arch/riscv/lib/crc32-riscv.c
new file mode 100644
index 000000000000..53d56ab422c7
--- /dev/null
+++ b/arch/riscv/lib/crc32-riscv.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Accelerated CRC32 implementation with Zbc extension.
+ *
+ * Copyright (C) 2024 Intel Corporation
+ */
+
+#include <asm/hwcap.h>
+#include <asm/alternative-macros.h>
+#include <asm/byteorder.h>
+
+#include <linux/types.h>
+#include <linux/minmax.h>
+#include <linux/crc32poly.h>
+#include <linux/crc32.h>
+#include <linux/byteorder/generic.h>
+#include <linux/module.h>
+
+/*
+ * Refer to https://www.corsix.org/content/barrett-reduction-polynomials for
+ * better understanding of how this math works.
+ *
+ * let "+" denotes polynomial add (XOR)
+ * let "-" denotes polynomial sub (XOR)
+ * let "*" denotes polynomial multiplication
+ * let "/" denotes polynomial floor division
+ * let "S" denotes source data, XLEN bit wide
+ * let "P" denotes CRC32 polynomial
+ * let "T" denotes 2^(XLEN+32)
+ * let "QT" denotes quotient of T/P, with the bit for 2^XLEN being implicit
+ *
+ * crc32(S, P)
+ * => S * (2^32) - S * (2^32) / P * P
+ * => lowest 32 bits of: S * (2^32) / P * P
+ * => lowest 32 bits of: S * (2^32) * (T / P) / T * P
+ * => lowest 32 bits of: S * (2^32) * quotient / T * P
+ * => lowest 32 bits of: S * quotient / 2^XLEN * P
+ * => lowest 32 bits of: (clmul_high_part(S, QT) + S) * P
+ * => clmul_low_part(clmul_high_part(S, QT) + S, P)
+ *
+ * In terms of below implementations, the BE case is more intuitive, since the
+ * higher order bit sits at more significant position.
+ */
+
+#if __riscv_xlen == 64
+/* Slide by XLEN bits per iteration */
+# define STEP_ORDER 3
+
+/* Each below polynomial quotient has an implicit bit for 2^XLEN */
+
+/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in LE format */
+# define CRC32_POLY_QT_LE 0x5a72d812fb808b20
+
+/* Polynomial quotient of (2^(XLEN+32))/CRC32C_POLY, in LE format */
+# define CRC32C_POLY_QT_LE 0xa434f61c6f5389f8
+
+/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in BE format, it should be
+ * the same as the bit-reversed version of CRC32_POLY_QT_LE
+ */
+# define CRC32_POLY_QT_BE 0x04d101df481b4e5a
+
+static inline u64 crc32_le_prep(u32 crc, unsigned long const *ptr)
+{
+ return (u64)crc ^ (__force u64)__cpu_to_le64(*ptr);
+}
+
+static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
+{
+ u32 crc;
+
+ /* We don't have a "clmulrh" insn, so use clmul + slli instead. */
+ asm volatile (".option push\n"
+ ".option arch,+zbc\n"
+ "clmul %0, %1, %2\n"
+ "slli %0, %0, 1\n"
+ "xor %0, %0, %1\n"
+ "clmulr %0, %0, %3\n"
+ "srli %0, %0, 32\n"
+ ".option pop\n"
+ : "=&r" (crc)
+ : "r" (s),
+ "r" (poly_qt),
+ "r" ((u64)poly << 32)
+ :);
+ return crc;
+}
+
+static inline u64 crc32_be_prep(u32 crc, unsigned long const *ptr)
+{
+ return ((u64)crc << 32) ^ (__force u64)__cpu_to_be64(*ptr);
+}
+
+#elif __riscv_xlen == 32
+# define STEP_ORDER 2
+/* Each quotient should match the upper half of its analog in RV64 */
+# define CRC32_POLY_QT_LE 0xfb808b20
+# define CRC32C_POLY_QT_LE 0x6f5389f8
+# define CRC32_POLY_QT_BE 0x04d101df
+
+static inline u32 crc32_le_prep(u32 crc, unsigned long const *ptr)
+{
+ return crc ^ (__force u32)__cpu_to_le32(*ptr);
+}
+
+static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
+{
+ u32 crc;
+
+ /* We don't have a "clmulrh" insn, so use clmul + slli instead. */
+ asm volatile (".option push\n"
+ ".option arch,+zbc\n"
+ "clmul %0, %1, %2\n"
+ "slli %0, %0, 1\n"
+ "xor %0, %0, %1\n"
+ "clmulr %0, %0, %3\n"
+ ".option pop\n"
+ : "=&r" (crc)
+ : "r" (s),
+ "r" (poly_qt),
+ "r" (poly)
+ :);
+ return crc;
+}
+
+static inline u32 crc32_be_prep(u32 crc, unsigned long const *ptr)
+{
+ return crc ^ (__force u32)__cpu_to_be32(*ptr);
+}
+
+#else
+# error "Unexpected __riscv_xlen"
+#endif
+
+static inline u32 crc32_be_zbc(unsigned long s)
+{
+ u32 crc;
+
+ asm volatile (".option push\n"
+ ".option arch,+zbc\n"
+ "clmulh %0, %1, %2\n"
+ "xor %0, %0, %1\n"
+ "clmul %0, %0, %3\n"
+ ".option pop\n"
+ : "=&r" (crc)
+ : "r" (s),
+ "r" (CRC32_POLY_QT_BE),
+ "r" (CRC32_POLY_BE)
+ :);
+ return crc;
+}
+
+#define STEP (1 << STEP_ORDER)
+#define OFFSET_MASK (STEP - 1)
+
+typedef u32 (*fallback)(u32 crc, unsigned char const *p, size_t len);
+
+static inline u32 crc32_le_unaligned(u32 crc, unsigned char const *p,
+ size_t len, u32 poly,
+ unsigned long poly_qt)
+{
+ size_t bits = len * 8;
+ unsigned long s = 0;
+ u32 crc_low = 0;
+
+ for (int i = 0; i < len; i++)
+ s = ((unsigned long)*p++ << (__riscv_xlen - 8)) | (s >> 8);
+
+ s ^= (unsigned long)crc << (__riscv_xlen - bits);
+ if (__riscv_xlen == 32 || len < sizeof(u32))
+ crc_low = crc >> bits;
+
+ crc = crc32_le_zbc(s, poly, poly_qt);
+ crc ^= crc_low;
+
+ return crc;
+}
+
+static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
+ size_t len, u32 poly,
+ unsigned long poly_qt,
+ fallback crc_fb)
+{
+ size_t offset, head_len, tail_len;
+ unsigned long const *p_ul;
+ unsigned long s;
+
+ asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+ RISCV_ISA_EXT_ZBC, 1)
+ : : : : legacy);
+
+ /* Handle the unaligned head. */
+ offset = (unsigned long)p & OFFSET_MASK;
+ if (offset && len) {
+ head_len = min(STEP - offset, len);
+ crc = crc32_le_unaligned(crc, p, head_len, poly, poly_qt);
+ p += head_len;
+ len -= head_len;
+ }
+
+ tail_len = len & OFFSET_MASK;
+ len = len >> STEP_ORDER;
+ p_ul = (unsigned long const *)p;
+
+ for (int i = 0; i < len; i++) {
+ s = crc32_le_prep(crc, p_ul);
+ crc = crc32_le_zbc(s, poly, poly_qt);
+ p_ul++;
+ }
+
+ /* Handle the tail bytes. */
+ p = (unsigned char const *)p_ul;
+ if (tail_len)
+ crc = crc32_le_unaligned(crc, p, tail_len, poly, poly_qt);
+
+ return crc;
+
+legacy:
+ return crc_fb(crc, p, len);
+}
+
+u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len)
+{
+ return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE,
+ crc32_le_base);
+}
+EXPORT_SYMBOL(crc32_le_arch);
+
+u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len)
+{
+ return crc32_le_generic(crc, p, len, CRC32C_POLY_LE,
+ CRC32C_POLY_QT_LE, crc32c_le_base);
+}
+EXPORT_SYMBOL(crc32c_le_arch);
+
+static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p,
+ size_t len)
+{
+ size_t bits = len * 8;
+ unsigned long s = 0;
+ u32 crc_low = 0;
+
+ s = 0;
+ for (int i = 0; i < len; i++)
+ s = *p++ | (s << 8);
+
+ if (__riscv_xlen == 32 || len < sizeof(u32)) {
+ s ^= crc >> (32 - bits);
+ crc_low = crc << bits;
+ } else {
+ s ^= (unsigned long)crc << (bits - 32);
+ }
+
+ crc = crc32_be_zbc(s);
+ crc ^= crc_low;
+
+ return crc;
+}
+
+u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len)
+{
+ size_t offset, head_len, tail_len;
+ unsigned long const *p_ul;
+ unsigned long s;
+
+ asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+ RISCV_ISA_EXT_ZBC, 1)
+ : : : : legacy);
+
+ /* Handle the unaligned head. */
+ offset = (unsigned long)p & OFFSET_MASK;
+ if (offset && len) {
+ head_len = min(STEP - offset, len);
+ crc = crc32_be_unaligned(crc, p, head_len);
+ p += head_len;
+ len -= head_len;
+ }
+
+ tail_len = len & OFFSET_MASK;
+ len = len >> STEP_ORDER;
+ p_ul = (unsigned long const *)p;
+
+ for (int i = 0; i < len; i++) {
+ s = crc32_be_prep(crc, p_ul);
+ crc = crc32_be_zbc(s);
+ p_ul++;
+ }
+
+ /* Handle the tail bytes. */
+ p = (unsigned char const *)p_ul;
+ if (tail_len)
+ crc = crc32_be_unaligned(crc, p, tail_len);
+
+ return crc;
+
+legacy:
+ return crc32_be_base(crc, p, len);
+}
+EXPORT_SYMBOL(crc32_be_arch);
+
+u32 crc32_optimizations(void)
+{
+ if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
+ return CRC32_LE_OPTIMIZATION |
+ CRC32_BE_OPTIMIZATION |
+ CRC32C_OPTIMIZATION;
+ return 0;
+}
+EXPORT_SYMBOL(crc32_optimizations);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Accelerated CRC32 implementation with Zbc extension");
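
The Barrett-reduction derivation in the header comment of crc32-riscv.c can be made concrete with a small host-side sketch. The snippet below mirrors crc32_be_prep() and crc32_be_zbc() for a single 64-bit chunk (the big-endian case the comment calls more intuitive), but replaces the Zbc clmulh/clmul instructions with portable bit-loop carry-less multiplies and cross-checks the result against a bit-at-a-time LFSR. The helper names (clmul_lo, clmul_hi, crc32_be_fold, crc32_be_fold_ref) are illustrative only; the constants are the ones defined in the patch. This is a sketch for understanding the math, not kernel code.

/*
 * Userspace illustration of the Zbc Barrett-reduction fold (BE case).
 * Build with any C99 compiler, e.g. "cc -O2 barrett_demo.c".
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define CRC32_POLY_BE    0x04c11db7u           /* P, with the x^32 term implicit */
#define CRC32_POLY_QT_BE 0x04d101df481b4e5aull /* floor(x^96 / P), x^64 bit implicit */

/* Low 64 bits of the 128-bit carry-less product (what clmul returns). */
static uint64_t clmul_lo(uint64_t a, uint64_t b)
{
	uint64_t r = 0;

	for (int i = 0; i < 64; i++)
		if (b & (1ULL << i))
			r ^= a << i;
	return r;
}

/* High 64 bits of the carry-less product (what clmulh returns). */
static uint64_t clmul_hi(uint64_t a, uint64_t b)
{
	uint64_t r = 0;

	for (int i = 1; i < 64; i++)
		if (b & (1ULL << i))
			r ^= a >> (64 - i);
	return r;
}

/* One fold step: S = (crc << 32) ^ chunk, result = S * x^32 mod P. */
static uint32_t crc32_be_fold(uint32_t crc, uint64_t chunk_be)
{
	uint64_t s = ((uint64_t)crc << 32) ^ chunk_be;
	/* q = floor(S * x^32 / P): clmulh by QT, plus S for QT's implicit x^64 bit */
	uint64_t q = clmul_hi(s, CRC32_POLY_QT_BE) ^ s;

	/*
	 * The remainder is the low 32 bits of q * P; the x^32 term of P only
	 * reaches bits >= 32, so multiplying by CRC32_POLY_BE is enough.
	 */
	return (uint32_t)clmul_lo(q, CRC32_POLY_BE);
}

/* Bit-at-a-time reference: shift the 64 bits of S through a 32-bit LFSR. */
static uint32_t crc32_be_fold_ref(uint32_t crc, uint64_t chunk_be)
{
	uint64_t s = ((uint64_t)crc << 32) ^ chunk_be;
	uint32_t r = 0;

	for (int i = 63; i >= 0; i--) {
		uint32_t bit = (uint32_t)((s >> i) & 1) ^ (r >> 31);

		r <<= 1;
		if (bit)
			r ^= CRC32_POLY_BE;
	}
	return r;
}

int main(void)
{
	uint32_t crc = 0xffffffffu;
	uint64_t chunk = 0x123456789abcdef0ull;

	assert(crc32_be_fold(crc, chunk) == crc32_be_fold_ref(crc, chunk));
	printf("fold = %08x\n", (unsigned)crc32_be_fold(crc, chunk));
	return 0;
}

The two carry-less multiplications do the work of 64 polynomial-division steps per machine word; the LE functions in the patch follow the same pattern in the bit-reflected domain.
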
diff --git a/arch/riscv/lib/csum.c b/arch/riscv/lib/csum.c
index 74af3ab520b6..7fb12c59e571 100644
--- a/arch/riscv/lib/csum.c
+++ b/arch/riscv/lib/csum.c
@@ -3,7 +3,7 @@
* Checksum library
*
* Influenced by arch/arm64/lib/csum.c
- * Copyright (C) 2023 Rivos Inc.
+ * Copyright (C) 2023-2024 Rivos Inc.
*/
#include <linux/bitops.h>
#include <linux/compiler.h>
@@ -318,10 +318,7 @@ unsigned int do_csum(const unsigned char *buff, int len)
* branches. The largest chunk of overlap was delegated into the
* do_csum_common function.
*/
- if (static_branch_likely(&fast_misaligned_access_speed_key))
- return do_csum_no_alignment(buff, len);
-
- if (((unsigned long)buff & OFFSET_MASK) == 0)
+ if (has_fast_unaligned_accesses() || (((unsigned long)buff & OFFSET_MASK) == 0))
return do_csum_no_alignment(buff, len);
return do_csum_with_alignment(buff, len);
diff --git a/arch/riscv/lib/memset.S b/arch/riscv/lib/memset.S
index 35f358e70bdb..da23b8347e2d 100644
--- a/arch/riscv/lib/memset.S
+++ b/arch/riscv/lib/memset.S
@@ -111,3 +111,5 @@ SYM_FUNC_START(__memset)
ret
SYM_FUNC_END(__memset)
SYM_FUNC_ALIAS_WEAK(memset, __memset)
+SYM_FUNC_ALIAS(__pi_memset, __memset)
+SYM_FUNC_ALIAS(__pi___memset, __memset)
diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S
index 687b2bea5c43..57a5c0066231 100644
--- a/arch/riscv/lib/strcmp.S
+++ b/arch/riscv/lib/strcmp.S
@@ -120,3 +120,5 @@ strcmp_zbb:
.option pop
#endif
SYM_FUNC_END(strcmp)
+SYM_FUNC_ALIAS(__pi_strcmp, strcmp)
+EXPORT_SYMBOL(strcmp)
diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S
index 8ae3064e45ff..962983b73251 100644
--- a/arch/riscv/lib/strlen.S
+++ b/arch/riscv/lib/strlen.S
@@ -131,3 +131,4 @@ strlen_zbb:
#endif
SYM_FUNC_END(strlen)
SYM_FUNC_ALIAS(__pi_strlen, strlen)
+EXPORT_SYMBOL(strlen)
diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S
index aba5b3148621..7b2d0ff9ed6c 100644
--- a/arch/riscv/lib/strncmp.S
+++ b/arch/riscv/lib/strncmp.S
@@ -136,3 +136,5 @@ strncmp_zbb:
.option pop
#endif
SYM_FUNC_END(strncmp)
+SYM_FUNC_ALIAS(__pi_strncmp, strncmp)
+EXPORT_SYMBOL(strncmp)
diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index bc22c078aba8..6a9f116bb545 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -14,7 +14,7 @@
SYM_FUNC_START(__asm_copy_to_user)
#ifdef CONFIG_RISCV_ISA_V
- ALTERNATIVE("j fallback_scalar_usercopy", "nop", 0, RISCV_ISA_EXT_v, CONFIG_RISCV_ISA_V)
+ ALTERNATIVE("j fallback_scalar_usercopy", "nop", 0, RISCV_ISA_EXT_ZVE32X, CONFIG_RISCV_ISA_V)
REG_L t0, riscv_v_usercopy_threshold
bltu a2, t0, fallback_scalar_usercopy
tail enter_vector_usercopy
@@ -44,7 +44,7 @@ SYM_FUNC_START(fallback_scalar_usercopy)
* Use byte copy only if too small.
* SZREG holds 4 for RV32 and 8 for RV64
*/
- li a3, 9*SZREG /* size must be larger than size in word_copy */
+ li a3, 9*SZREG-1 /* size must >= (word_copy stride + SZREG-1) */
bltu a2, a3, .Lbyte_copy_tail
/*
@@ -103,7 +103,7 @@ SYM_FUNC_START(fallback_scalar_usercopy)
fixup REG_S t4, 7*SZREG(a0), 10f
addi a0, a0, 8*SZREG
addi a1, a1, 8*SZREG
- bltu a0, t0, 2b
+ bleu a0, t0, 2b
addi t0, t0, 8*SZREG /* revert to original value */
j .Lbyte_copy_tail
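
The threshold change above is easier to see with concrete numbers. word_copy moves 8*SZREG bytes per iteration, and the updated comment pins the minimum entry size at the word_copy stride plus SZREG-1, i.e. 8*SZREG + (SZREG - 1) = 9*SZREG - 1 (71 bytes on RV64, 35 on RV32) rather than 9*SZREG. The throwaway host program below models only the entry check and deliberately ignores the alignment head and the shift-copy path; word_copied() and its parameters are illustrative, not kernel code.

/* Simplified model of the word-copy entry check in fallback_scalar_usercopy. */
#include <stdio.h>

#define SZREG  8		/* RV64; use 4 for RV32 */
#define STRIDE (8 * SZREG)	/* bytes handled per word_copy iteration */

static long word_copied(long len, long threshold)
{
	if (len < threshold)	/* mirrors: bltu a2, a3, .Lbyte_copy_tail */
		return 0;
	return (len / STRIDE) * STRIDE;
}

int main(void)
{
	/* A 71-byte copy: the old cutoff (72) forced a pure byte copy,
	 * the new cutoff (9*SZREG - 1 = 71) lets word_copy handle 64 bytes.
	 */
	printf("old cutoff: %ld bytes via word_copy\n", word_copied(71, 9 * SZREG));
	printf("new cutoff: %ld bytes via word_copy\n", word_copied(71, 9 * SZREG - 1));
	return 0;
}
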
diff --git a/arch/riscv/lib/uaccess_vector.S b/arch/riscv/lib/uaccess_vector.S
index 51ab5588e9ff..7c45f26de4f7 100644
--- a/arch/riscv/lib/uaccess_vector.S
+++ b/arch/riscv/lib/uaccess_vector.S
@@ -1,7 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/linkage.h>
-#include <asm-generic/export.h>
#include <asm/asm.h>
#include <asm/asm-extable.h>
#include <asm/csr.h>