summaryrefslogtreecommitdiff
path: root/arch/riscv/lib
diff options
context:
space:
mode:
authorHeiko Stuebner <heiko.stuebner@vrull.eu>2023-01-13 22:23:01 +0100
committerPalmer Dabbelt <palmer@rivosinc.com>2023-01-31 11:43:24 -0800
commitb6fcdb191e36f82336f9b5e126d51c02e7323480 (patch)
tree9478e9f2e5141295105137ec3e62e1aa9e6141d2 /arch/riscv/lib
parent56e0790c7f9e59ba6a0f4b59981d1d6fbf43efb0 (diff)
RISC-V: add zbb support to string functions
Add handling for ZBB extension and add support for using it as a variant for optimized string functions. Support for the Zbb-str-variants is limited to the GNU-assembler for now, as LLVM has not yet acquired the functionality to selectively change the arch option in assembler code. This is still under review at https://reviews.llvm.org/D123515 Co-developed-by: Christoph Muellner <christoph.muellner@vrull.eu> Signed-off-by: Christoph Muellner <christoph.muellner@vrull.eu> Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu> Reviewed-by: Conor Dooley <conor.dooley@microchip.com> Link: https://lore.kernel.org/r/20230113212301.3534711-3-heiko@sntech.de Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
Diffstat (limited to 'arch/riscv/lib')
-rw-r--r--arch/riscv/lib/strcmp.S85
-rw-r--r--arch/riscv/lib/strlen.S105
-rw-r--r--arch/riscv/lib/strncmp.S98
3 files changed, 288 insertions, 0 deletions
diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S
index 8babd712b958..8148b6418f61 100644
--- a/arch/riscv/lib/strcmp.S
+++ b/arch/riscv/lib/strcmp.S
@@ -3,9 +3,14 @@
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm-generic/export.h>
+#include <asm/alternative-macros.h>
+#include <asm/errata_list.h>
/* int strcmp(const char *cs, const char *ct) */
SYM_FUNC_START(strcmp)
+
+ ALTERNATIVE("nop", "j strcmp_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB)
+
/*
* Returns
* a0 - comparison result, value like strcmp
@@ -33,4 +38,84 @@ SYM_FUNC_START(strcmp)
*/
sub a0, t0, t1
ret
+
+/*
+ * Variant of strcmp using the ZBB extension if available
+ */
+#ifdef CONFIG_RISCV_ISA_ZBB
+strcmp_zbb:
+
+.option push
+.option arch,+zbb
+
+ /*
+ * Returns
+ * a0 - comparison result, value like strcmp
+ *
+ * Parameters
+ * a0 - string1
+ * a1 - string2
+ *
+ * Clobbers
+ * t0, t1, t2, t3, t4, t5
+ */
+
+ or t2, a0, a1
+ li t4, -1
+ and t2, t2, SZREG-1
+ bnez t2, 3f
+
+ /* Main loop for aligned string. */
+ .p2align 3
+1:
+ REG_L t0, 0(a0)
+ REG_L t1, 0(a1)
+ orc.b t3, t0
+ bne t3, t4, 2f
+ addi a0, a0, SZREG
+ addi a1, a1, SZREG
+ beq t0, t1, 1b
+
+ /*
+ * Words don't match, and no null byte in the first
+ * word. Get bytes in big-endian order and compare.
+ */
+#ifndef CONFIG_CPU_BIG_ENDIAN
+ rev8 t0, t0
+ rev8 t1, t1
+#endif
+
+ /* Synthesize (t0 >= t1) ? 1 : -1 in a branchless sequence. */
+ sltu a0, t0, t1
+ neg a0, a0
+ ori a0, a0, 1
+ ret
+
+2:
+ /*
+ * Found a null byte.
+ * If words don't match, fall back to simple loop.
+ */
+ bne t0, t1, 3f
+
+ /* Otherwise, strings are equal. */
+ li a0, 0
+ ret
+
+ /* Simple loop for misaligned strings. */
+ .p2align 3
+3:
+ lbu t0, 0(a0)
+ lbu t1, 0(a1)
+ addi a0, a0, 1
+ addi a1, a1, 1
+ bne t0, t1, 4f
+ bnez t0, 3b
+
+4:
+ sub a0, t0, t1
+ ret
+
+.option pop
+#endif
SYM_FUNC_END(strcmp)
diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S
index 0a3b11853efd..0f9dbf93301a 100644
--- a/arch/riscv/lib/strlen.S
+++ b/arch/riscv/lib/strlen.S
@@ -3,9 +3,14 @@
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm-generic/export.h>
+#include <asm/alternative-macros.h>
+#include <asm/errata_list.h>
/* int strlen(const char *s) */
SYM_FUNC_START(strlen)
+
+ ALTERNATIVE("nop", "j strlen_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB)
+
/*
* Returns
* a0 - string length
@@ -25,4 +30,104 @@ SYM_FUNC_START(strlen)
2:
sub a0, t1, a0
ret
+
+/*
+ * Variant of strlen using the ZBB extension if available
+ */
+#ifdef CONFIG_RISCV_ISA_ZBB
+strlen_zbb:
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+# define CZ clz
+# define SHIFT sll
+#else
+# define CZ ctz
+# define SHIFT srl
+#endif
+
+.option push
+.option arch,+zbb
+
+ /*
+ * Returns
+ * a0 - string length
+ *
+ * Parameters
+ * a0 - String to measure
+ *
+ * Clobbers
+ * t0, t1, t2, t3
+ */
+
+ /* Number of irrelevant bytes in the first word. */
+ andi t2, a0, SZREG-1
+
+ /* Align pointer. */
+ andi t0, a0, -SZREG
+
+ li t3, SZREG
+ sub t3, t3, t2
+ slli t2, t2, 3
+
+ /* Get the first word. */
+ REG_L t1, 0(t0)
+
+ /*
+ * Shift away the partial data we loaded to remove the irrelevant bytes
+ * preceding the string with the effect of adding NUL bytes at the
+ * end of the string's first word.
+ */
+ SHIFT t1, t1, t2
+
+ /* Convert non-NUL into 0xff and NUL into 0x00. */
+ orc.b t1, t1
+
+ /* Convert non-NUL into 0x00 and NUL into 0xff. */
+ not t1, t1
+
+ /*
+ * Search for the first set bit (corresponding to a NUL byte in the
+ * original chunk).
+ */
+ CZ t1, t1
+
+ /*
+ * The first chunk is special: compare against the number
+ * of valid bytes in this chunk.
+ */
+ srli a0, t1, 3
+ bgtu t3, a0, 3f
+
+ /* Prepare for the word comparison loop. */
+ addi t2, t0, SZREG
+ li t3, -1
+
+ /*
+ * Our critical loop is 4 instructions and processes data in
+ * 4 byte or 8 byte chunks.
+ */
+ .p2align 3
+1:
+ REG_L t1, SZREG(t0)
+ addi t0, t0, SZREG
+ orc.b t1, t1
+ beq t1, t3, 1b
+2:
+ not t1, t1
+ CZ t1, t1
+
+ /* Get number of processed words. */
+ sub t2, t0, t2
+
+ /* Add number of characters in the first word. */
+ add a0, a0, t2
+ srli t1, t1, 3
+
+ /* Add number of characters in the last word. */
+ add a0, a0, t1
+3:
+ ret
+
+.option pop
+#endif
SYM_FUNC_END(strlen)
diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S
index 1f644d0a93f6..7940ddab2d48 100644
--- a/arch/riscv/lib/strncmp.S
+++ b/arch/riscv/lib/strncmp.S
@@ -3,9 +3,14 @@
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm-generic/export.h>
+#include <asm/alternative-macros.h>
+#include <asm/errata_list.h>
/* int strncmp(const char *cs, const char *ct, size_t count) */
SYM_FUNC_START(strncmp)
+
+ ALTERNATIVE("nop", "j strncmp_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB)
+
/*
* Returns
* a0 - comparison result, value like strncmp
@@ -38,4 +43,97 @@ SYM_FUNC_START(strncmp)
*/
sub a0, t0, t1
ret
+
+/*
+ * Variant of strncmp using the ZBB extension if available
+ */
+#ifdef CONFIG_RISCV_ISA_ZBB
+strncmp_zbb:
+
+.option push
+.option arch,+zbb
+
+ /*
+ * Returns
+ * a0 - comparison result, like strncmp
+ *
+ * Parameters
+ * a0 - string1
+ * a1 - string2
+ * a2 - number of characters to compare
+ *
+ * Clobbers
+ * t0, t1, t2, t3, t4, t5, t6
+ */
+
+ or t2, a0, a1
+ li t5, -1
+ and t2, t2, SZREG-1
+ add t4, a0, a2
+ bnez t2, 4f
+
+ /* Adjust limit for fast-path. */
+ andi t6, t4, -SZREG
+
+ /* Main loop for aligned string. */
+ .p2align 3
+1:
+ bgt a0, t6, 3f
+ REG_L t0, 0(a0)
+ REG_L t1, 0(a1)
+ orc.b t3, t0
+ bne t3, t5, 2f
+ addi a0, a0, SZREG
+ addi a1, a1, SZREG
+ beq t0, t1, 1b
+
+ /*
+ * Words don't match, and no null byte in the first
+ * word. Get bytes in big-endian order and compare.
+ */
+#ifndef CONFIG_CPU_BIG_ENDIAN
+ rev8 t0, t0
+ rev8 t1, t1
+#endif
+
+ /* Synthesize (t0 >= t1) ? 1 : -1 in a branchless sequence. */
+ sltu a0, t0, t1
+ neg a0, a0
+ ori a0, a0, 1
+ ret
+
+2:
+ /*
+ * Found a null byte.
+ * If words don't match, fall back to simple loop.
+ */
+ bne t0, t1, 3f
+
+ /* Otherwise, strings are equal. */
+ li a0, 0
+ ret
+
+ /* Simple loop for misaligned strings. */
+3:
+ /* Restore limit for slow-path. */
+ .p2align 3
+4:
+ bge a0, t4, 6f
+ lbu t0, 0(a0)
+ lbu t1, 0(a1)
+ addi a0, a0, 1
+ addi a1, a1, 1
+ bne t0, t1, 5f
+ bnez t0, 4b
+
+5:
+ sub a0, t0, t1
+ ret
+
+6:
+ li a0, 0
+ ret
+
+.option pop
+#endif
SYM_FUNC_END(strncmp)