From b6fcdb191e36f82336f9b5e126d51c02e7323480 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Fri, 13 Jan 2023 22:23:01 +0100 Subject: RISC-V: add zbb support to string functions Add handling for ZBB extension and add support for using it as a variant for optimized string functions. Support for the Zbb-str-variants is limited to the GNU-assembler for now, as LLVM has not yet acquired the functionality to selectively change the arch option in assembler code. This is still under review at https://reviews.llvm.org/D123515 Co-developed-by: Christoph Muellner Signed-off-by: Christoph Muellner Signed-off-by: Heiko Stuebner Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230113212301.3534711-3-heiko@sntech.de Signed-off-by: Palmer Dabbelt --- arch/riscv/lib/strcmp.S | 85 ++++++++++++++++++++++++++++++++++++++ arch/riscv/lib/strlen.S | 105 +++++++++++++++++++++++++++++++++++++++++++++++ arch/riscv/lib/strncmp.S | 98 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 288 insertions(+) (limited to 'arch/riscv/lib') diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S index 8babd712b958..8148b6418f61 100644 --- a/arch/riscv/lib/strcmp.S +++ b/arch/riscv/lib/strcmp.S @@ -3,9 +3,14 @@ #include #include #include +#include +#include /* int strcmp(const char *cs, const char *ct) */ SYM_FUNC_START(strcmp) + + ALTERNATIVE("nop", "j strcmp_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB) + /* * Returns * a0 - comparison result, value like strcmp @@ -33,4 +38,84 @@ SYM_FUNC_START(strcmp) */ sub a0, t0, t1 ret + +/* + * Variant of strcmp using the ZBB extension if available + */ +#ifdef CONFIG_RISCV_ISA_ZBB +strcmp_zbb: + +.option push +.option arch,+zbb + + /* + * Returns + * a0 - comparison result, value like strcmp + * + * Parameters + * a0 - string1 + * a1 - string2 + * + * Clobbers + * t0, t1, t2, t3, t4, t5 + */ + + or t2, a0, a1 + li t4, -1 + and t2, t2, SZREG-1 + bnez t2, 3f + + /* Main loop for aligned string. */ + .p2align 3 +1: + REG_L t0, 0(a0) + REG_L t1, 0(a1) + orc.b t3, t0 + bne t3, t4, 2f + addi a0, a0, SZREG + addi a1, a1, SZREG + beq t0, t1, 1b + + /* + * Words don't match, and no null byte in the first + * word. Get bytes in big-endian order and compare. + */ +#ifndef CONFIG_CPU_BIG_ENDIAN + rev8 t0, t0 + rev8 t1, t1 +#endif + + /* Synthesize (t0 >= t1) ? 1 : -1 in a branchless sequence. */ + sltu a0, t0, t1 + neg a0, a0 + ori a0, a0, 1 + ret + +2: + /* + * Found a null byte. + * If words don't match, fall back to simple loop. + */ + bne t0, t1, 3f + + /* Otherwise, strings are equal. */ + li a0, 0 + ret + + /* Simple loop for misaligned strings. */ + .p2align 3 +3: + lbu t0, 0(a0) + lbu t1, 0(a1) + addi a0, a0, 1 + addi a1, a1, 1 + bne t0, t1, 4f + bnez t0, 3b + +4: + sub a0, t0, t1 + ret + +.option pop +#endif SYM_FUNC_END(strcmp) diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S index 0a3b11853efd..0f9dbf93301a 100644 --- a/arch/riscv/lib/strlen.S +++ b/arch/riscv/lib/strlen.S @@ -3,9 +3,14 @@ #include #include #include +#include +#include /* int strlen(const char *s) */ SYM_FUNC_START(strlen) + + ALTERNATIVE("nop", "j strlen_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB) + /* * Returns * a0 - string length @@ -25,4 +30,104 @@ SYM_FUNC_START(strlen) 2: sub a0, t1, a0 ret + +/* + * Variant of strlen using the ZBB extension if available + */ +#ifdef CONFIG_RISCV_ISA_ZBB +strlen_zbb: + +#ifdef CONFIG_CPU_BIG_ENDIAN +# define CZ clz +# define SHIFT sll +#else +# define CZ ctz +# define SHIFT srl +#endif + +.option push +.option arch,+zbb + + /* + * Returns + * a0 - string length + * + * Parameters + * a0 - String to measure + * + * Clobbers + * t0, t1, t2, t3 + */ + + /* Number of irrelevant bytes in the first word. */ + andi t2, a0, SZREG-1 + + /* Align pointer. */ + andi t0, a0, -SZREG + + li t3, SZREG + sub t3, t3, t2 + slli t2, t2, 3 + + /* Get the first word. */ + REG_L t1, 0(t0) + + /* + * Shift away the partial data we loaded to remove the irrelevant bytes + * preceding the string with the effect of adding NUL bytes at the + * end of the string's first word. + */ + SHIFT t1, t1, t2 + + /* Convert non-NUL into 0xff and NUL into 0x00. */ + orc.b t1, t1 + + /* Convert non-NUL into 0x00 and NUL into 0xff. */ + not t1, t1 + + /* + * Search for the first set bit (corresponding to a NUL byte in the + * original chunk). + */ + CZ t1, t1 + + /* + * The first chunk is special: compare against the number + * of valid bytes in this chunk. + */ + srli a0, t1, 3 + bgtu t3, a0, 3f + + /* Prepare for the word comparison loop. */ + addi t2, t0, SZREG + li t3, -1 + + /* + * Our critical loop is 4 instructions and processes data in + * 4 byte or 8 byte chunks. + */ + .p2align 3 +1: + REG_L t1, SZREG(t0) + addi t0, t0, SZREG + orc.b t1, t1 + beq t1, t3, 1b +2: + not t1, t1 + CZ t1, t1 + + /* Get number of processed words. */ + sub t2, t0, t2 + + /* Add number of characters in the first word. */ + add a0, a0, t2 + srli t1, t1, 3 + + /* Add number of characters in the last word. */ + add a0, a0, t1 +3: + ret + +.option pop +#endif SYM_FUNC_END(strlen) diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S index 1f644d0a93f6..7940ddab2d48 100644 --- a/arch/riscv/lib/strncmp.S +++ b/arch/riscv/lib/strncmp.S @@ -3,9 +3,14 @@ #include #include #include +#include +#include /* int strncmp(const char *cs, const char *ct, size_t count) */ SYM_FUNC_START(strncmp) + + ALTERNATIVE("nop", "j strncmp_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB) + /* * Returns * a0 - comparison result, value like strncmp @@ -38,4 +43,97 @@ SYM_FUNC_START(strncmp) */ sub a0, t0, t1 ret + +/* + * Variant of strncmp using the ZBB extension if available + */ +#ifdef CONFIG_RISCV_ISA_ZBB +strncmp_zbb: + +.option push +.option arch,+zbb + + /* + * Returns + * a0 - comparison result, like strncmp + * + * Parameters + * a0 - string1 + * a1 - string2 + * a2 - number of characters to compare + * + * Clobbers + * t0, t1, t2, t3, t4, t5, t6 + */ + + or t2, a0, a1 + li t5, -1 + and t2, t2, SZREG-1 + add t4, a0, a2 + bnez t2, 4f + + /* Adjust limit for fast-path. */ + andi t6, t4, -SZREG + + /* Main loop for aligned string. */ + .p2align 3 +1: + bgt a0, t6, 3f + REG_L t0, 0(a0) + REG_L t1, 0(a1) + orc.b t3, t0 + bne t3, t5, 2f + addi a0, a0, SZREG + addi a1, a1, SZREG + beq t0, t1, 1b + + /* + * Words don't match, and no null byte in the first + * word. Get bytes in big-endian order and compare. + */ +#ifndef CONFIG_CPU_BIG_ENDIAN + rev8 t0, t0 + rev8 t1, t1 +#endif + + /* Synthesize (t0 >= t1) ? 1 : -1 in a branchless sequence. */ + sltu a0, t0, t1 + neg a0, a0 + ori a0, a0, 1 + ret + +2: + /* + * Found a null byte. + * If words don't match, fall back to simple loop. + */ + bne t0, t1, 3f + + /* Otherwise, strings are equal. */ + li a0, 0 + ret + + /* Simple loop for misaligned strings. */ +3: + /* Restore limit for slow-path. */ + .p2align 3 +4: + bge a0, t4, 6f + lbu t0, 0(a0) + lbu t1, 0(a1) + addi a0, a0, 1 + addi a1, a1, 1 + bne t0, t1, 5f + bnez t0, 4b + +5: + sub a0, t0, t1 + ret + +6: + li a0, 0 + ret + +.option pop +#endif SYM_FUNC_END(strncmp) -- cgit