summaryrefslogtreecommitdiff
path: root/arch/sparc
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-11-15 14:26:46 +0900
committerDavid S. Miller <davem@davemloft.net>2017-11-15 14:26:46 +0900
commita16036e9b2fbdadf44f9fbbdf95d173d3f78170b (patch)
treedac0c0d3b741e169a4269c59ee1ef152c8ba9583 /arch/sparc
parent9a08862a5d2e266ecea1865547463da2745fc687 (diff)
parent46ad8d2d22c17e2b577adec55ae87161666a3267 (diff)
Merge branch 'sparc64-optimized-fls'
Vijay Kumar says: ==================== sparc64: Optimize fls and __fls SPARC provides the lzcnt instruction (with VIS3), which can be used to optimize the fls, __fls and fls64 functions. For systems that support the lzcnt instruction, we now do boot-time patching to use the SPARC-optimized fls, __fls and fls64 functions. v3->v4: - Fixed a typo. v2->v3: - Using ENTRY(), ENDPROC() for assembler functions. - Removed BITS_PER_LONG from __fls. - Using generic fls64(). - Replaced lzcnt instruction with .word directive. v1->v2: - Fixed delay slot issue. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc')
-rw-r--r--arch/sparc/include/asm/bitops_64.h5
-rw-r--r--arch/sparc/kernel/head_64.S2
-rw-r--r--arch/sparc/lib/Makefile3
-rw-r--r--arch/sparc/lib/NG4fls.S30
-rw-r--r--arch/sparc/lib/NG4patch.S9
-rw-r--r--arch/sparc/lib/fls.S67
-rw-r--r--arch/sparc/lib/fls64.S61
7 files changed, 175 insertions, 2 deletions
diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h
index 2d522402a937..d7a46e2480c0 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -22,10 +22,11 @@ void set_bit(unsigned long nr, volatile unsigned long *addr);
void clear_bit(unsigned long nr, volatile unsigned long *addr);
void change_bit(unsigned long nr, volatile unsigned long *addr);
+int fls(unsigned int word);
+int __fls(unsigned long word);
+
#include <asm-generic/bitops/non-atomic.h>
-#include <asm-generic/bitops/fls.h>
-#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
#ifdef __KERNEL__
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 4de9fbd1a177..f362ecb9955d 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -640,6 +640,8 @@ niagara4_patch:
nop
call niagara4_patch_pageops
nop
+ call niagara4_patch_fls
+ nop
ba,a,pt %xcc, 80f
nop
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index a1a2d39ec96e..2823b8e530ed 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -16,6 +16,9 @@ lib-$(CONFIG_SPARC64) += atomic_64.o
lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
lib-$(CONFIG_SPARC64) += multi3.o
+lib-$(CONFIG_SPARC64) += fls.o
+lib-$(CONFIG_SPARC64) += fls64.o
+obj-$(CONFIG_SPARC64) += NG4fls.o
lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
new file mode 100644
index 000000000000..2d0991e5b034
--- /dev/null
+++ b/arch/sparc/lib/NG4fls.S
@@ -0,0 +1,30 @@
+/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
+ *
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <linux/linkage.h>
+
+#define LZCNT_O0_G2 \
+ .word 0x85b002e8 /* raw instruction word for lzcnt %o0, %g2, emitted via .word instead of the mnemonic */
+
+ .text
+ .register %g2, #scratch
+ .register %g3, #scratch
+
+ENTRY(NG4fls) /* lzcnt-based fls: in %o0 = word, out %o0 = 64 - lzcnt(word); uses %g2, %g3 as scratch */
+ LZCNT_O0_G2 !lzcnt %o0, %g2
+ mov 64, %g3 /* NOTE(review): subtracting from 64 assumes the upper 32 bits of %o0 are zero - confirm against the caller ABI */
+ retl
+ sub %g3, %g2, %o0 /* delay slot of retl: %o0 = 64 - %g2 */
+ENDPROC(NG4fls)
+
+ENTRY(__NG4fls) /* lzcnt-based __fls: in %o0 = word, out %o0 = 63 - lzcnt(word), or 0 when word is 0 */
+ brz,pn %o0, 1f /* word == 0: skip the subtraction so %o0 stays 0; the lzcnt below is the delay slot and runs on both paths */
+ LZCNT_O0_G2 !lzcnt %o0, %g2
+ mov 63, %g3
+ sub %g3, %g2, %o0 /* %o0 = 63 - %g2 */
+1:
+ retl
+ nop /* delay slot of retl */
+ENDPROC(__NG4fls)
diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
index 3cc0f8cc95df..da65a3ebb7cc 100644
--- a/arch/sparc/lib/NG4patch.S
+++ b/arch/sparc/lib/NG4patch.S
@@ -3,6 +3,8 @@
* Copyright (C) 2012 David S. Miller <davem@davemloft.net>
*/
+#include <linux/linkage.h>
+
#define BRANCH_ALWAYS 0x10680000
#define NOP 0x01000000
#define NG_DO_PATCH(OLD, NEW) \
@@ -52,3 +54,10 @@ niagara4_patch_pageops:
retl
nop
.size niagara4_patch_pageops,.-niagara4_patch_pageops
+
+ENTRY(niagara4_patch_fls) /* Applies NG_DO_PATCH to fls and __fls with NG4fls and __NG4fls as the replacements, then returns */
+ NG_DO_PATCH(fls, NG4fls) /* NOTE(review): NG_DO_PATCH(OLD, NEW) body is defined earlier in this file; presumably it redirects OLD to NEW - confirm */
+ NG_DO_PATCH(__fls, __NG4fls)
+ retl
+ nop /* delay slot of retl */
+ENDPROC(niagara4_patch_fls)
diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S
new file mode 100644
index 000000000000..06b8d300bcae
--- /dev/null
+++ b/arch/sparc/lib/fls.S
@@ -0,0 +1,67 @@
+/* fls.S: SPARC default fls definition.
+ *
+ * SPARC default fls definition, which follows the same algorithm as
+ * in generic fls(). This function will be boot time patched on T4
+ * and onward.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+ .text
+ .register %g2, #scratch
+ .register %g3, #scratch
+ENTRY(fls) /* int fls(unsigned int): in %o0 = word, out %o0 = 1-based position of the highest set bit, 0 if word == 0 */
+ brz,pn %o0, 6f /* word == 0: return the 0 loaded into %o1 by the delay slot */
+ mov 0, %o1 /* delay slot: %o1 is the running result */
+ sethi %hi(0xffff0000), %g3
+ mov %o0, %g2 /* %g2 = copy of the working word used for the mask tests */
+ andcc %o0, %g3, %g0 /* any bit set in the top 16 bits? */
+ be,pt %icc, 8f /* no: handle the low half at label 8 */
+ mov 32, %o1 /* delay slot: result starts at 32 (label 8 resets it to 16) */
+ sethi %hi(0xff000000), %g3
+ andcc %g2, %g3, %g0 /* any bit set in the top 8 bits? */
+ bne,pt %icc, 3f /* yes: skip the 8-bit step */
+ sethi %hi(0xf0000000), %g3 /* delay slot: preload the 4-bit mask tested at label 3 */
+ sll %o0, 8, %o0
+1: /* reached with the word already shifted left by 8 */
+ add %o1, -8, %o1
+ sra %o0, 0, %o0 /* sign-extend the low 32 bits of the shifted word */
+ mov %o0, %g2
+2:
+ sethi %hi(0xf0000000), %g3
+3:
+ andcc %g2, %g3, %g0 /* any bit set in the top 4 bits? */
+ bne,pt %icc, 4f /* yes: skip the 4-bit step */
+ sethi %hi(0xc0000000), %g3 /* delay slot: preload the 2-bit mask tested at label 4 */
+ sll %o0, 4, %o0
+ add %o1, -4, %o1
+ sra %o0, 0, %o0
+ mov %o0, %g2
+4:
+ andcc %g2, %g3, %g0 /* any bit set in the top 2 bits? */
+ be,a,pt %icc, 7f /* no: take the 2-bit step at label 7; the annul bit means the delay slot runs only when taken */
+ sll %o0, 2, %o0
+5:
+ xnor %g0, %o0, %o0 /* %o0 = ~word */
+ srl %o0, 31, %o0 /* %o0 = 1 if bit 31 of the word is clear, else 0 */
+ sub %o1, %o0, %o1 /* final 1-bit adjustment of the result */
+6:
+ jmp %o7 + 8 /* return to the caller */
+ sra %o1, 0, %o0 /* delay slot: result moved into %o0 */
+7:
+ add %o1, -2, %o1
+ ba,pt %xcc, 5b
+ sra %o0, 0, %o0 /* delay slot: sign-extend the word shifted by the annulled sll */
+8: /* top 16 bits were clear */
+ sll %o0, 16, %o0
+ sethi %hi(0xff000000), %g3
+ sra %o0, 0, %o0
+ mov %o0, %g2
+ andcc %g2, %g3, %g0 /* any bit set in the top 8 bits of the shifted word? */
+ bne,pt %icc, 2b /* yes: continue with the 4-bit test */
+ mov 16, %o1 /* delay slot: result restarts at 16 on both paths */
+ ba,pt %xcc, 1b /* no: join the 8-bit step */
+ sll %o0, 8, %o0 /* delay slot: the shift by 8 that label 1 expects */
+ENDPROC(fls)
+EXPORT_SYMBOL(fls)
diff --git a/arch/sparc/lib/fls64.S b/arch/sparc/lib/fls64.S
new file mode 100644
index 000000000000..c83e22ae9586
--- /dev/null
+++ b/arch/sparc/lib/fls64.S
@@ -0,0 +1,61 @@
+/* fls64.S: SPARC default __fls definition.
+ *
+ * SPARC default __fls definition, which follows the same algorithm as
+ * in generic __fls(). This function will be boot time patched on T4
+ * and onward.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+ .text
+ .register %g2, #scratch
+ .register %g3, #scratch
+ENTRY(__fls) /* int __fls(unsigned long): in %o0 = word, out %o0 = 0-based position of the highest set bit; uses %g1, %g2, %g3 */
+ mov -1, %g2
+ sllx %g2, 32, %g2 /* %g2 = mask of the top 32 bits */
+ and %o0, %g2, %g2
+ brnz,pt %g2, 1f /* top half non-zero: keep the word as is */
+ mov 63, %g1 /* delay slot: %g1 is the running result, starting at 63 */
+ sllx %o0, 32, %o0 /* top half clear: move the low half up */
+ mov 31, %g1
+1:
+ mov -1, %g2
+ sllx %g2, 48, %g2 /* mask of the top 16 bits */
+ and %o0, %g2, %g2
+ brnz,pt %g2, 2f
+ mov -1, %g2 /* delay slot, runs on both paths */
+ sllx %o0, 16, %o0
+ add %g1, -16, %g1
+2:
+ mov -1, %g2
+ sllx %g2, 56, %g2 /* mask of the top 8 bits */
+ and %o0, %g2, %g2
+ brnz,pt %g2, 3f
+ mov -1, %g2 /* delay slot: %g2 = -1 again, feeding the mask built at label 3 */
+ sllx %o0, 8, %o0
+ add %g1, -8, %g1
+3:
+ sllx %g2, 60, %g2 /* mask of the top 4 bits */
+ and %o0, %g2, %g2
+ brnz,pt %g2, 4f
+ mov -1, %g2 /* delay slot: %g2 = -1 again, feeding the mask built at label 4 */
+ sllx %o0, 4, %o0
+ add %g1, -4, %g1
+4:
+ sllx %g2, 62, %g2 /* mask of the top 2 bits */
+ and %o0, %g2, %g2
+ brnz,pt %g2, 5f
+ mov -1, %g3 /* delay slot: %g3 = -1, feeding the bit-63 mask built at label 5 */
+ sllx %o0, 2, %o0
+ add %g1, -2, %g1
+5:
+ mov 0, %g2
+ sllx %g3, 63, %g3 /* mask of bit 63 */
+ and %o0, %g3, %o0
+ movre %o0, 1, %g2 /* %g2 = 1 when bit 63 of the word is clear, else stays 0 */
+ sub %g1, %g2, %g1 /* final 1-bit adjustment of the result */
+ jmp %o7+8 /* return to the caller */
+ sra %g1, 0, %o0 /* delay slot: result moved into %o0 */
+ENDPROC(__fls)
+EXPORT_SYMBOL(__fls)