summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/Kconfig5
-rw-r--r--arch/x86/include/asm/arch_hweight.h24
-rw-r--r--arch/x86/kernel/i386_ksyms_32.c2
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c3
-rw-r--r--arch/x86/lib/Makefile2
-rw-r--r--arch/x86/lib/hweight.S77
-rw-r--r--lib/Makefile5
-rw-r--r--lib/hweight.c4
8 files changed, 97 insertions, 25 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0a7b885964ba..729d41d9ced3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -294,11 +294,6 @@ config X86_32_LAZY_GS
def_bool y
depends on X86_32 && !CC_STACKPROTECTOR
-config ARCH_HWEIGHT_CFLAGS
- string
- default "-fcall-saved-ecx -fcall-saved-edx" if X86_32
- default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64
-
config ARCH_SUPPORTS_UPROBES
def_bool y
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index 02e799fa43d1..e7cd63175de4 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -4,8 +4,8 @@
#include <asm/cpufeatures.h>
#ifdef CONFIG_64BIT
-/* popcnt %edi, %eax -- redundant REX prefix for alignment */
-#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"
+/* popcnt %edi, %eax */
+#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc7"
/* popcnt %rdi, %rax */
#define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7"
#define REG_IN "D"
@@ -17,19 +17,15 @@
#define REG_OUT "a"
#endif
-/*
- * __sw_hweightXX are called from within the alternatives below
- * and callee-clobbered registers need to be taken care of. See
- * ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective
- * compiler switches.
- */
+#define __HAVE_ARCH_SW_HWEIGHT
+
static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
- unsigned int res = 0;
+ unsigned int res;
asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ : "="REG_OUT (res)
+ : REG_IN (w));
return res;
}
@@ -53,11 +49,11 @@ static inline unsigned long __arch_hweight64(__u64 w)
#else
static __always_inline unsigned long __arch_hweight64(__u64 w)
{
- unsigned long res = 0;
+ unsigned long res;
asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ : "="REG_OUT (res)
+ : REG_IN (w));
return res;
}
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index 64341aa485ae..d40ee8a38fed 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -42,3 +42,5 @@ EXPORT_SYMBOL(empty_zero_page);
EXPORT_SYMBOL(___preempt_schedule);
EXPORT_SYMBOL(___preempt_schedule_notrace);
#endif
+
+EXPORT_SYMBOL(__sw_hweight32);
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index cd05942bc918..f1aebfb49c36 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -44,6 +44,9 @@ EXPORT_SYMBOL(clear_page);
EXPORT_SYMBOL(csum_partial);
+EXPORT_SYMBOL(__sw_hweight32);
+EXPORT_SYMBOL(__sw_hweight64);
+
/*
* Export string functions. We normally rely on gcc builtin for most of these,
* but gcc sometimes decides not to inline them.
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 72a576752a7e..ec969cc3eb20 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -25,7 +25,7 @@ lib-y += memcpy_$(BITS).o
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
-obj-y += msr.o msr-reg.o msr-reg-export.o
+obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
ifeq ($(CONFIG_X86_32),y)
obj-y += atomic64_32.o
diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S
new file mode 100644
index 000000000000..02de3d74d2c5
--- /dev/null
+++ b/arch/x86/lib/hweight.S
@@ -0,0 +1,77 @@
+#include <linux/linkage.h>
+
+#include <asm/asm.h>
+
+/*
+ * unsigned int __sw_hweight32(unsigned int w)
+ * %rdi: w
+ */
+ENTRY(__sw_hweight32)
+
+#ifdef CONFIG_X86_64
+ movl %edi, %eax # w
+#endif
+ __ASM_SIZE(push,) %__ASM_REG(dx)
+ movl %eax, %edx # w -> t
+ shrl %edx # t >>= 1
+ andl $0x55555555, %edx # t &= 0x55555555
+ subl %edx, %eax # w -= t
+
+ movl %eax, %edx # w -> t
+ shrl $2, %eax # w_tmp >>= 2
+ andl $0x33333333, %edx # t &= 0x33333333
+ andl $0x33333333, %eax # w_tmp &= 0x33333333
+ addl %edx, %eax # w = w_tmp + t
+
+ movl %eax, %edx # w -> t
+ shrl $4, %edx # t >>= 4
+ addl %edx, %eax # w_tmp += t
+ andl $0x0f0f0f0f, %eax # w_tmp &= 0x0f0f0f0f
+ imull $0x01010101, %eax, %eax # w_tmp *= 0x01010101
+ shrl $24, %eax # w = w_tmp >> 24
+ __ASM_SIZE(pop,) %__ASM_REG(dx)
+ ret
+ENDPROC(__sw_hweight32)
+
+ENTRY(__sw_hweight64)
+#ifdef CONFIG_X86_64
+ pushq %rdx
+
+ movq %rdi, %rdx # w -> t
+ movabsq $0x5555555555555555, %rax
+ shrq %rdx # t >>= 1
+ andq %rdx, %rax # t &= 0x5555555555555555
+ movabsq $0x3333333333333333, %rdx
+ subq %rax, %rdi # w -= t
+
+ movq %rdi, %rax # w -> t
+ shrq $2, %rdi # w_tmp >>= 2
+ andq %rdx, %rax # t &= 0x3333333333333333
+ andq %rdi, %rdx # w_tmp &= 0x3333333333333333
+ addq %rdx, %rax # w = w_tmp + t
+
+ movq %rax, %rdx # w -> t
+ shrq $4, %rdx # t >>= 4
+ addq %rdx, %rax # w_tmp += t
+ movabsq $0x0f0f0f0f0f0f0f0f, %rdx
+ andq %rdx, %rax # w_tmp &= 0x0f0f0f0f0f0f0f0f
+ movabsq $0x0101010101010101, %rdx
+ imulq %rdx, %rax # w_tmp *= 0x0101010101010101
+ shrq $56, %rax # w = w_tmp >> 56
+
+ popq %rdx
+ ret
+#else /* CONFIG_X86_32 */
+ /* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */
+ pushl %ecx
+
+ call __sw_hweight32
+ movl %eax, %ecx # stash away result
+ movl %edx, %eax # second part of input
+ call __sw_hweight32
+ addl %ecx, %eax # result
+
+ popl %ecx
+ ret
+#endif
+ENDPROC(__sw_hweight64)
diff --git a/lib/Makefile b/lib/Makefile
index ff6a7a6c6395..07d06a8b9788 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -15,9 +15,6 @@ KCOV_INSTRUMENT_rbtree.o := n
KCOV_INSTRUMENT_list_debug.o := n
KCOV_INSTRUMENT_debugobjects.o := n
KCOV_INSTRUMENT_dynamic_debug.o := n
-# Kernel does not boot if we instrument this file as it uses custom calling
-# convention (see CONFIG_ARCH_HWEIGHT_CFLAGS).
-KCOV_INSTRUMENT_hweight.o := n
lib-y := ctype.o string.o vsprintf.o cmdline.o \
rbtree.o radix-tree.o dump_stack.o timerqueue.o\
@@ -74,8 +71,6 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
-GCOV_PROFILE_hweight.o := n
-CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
obj-$(CONFIG_BTREE) += btree.o
diff --git a/lib/hweight.c b/lib/hweight.c
index 9a5c1f221558..43273a7d83cf 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -9,6 +9,7 @@
* The Hamming Weight of a number is the total number of bits set in it.
*/
+#ifndef __HAVE_ARCH_SW_HWEIGHT
unsigned int __sw_hweight32(unsigned int w)
{
#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
@@ -25,6 +26,7 @@ unsigned int __sw_hweight32(unsigned int w)
#endif
}
EXPORT_SYMBOL(__sw_hweight32);
+#endif
unsigned int __sw_hweight16(unsigned int w)
{
@@ -43,6 +45,7 @@ unsigned int __sw_hweight8(unsigned int w)
}
EXPORT_SYMBOL(__sw_hweight8);
+#ifndef __HAVE_ARCH_SW_HWEIGHT
unsigned long __sw_hweight64(__u64 w)
{
#if BITS_PER_LONG == 32
@@ -65,3 +68,4 @@ unsigned long __sw_hweight64(__u64 w)
#endif
}
EXPORT_SYMBOL(__sw_hweight64);
+#endif