summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBorislav Petkov <bp@suse.de>2016-05-30 06:56:27 -0400
committerIngo Molnar <mingo@kernel.org>2016-06-08 09:01:02 -0400
commitf5967101e9de12addcda4510dfbac66d7c5779c3 (patch)
tree2f01f9e5eb430728ebecd5abff9851fafb5c8c62
parent08dd8cd06ed95625b9e2fac43c78fcb45b7eaf94 (diff)
x86/hweight: Get rid of the special calling convention
People complained about ARCH_HWEIGHT_CFLAGS and how it throws a wrench into kcov, lto, etc, experimentations. Add asm versions for __sw_hweight{32,64}() and do explicit saving and restoring of clobbered registers. This gets rid of the special calling convention. We get to call those functions on !X86_FEATURE_POPCNT CPUs. We still need to hardcode POPCNT and register operands as some old gas versions which we support, do not know about POPCNT. Btw, remove redundant REX prefix from 32-bit POPCNT because alternatives can do padding now. Suggested-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Borislav Petkov <bp@suse.de> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/1464605787-20603-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--arch/x86/Kconfig5
-rw-r--r--arch/x86/include/asm/arch_hweight.h24
-rw-r--r--arch/x86/kernel/i386_ksyms_32.c2
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c3
-rw-r--r--arch/x86/lib/Makefile2
-rw-r--r--arch/x86/lib/hweight.S77
-rw-r--r--lib/Makefile5
-rw-r--r--lib/hweight.c4
8 files changed, 97 insertions, 25 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0a7b885964ba..729d41d9ced3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -294,11 +294,6 @@ config X86_32_LAZY_GS
294 def_bool y 294 def_bool y
295 depends on X86_32 && !CC_STACKPROTECTOR 295 depends on X86_32 && !CC_STACKPROTECTOR
296 296
297config ARCH_HWEIGHT_CFLAGS
298 string
299 default "-fcall-saved-ecx -fcall-saved-edx" if X86_32
300 default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64
301
302config ARCH_SUPPORTS_UPROBES 297config ARCH_SUPPORTS_UPROBES
303 def_bool y 298 def_bool y
304 299
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index 02e799fa43d1..e7cd63175de4 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -4,8 +4,8 @@
4#include <asm/cpufeatures.h> 4#include <asm/cpufeatures.h>
5 5
6#ifdef CONFIG_64BIT 6#ifdef CONFIG_64BIT
7/* popcnt %edi, %eax -- redundant REX prefix for alignment */ 7/* popcnt %edi, %eax */
8#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7" 8#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc7"
9/* popcnt %rdi, %rax */ 9/* popcnt %rdi, %rax */
10#define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7" 10#define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7"
11#define REG_IN "D" 11#define REG_IN "D"
@@ -17,19 +17,15 @@
17#define REG_OUT "a" 17#define REG_OUT "a"
18#endif 18#endif
19 19
20/* 20#define __HAVE_ARCH_SW_HWEIGHT
21 * __sw_hweightXX are called from within the alternatives below 21
22 * and callee-clobbered registers need to be taken care of. See
23 * ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective
24 * compiler switches.
25 */
26static __always_inline unsigned int __arch_hweight32(unsigned int w) 22static __always_inline unsigned int __arch_hweight32(unsigned int w)
27{ 23{
28 unsigned int res = 0; 24 unsigned int res;
29 25
30 asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT) 26 asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT)
31 : "="REG_OUT (res) 27 : "="REG_OUT (res)
32 : REG_IN (w)); 28 : REG_IN (w));
33 29
34 return res; 30 return res;
35} 31}
@@ -53,11 +49,11 @@ static inline unsigned long __arch_hweight64(__u64 w)
53#else 49#else
54static __always_inline unsigned long __arch_hweight64(__u64 w) 50static __always_inline unsigned long __arch_hweight64(__u64 w)
55{ 51{
56 unsigned long res = 0; 52 unsigned long res;
57 53
58 asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT) 54 asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT)
59 : "="REG_OUT (res) 55 : "="REG_OUT (res)
60 : REG_IN (w)); 56 : REG_IN (w));
61 57
62 return res; 58 return res;
63} 59}
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index 64341aa485ae..d40ee8a38fed 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -42,3 +42,5 @@ EXPORT_SYMBOL(empty_zero_page);
42EXPORT_SYMBOL(___preempt_schedule); 42EXPORT_SYMBOL(___preempt_schedule);
43EXPORT_SYMBOL(___preempt_schedule_notrace); 43EXPORT_SYMBOL(___preempt_schedule_notrace);
44#endif 44#endif
45
46EXPORT_SYMBOL(__sw_hweight32);
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index cd05942bc918..f1aebfb49c36 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -44,6 +44,9 @@ EXPORT_SYMBOL(clear_page);
44 44
45EXPORT_SYMBOL(csum_partial); 45EXPORT_SYMBOL(csum_partial);
46 46
47EXPORT_SYMBOL(__sw_hweight32);
48EXPORT_SYMBOL(__sw_hweight64);
49
47/* 50/*
48 * Export string functions. We normally rely on gcc builtin for most of these, 51 * Export string functions. We normally rely on gcc builtin for most of these,
49 * but gcc sometimes decides not to inline them. 52 * but gcc sometimes decides not to inline them.
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 72a576752a7e..ec969cc3eb20 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -25,7 +25,7 @@ lib-y += memcpy_$(BITS).o
25lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o 25lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
26lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o 26lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
27 27
28obj-y += msr.o msr-reg.o msr-reg-export.o 28obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
29 29
30ifeq ($(CONFIG_X86_32),y) 30ifeq ($(CONFIG_X86_32),y)
31 obj-y += atomic64_32.o 31 obj-y += atomic64_32.o
diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S
new file mode 100644
index 000000000000..02de3d74d2c5
--- /dev/null
+++ b/arch/x86/lib/hweight.S
@@ -0,0 +1,77 @@
1#include <linux/linkage.h>
2
3#include <asm/asm.h>
4
/*
 * unsigned int __sw_hweight32(unsigned int w)
 * %rdi: w  (64-bit) / %eax: w  (32-bit, see REG_IN in arch_hweight.h)
 *
 * SWAR ("SIMD within a register") population count: pairwise bit sums,
 * then 2-bit, then 4-bit groups, then a multiply to sum all bytes into
 * the top byte.  Result is returned in %eax.
 *
 * Called from the ALTERNATIVE() in arch_hweight.h, whose inline asm only
 * declares %eax (out) and the input register — so everything else this
 * function touches (%edx/%rdx) must be saved and restored here.
 */
ENTRY(__sw_hweight32)

#ifdef CONFIG_X86_64
	movl %edi, %eax				# w
#endif
	__ASM_SIZE(push,) %__ASM_REG(dx)	# %edx/%rdx is caller-visible: preserve it
	movl %eax, %edx				# w -> t
	shrl %edx				# t >>= 1
	andl $0x55555555, %edx			# t &= 0x55555555
	subl %edx, %eax				# w -= t; each 2-bit field now holds its popcount

	movl %eax, %edx				# w -> t
	shrl $2, %eax				# w_tmp >>= 2
	andl $0x33333333, %edx			# t &= 0x33333333
	andl $0x33333333, %eax			# w_tmp &= 0x33333333
	addl %edx, %eax				# w = w_tmp + t; 4-bit fields hold popcounts

	movl %eax, %edx				# w -> t
	shrl $4, %edx				# t >>= 4
	addl %edx, %eax				# w_tmp += t
	andl $0x0f0f0f0f, %eax			# w_tmp &= 0x0f0f0f0f; per-byte popcounts
	imull $0x01010101, %eax, %eax		# w_tmp *= 0x01010101; sums bytes into MSB
	shrl $24, %eax				# w = w_tmp >> 24
	__ASM_SIZE(pop,) %__ASM_REG(dx)
	ret
ENDPROC(__sw_hweight32)
35
/*
 * unsigned long __sw_hweight64(__u64 w)
 * 64-bit: %rdi: w; 32-bit: w arrives in the %edx:%eax pair (REG_IN "A").
 *
 * Same SWAR popcount as __sw_hweight32, widened to 64 bits. Result in
 * %rax / %eax.
 *
 * FIX: the inline-asm ALTERNATIVE in arch_hweight.h declares w only as
 * an *input* operand ("D"), so the compiler is entitled to assume %rdi
 * survives the call — but this routine modifies %rdi (subq/shrq/andq).
 * Save and restore %rdi alongside %rdx to honor that contract.
 */
ENTRY(__sw_hweight64)
#ifdef CONFIG_X86_64
	pushq   %rdi				# %rdi is an input-only operand to the caller's asm
	pushq   %rdx

	movq    %rdi, %rdx			# w -> t
	movabsq $0x5555555555555555, %rax
	shrq    %rdx				# t >>= 1
	andq    %rdx, %rax			# t &= 0x5555555555555555
	movabsq $0x3333333333333333, %rdx
	subq    %rax, %rdi			# w -= t

	movq    %rdi, %rax			# w -> t
	shrq    $2, %rdi			# w_tmp >>= 2
	andq    %rdx, %rax			# t &= 0x3333333333333333
	andq    %rdi, %rdx			# w_tmp &= 0x3333333333333333
	addq    %rdx, %rax			# w = w_tmp + t

	movq    %rax, %rdx			# w -> t
	shrq    $4, %rdx			# t >>= 4
	addq    %rdx, %rax			# w_tmp += t
	movabsq $0x0f0f0f0f0f0f0f0f, %rdx
	andq    %rdx, %rax			# w_tmp &= 0x0f0f0f0f0f0f0f0f
	movabsq $0x0101010101010101, %rdx
	imulq   %rdx, %rax			# w_tmp *= 0x0101010101010101
	shrq    $56, %rax			# w = w_tmp >> 56

	popq    %rdx
	popq    %rdi
	ret
#else /* CONFIG_X86_32 */
	/* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */
	pushl   %ecx				# scratch for the low-half result

	call    __sw_hweight32			# popcount of low 32 bits (%edx preserved by callee)
	movl    %eax, %ecx			# stash away result
	movl    %edx, %eax			# second part of input
	call    __sw_hweight32			# popcount of high 32 bits
	addl    %ecx, %eax			# result

	popl    %ecx
	ret
#endif
ENDPROC(__sw_hweight64)
diff --git a/lib/Makefile b/lib/Makefile
index ff6a7a6c6395..07d06a8b9788 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -15,9 +15,6 @@ KCOV_INSTRUMENT_rbtree.o := n
15KCOV_INSTRUMENT_list_debug.o := n 15KCOV_INSTRUMENT_list_debug.o := n
16KCOV_INSTRUMENT_debugobjects.o := n 16KCOV_INSTRUMENT_debugobjects.o := n
17KCOV_INSTRUMENT_dynamic_debug.o := n 17KCOV_INSTRUMENT_dynamic_debug.o := n
18# Kernel does not boot if we instrument this file as it uses custom calling
19# convention (see CONFIG_ARCH_HWEIGHT_CFLAGS).
20KCOV_INSTRUMENT_hweight.o := n
21 18
22lib-y := ctype.o string.o vsprintf.o cmdline.o \ 19lib-y := ctype.o string.o vsprintf.o cmdline.o \
23 rbtree.o radix-tree.o dump_stack.o timerqueue.o\ 20 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
@@ -74,8 +71,6 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
74obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o 71obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
75obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o 72obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
76 73
77GCOV_PROFILE_hweight.o := n
78CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
79obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o 74obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
80 75
81obj-$(CONFIG_BTREE) += btree.o 76obj-$(CONFIG_BTREE) += btree.o
diff --git a/lib/hweight.c b/lib/hweight.c
index 9a5c1f221558..43273a7d83cf 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -9,6 +9,7 @@
9 * The Hamming Weight of a number is the total number of bits set in it. 9 * The Hamming Weight of a number is the total number of bits set in it.
10 */ 10 */
11 11
12#ifndef __HAVE_ARCH_SW_HWEIGHT
12unsigned int __sw_hweight32(unsigned int w) 13unsigned int __sw_hweight32(unsigned int w)
13{ 14{
14#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER 15#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
@@ -25,6 +26,7 @@ unsigned int __sw_hweight32(unsigned int w)
25#endif 26#endif
26} 27}
27EXPORT_SYMBOL(__sw_hweight32); 28EXPORT_SYMBOL(__sw_hweight32);
29#endif
28 30
29unsigned int __sw_hweight16(unsigned int w) 31unsigned int __sw_hweight16(unsigned int w)
30{ 32{
@@ -43,6 +45,7 @@ unsigned int __sw_hweight8(unsigned int w)
43} 45}
44EXPORT_SYMBOL(__sw_hweight8); 46EXPORT_SYMBOL(__sw_hweight8);
45 47
48#ifndef __HAVE_ARCH_SW_HWEIGHT
46unsigned long __sw_hweight64(__u64 w) 49unsigned long __sw_hweight64(__u64 w)
47{ 50{
48#if BITS_PER_LONG == 32 51#if BITS_PER_LONG == 32
@@ -65,3 +68,4 @@ unsigned long __sw_hweight64(__u64 w)
65#endif 68#endif
66} 69}
67EXPORT_SYMBOL(__sw_hweight64); 70EXPORT_SYMBOL(__sw_hweight64);
71#endif