aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-05-18 12:17:01 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-05-18 12:17:01 -0400
commitcb41838bbc4403f7270a94b93a9a0d9fc9c2e7ea (patch)
tree0f359975ccad4ac72e86b8edf1924c076e74bd89 /arch/x86
parent98f01720cbe3e2eb719682777049b6514e9db556 (diff)
parentc59bd5688299cddb71183e156e7a3c1409b90df2 (diff)
Merge branch 'core-hweight-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'core-hweight-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: x86, hweight: Use a 32-bit popcnt for __arch_hweight32() arch, hweight: Fix compilation errors x86: Add optimized popcnt variants bitops: Optimize hweight() by making use of compile-time evaluation
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig5
-rw-r--r--arch/x86/include/asm/alternative.h9
-rw-r--r--arch/x86/include/asm/arch_hweight.h61
-rw-r--r--arch/x86/include/asm/bitops.h4
4 files changed, 75 insertions, 4 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7b7838874552..a2d3a5fbeeda 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -237,6 +237,11 @@ config X86_32_LAZY_GS
237 def_bool y 237 def_bool y
238 depends on X86_32 && !CC_STACKPROTECTOR 238 depends on X86_32 && !CC_STACKPROTECTOR
239 239
240config ARCH_HWEIGHT_CFLAGS
241 string
242 default "-fcall-saved-ecx -fcall-saved-edx" if X86_32
243 default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64
244
240config KTIME_SCALAR 245config KTIME_SCALAR
241 def_bool X86_32 246 def_bool X86_32
242source "init/Kconfig" 247source "init/Kconfig"
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 92a9033c14d1..03b6bb5394a0 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -42,9 +42,6 @@
42#define LOCK_PREFIX "" 42#define LOCK_PREFIX ""
43#endif 43#endif
44 44
45/* This must be included *after* the definition of LOCK_PREFIX */
46#include <asm/cpufeature.h>
47
48struct alt_instr { 45struct alt_instr {
49 u8 *instr; /* original instruction */ 46 u8 *instr; /* original instruction */
50 u8 *replacement; 47 u8 *replacement;
@@ -99,6 +96,12 @@ static inline int alternatives_text_reserved(void *start, void *end)
99 ".previous" 96 ".previous"
100 97
101/* 98/*
99 * This must be included *after* the definition of ALTERNATIVE due to
100 * <asm/arch_hweight.h>
101 */
102#include <asm/cpufeature.h>
103
104/*
102 * Alternative instructions for different CPU types or capabilities. 105 * Alternative instructions for different CPU types or capabilities.
103 * 106 *
104 * This allows to use optimized instructions even on generic binary 107 * This allows to use optimized instructions even on generic binary
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
new file mode 100644
index 000000000000..9686c3d9ff73
--- /dev/null
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -0,0 +1,61 @@
1#ifndef _ASM_X86_HWEIGHT_H
2#define _ASM_X86_HWEIGHT_H
3
4#ifdef CONFIG_64BIT
5/* popcnt %edi, %eax -- redundant REX prefix for alignment */
6#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"
7/* popcnt %rdi, %rax */
8#define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7"
9#define REG_IN "D"
10#define REG_OUT "a"
11#else
12/* popcnt %eax, %eax */
13#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc0"
14#define REG_IN "a"
15#define REG_OUT "a"
16#endif
17
18/*
19 * __sw_hweightXX are called from within the alternatives below
20 * and callee-clobbered registers need to be taken care of. See
21 * ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective
22 * compiler switches.
23 */
24static inline unsigned int __arch_hweight32(unsigned int w)
25{
26 unsigned int res = 0;
27
28 asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT)
29 : "="REG_OUT (res)
30 : REG_IN (w));
31
32 return res;
33}
34
35static inline unsigned int __arch_hweight16(unsigned int w)
36{
37 return __arch_hweight32(w & 0xffff);
38}
39
40static inline unsigned int __arch_hweight8(unsigned int w)
41{
42 return __arch_hweight32(w & 0xff);
43}
44
45static inline unsigned long __arch_hweight64(__u64 w)
46{
47 unsigned long res = 0;
48
49#ifdef CONFIG_X86_32
50 return __arch_hweight32((u32)w) +
51 __arch_hweight32((u32)(w >> 32));
52#else
53 asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT)
54 : "="REG_OUT (res)
55 : REG_IN (w));
56#endif /* CONFIG_X86_32 */
57
58 return res;
59}
60
61#endif
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 02b47a603fc8..545776efeb16 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -444,7 +444,9 @@ static inline int fls(int x)
444 444
445#define ARCH_HAS_FAST_MULTIPLIER 1 445#define ARCH_HAS_FAST_MULTIPLIER 1
446 446
447#include <asm-generic/bitops/hweight.h> 447#include <asm/arch_hweight.h>
448
449#include <asm-generic/bitops/const_hweight.h>
448 450
449#endif /* __KERNEL__ */ 451#endif /* __KERNEL__ */
450 452