diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-18 12:17:01 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-18 12:17:01 -0400 |
commit | cb41838bbc4403f7270a94b93a9a0d9fc9c2e7ea (patch) | |
tree | 0f359975ccad4ac72e86b8edf1924c076e74bd89 /arch | |
parent | 98f01720cbe3e2eb719682777049b6514e9db556 (diff) | |
parent | c59bd5688299cddb71183e156e7a3c1409b90df2 (diff) |
Merge branch 'core-hweight-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'core-hweight-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
x86, hweight: Use a 32-bit popcnt for __arch_hweight32()
arch, hweight: Fix compilation errors
x86: Add optimized popcnt variants
bitops: Optimize hweight() by making use of compile-time evaluation
Diffstat (limited to 'arch')
-rw-r--r-- | arch/alpha/include/asm/bitops.h | 18 | ||||
-rw-r--r-- | arch/ia64/include/asm/bitops.h | 11 | ||||
-rw-r--r-- | arch/sparc/include/asm/bitops_64.h | 11 | ||||
-rw-r--r-- | arch/x86/Kconfig | 5 | ||||
-rw-r--r-- | arch/x86/include/asm/alternative.h | 9 | ||||
-rw-r--r-- | arch/x86/include/asm/arch_hweight.h | 61 | ||||
-rw-r--r-- | arch/x86/include/asm/bitops.h | 4 |
7 files changed, 97 insertions, 22 deletions
diff --git a/arch/alpha/include/asm/bitops.h b/arch/alpha/include/asm/bitops.h index 15f3ae25c511..296da1d5ed57 100644 --- a/arch/alpha/include/asm/bitops.h +++ b/arch/alpha/include/asm/bitops.h | |||
@@ -405,29 +405,31 @@ static inline int fls(int x) | |||
405 | 405 | ||
406 | #if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67) | 406 | #if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67) |
407 | /* Whee. EV67 can calculate it directly. */ | 407 | /* Whee. EV67 can calculate it directly. */ |
408 | static inline unsigned long hweight64(unsigned long w) | 408 | static inline unsigned long __arch_hweight64(unsigned long w) |
409 | { | 409 | { |
410 | return __kernel_ctpop(w); | 410 | return __kernel_ctpop(w); |
411 | } | 411 | } |
412 | 412 | ||
413 | static inline unsigned int hweight32(unsigned int w) | 413 | static inline unsigned int __arch_weight32(unsigned int w) |
414 | { | 414 | { |
415 | return hweight64(w); | 415 | return __arch_hweight64(w); |
416 | } | 416 | } |
417 | 417 | ||
418 | static inline unsigned int hweight16(unsigned int w) | 418 | static inline unsigned int __arch_hweight16(unsigned int w) |
419 | { | 419 | { |
420 | return hweight64(w & 0xffff); | 420 | return __arch_hweight64(w & 0xffff); |
421 | } | 421 | } |
422 | 422 | ||
423 | static inline unsigned int hweight8(unsigned int w) | 423 | static inline unsigned int __arch_hweight8(unsigned int w) |
424 | { | 424 | { |
425 | return hweight64(w & 0xff); | 425 | return __arch_hweight64(w & 0xff); |
426 | } | 426 | } |
427 | #else | 427 | #else |
428 | #include <asm-generic/bitops/hweight.h> | 428 | #include <asm-generic/bitops/arch_hweight.h> |
429 | #endif | 429 | #endif |
430 | 430 | ||
431 | #include <asm-generic/bitops/const_hweight.h> | ||
432 | |||
431 | #endif /* __KERNEL__ */ | 433 | #endif /* __KERNEL__ */ |
432 | 434 | ||
433 | #include <asm-generic/bitops/find.h> | 435 | #include <asm-generic/bitops/find.h> |
diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h index 6ebc229a1c51..9da3df6f1a52 100644 --- a/arch/ia64/include/asm/bitops.h +++ b/arch/ia64/include/asm/bitops.h | |||
@@ -437,17 +437,18 @@ __fls (unsigned long x) | |||
437 | * hweightN: returns the hamming weight (i.e. the number | 437 | * hweightN: returns the hamming weight (i.e. the number |
438 | * of bits set) of a N-bit word | 438 | * of bits set) of a N-bit word |
439 | */ | 439 | */ |
440 | static __inline__ unsigned long | 440 | static __inline__ unsigned long __arch_hweight64(unsigned long x) |
441 | hweight64 (unsigned long x) | ||
442 | { | 441 | { |
443 | unsigned long result; | 442 | unsigned long result; |
444 | result = ia64_popcnt(x); | 443 | result = ia64_popcnt(x); |
445 | return result; | 444 | return result; |
446 | } | 445 | } |
447 | 446 | ||
448 | #define hweight32(x) (unsigned int) hweight64((x) & 0xfffffffful) | 447 | #define __arch_hweight32(x) ((unsigned int) __arch_hweight64((x) & 0xfffffffful)) |
449 | #define hweight16(x) (unsigned int) hweight64((x) & 0xfffful) | 448 | #define __arch_hweight16(x) ((unsigned int) __arch_hweight64((x) & 0xfffful)) |
450 | #define hweight8(x) (unsigned int) hweight64((x) & 0xfful) | 449 | #define __arch_hweight8(x) ((unsigned int) __arch_hweight64((x) & 0xfful)) |
450 | |||
451 | #include <asm-generic/bitops/const_hweight.h> | ||
451 | 452 | ||
452 | #endif /* __KERNEL__ */ | 453 | #endif /* __KERNEL__ */ |
453 | 454 | ||
diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h index e72ac9cdfb98..766121a67a24 100644 --- a/arch/sparc/include/asm/bitops_64.h +++ b/arch/sparc/include/asm/bitops_64.h | |||
@@ -44,7 +44,7 @@ extern void change_bit(unsigned long nr, volatile unsigned long *addr); | |||
44 | 44 | ||
45 | #ifdef ULTRA_HAS_POPULATION_COUNT | 45 | #ifdef ULTRA_HAS_POPULATION_COUNT |
46 | 46 | ||
47 | static inline unsigned int hweight64(unsigned long w) | 47 | static inline unsigned int __arch_hweight64(unsigned long w) |
48 | { | 48 | { |
49 | unsigned int res; | 49 | unsigned int res; |
50 | 50 | ||
@@ -52,7 +52,7 @@ static inline unsigned int hweight64(unsigned long w) | |||
52 | return res; | 52 | return res; |
53 | } | 53 | } |
54 | 54 | ||
55 | static inline unsigned int hweight32(unsigned int w) | 55 | static inline unsigned int __arch_hweight32(unsigned int w) |
56 | { | 56 | { |
57 | unsigned int res; | 57 | unsigned int res; |
58 | 58 | ||
@@ -60,7 +60,7 @@ static inline unsigned int hweight32(unsigned int w) | |||
60 | return res; | 60 | return res; |
61 | } | 61 | } |
62 | 62 | ||
63 | static inline unsigned int hweight16(unsigned int w) | 63 | static inline unsigned int __arch_hweight16(unsigned int w) |
64 | { | 64 | { |
65 | unsigned int res; | 65 | unsigned int res; |
66 | 66 | ||
@@ -68,7 +68,7 @@ static inline unsigned int hweight16(unsigned int w) | |||
68 | return res; | 68 | return res; |
69 | } | 69 | } |
70 | 70 | ||
71 | static inline unsigned int hweight8(unsigned int w) | 71 | static inline unsigned int __arch_hweight8(unsigned int w) |
72 | { | 72 | { |
73 | unsigned int res; | 73 | unsigned int res; |
74 | 74 | ||
@@ -78,9 +78,10 @@ static inline unsigned int hweight8(unsigned int w) | |||
78 | 78 | ||
79 | #else | 79 | #else |
80 | 80 | ||
81 | #include <asm-generic/bitops/hweight.h> | 81 | #include <asm-generic/bitops/arch_hweight.h> |
82 | 82 | ||
83 | #endif | 83 | #endif |
84 | #include <asm-generic/bitops/const_hweight.h> | ||
84 | #include <asm-generic/bitops/lock.h> | 85 | #include <asm-generic/bitops/lock.h> |
85 | #endif /* __KERNEL__ */ | 86 | #endif /* __KERNEL__ */ |
86 | 87 | ||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7b7838874552..a2d3a5fbeeda 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -237,6 +237,11 @@ config X86_32_LAZY_GS | |||
237 | def_bool y | 237 | def_bool y |
238 | depends on X86_32 && !CC_STACKPROTECTOR | 238 | depends on X86_32 && !CC_STACKPROTECTOR |
239 | 239 | ||
240 | config ARCH_HWEIGHT_CFLAGS | ||
241 | string | ||
242 | default "-fcall-saved-ecx -fcall-saved-edx" if X86_32 | ||
243 | default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64 | ||
244 | |||
240 | config KTIME_SCALAR | 245 | config KTIME_SCALAR |
241 | def_bool X86_32 | 246 | def_bool X86_32 |
242 | source "init/Kconfig" | 247 | source "init/Kconfig" |
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 92a9033c14d1..03b6bb5394a0 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h | |||
@@ -42,9 +42,6 @@ | |||
42 | #define LOCK_PREFIX "" | 42 | #define LOCK_PREFIX "" |
43 | #endif | 43 | #endif |
44 | 44 | ||
45 | /* This must be included *after* the definition of LOCK_PREFIX */ | ||
46 | #include <asm/cpufeature.h> | ||
47 | |||
48 | struct alt_instr { | 45 | struct alt_instr { |
49 | u8 *instr; /* original instruction */ | 46 | u8 *instr; /* original instruction */ |
50 | u8 *replacement; | 47 | u8 *replacement; |
@@ -99,6 +96,12 @@ static inline int alternatives_text_reserved(void *start, void *end) | |||
99 | ".previous" | 96 | ".previous" |
100 | 97 | ||
101 | /* | 98 | /* |
99 | * This must be included *after* the definition of ALTERNATIVE due to | ||
100 | * <asm/arch_hweight.h> | ||
101 | */ | ||
102 | #include <asm/cpufeature.h> | ||
103 | |||
104 | /* | ||
102 | * Alternative instructions for different CPU types or capabilities. | 105 | * Alternative instructions for different CPU types or capabilities. |
103 | * | 106 | * |
104 | * This allows to use optimized instructions even on generic binary | 107 | * This allows to use optimized instructions even on generic binary |
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h new file mode 100644 index 000000000000..9686c3d9ff73 --- /dev/null +++ b/arch/x86/include/asm/arch_hweight.h | |||
@@ -0,0 +1,61 @@ | |||
1 | #ifndef _ASM_X86_HWEIGHT_H | ||
2 | #define _ASM_X86_HWEIGHT_H | ||
3 | |||
4 | #ifdef CONFIG_64BIT | ||
5 | /* popcnt %edi, %eax -- redundant REX prefix for alignment */ | ||
6 | #define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7" | ||
7 | /* popcnt %rdi, %rax */ | ||
8 | #define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7" | ||
9 | #define REG_IN "D" | ||
10 | #define REG_OUT "a" | ||
11 | #else | ||
12 | /* popcnt %eax, %eax */ | ||
13 | #define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc0" | ||
14 | #define REG_IN "a" | ||
15 | #define REG_OUT "a" | ||
16 | #endif | ||
17 | |||
18 | /* | ||
19 | * __sw_hweightXX are called from within the alternatives below | ||
20 | * and callee-clobbered registers need to be taken care of. See | ||
21 | * ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective | ||
22 | * compiler switches. | ||
23 | */ | ||
24 | static inline unsigned int __arch_hweight32(unsigned int w) | ||
25 | { | ||
26 | unsigned int res = 0; | ||
27 | |||
28 | asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT) | ||
29 | : "="REG_OUT (res) | ||
30 | : REG_IN (w)); | ||
31 | |||
32 | return res; | ||
33 | } | ||
34 | |||
35 | static inline unsigned int __arch_hweight16(unsigned int w) | ||
36 | { | ||
37 | return __arch_hweight32(w & 0xffff); | ||
38 | } | ||
39 | |||
40 | static inline unsigned int __arch_hweight8(unsigned int w) | ||
41 | { | ||
42 | return __arch_hweight32(w & 0xff); | ||
43 | } | ||
44 | |||
45 | static inline unsigned long __arch_hweight64(__u64 w) | ||
46 | { | ||
47 | unsigned long res = 0; | ||
48 | |||
49 | #ifdef CONFIG_X86_32 | ||
50 | return __arch_hweight32((u32)w) + | ||
51 | __arch_hweight32((u32)(w >> 32)); | ||
52 | #else | ||
53 | asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT) | ||
54 | : "="REG_OUT (res) | ||
55 | : REG_IN (w)); | ||
56 | #endif /* CONFIG_X86_32 */ | ||
57 | |||
58 | return res; | ||
59 | } | ||
60 | |||
61 | #endif | ||
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 02b47a603fc8..545776efeb16 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h | |||
@@ -444,7 +444,9 @@ static inline int fls(int x) | |||
444 | 444 | ||
445 | #define ARCH_HAS_FAST_MULTIPLIER 1 | 445 | #define ARCH_HAS_FAST_MULTIPLIER 1 |
446 | 446 | ||
447 | #include <asm-generic/bitops/hweight.h> | 447 | #include <asm/arch_hweight.h> |
448 | |||
449 | #include <asm-generic/bitops/const_hweight.h> | ||
448 | 450 | ||
449 | #endif /* __KERNEL__ */ | 451 | #endif /* __KERNEL__ */ |
450 | 452 | ||