aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-05-18 12:17:01 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-05-18 12:17:01 -0400
commitcb41838bbc4403f7270a94b93a9a0d9fc9c2e7ea (patch)
tree0f359975ccad4ac72e86b8edf1924c076e74bd89 /arch
parent98f01720cbe3e2eb719682777049b6514e9db556 (diff)
parentc59bd5688299cddb71183e156e7a3c1409b90df2 (diff)
Merge branch 'core-hweight-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'core-hweight-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: x86, hweight: Use a 32-bit popcnt for __arch_hweight32() arch, hweight: Fix compilation errors x86: Add optimized popcnt variants bitops: Optimize hweight() by making use of compile-time evaluation
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/include/asm/bitops.h18
-rw-r--r--arch/ia64/include/asm/bitops.h11
-rw-r--r--arch/sparc/include/asm/bitops_64.h11
-rw-r--r--arch/x86/Kconfig5
-rw-r--r--arch/x86/include/asm/alternative.h9
-rw-r--r--arch/x86/include/asm/arch_hweight.h61
-rw-r--r--arch/x86/include/asm/bitops.h4
7 files changed, 97 insertions, 22 deletions
diff --git a/arch/alpha/include/asm/bitops.h b/arch/alpha/include/asm/bitops.h
index 15f3ae25c511..296da1d5ed57 100644
--- a/arch/alpha/include/asm/bitops.h
+++ b/arch/alpha/include/asm/bitops.h
@@ -405,29 +405,31 @@ static inline int fls(int x)
405 405
406#if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67) 406#if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67)
407/* Whee. EV67 can calculate it directly. */ 407/* Whee. EV67 can calculate it directly. */
408static inline unsigned long hweight64(unsigned long w) 408static inline unsigned long __arch_hweight64(unsigned long w)
409{ 409{
410 return __kernel_ctpop(w); 410 return __kernel_ctpop(w);
411} 411}
412 412
413static inline unsigned int hweight32(unsigned int w) 413static inline unsigned int __arch_weight32(unsigned int w)
414{ 414{
415 return hweight64(w); 415 return __arch_hweight64(w);
416} 416}
417 417
418static inline unsigned int hweight16(unsigned int w) 418static inline unsigned int __arch_hweight16(unsigned int w)
419{ 419{
420 return hweight64(w & 0xffff); 420 return __arch_hweight64(w & 0xffff);
421} 421}
422 422
423static inline unsigned int hweight8(unsigned int w) 423static inline unsigned int __arch_hweight8(unsigned int w)
424{ 424{
425 return hweight64(w & 0xff); 425 return __arch_hweight64(w & 0xff);
426} 426}
427#else 427#else
428#include <asm-generic/bitops/hweight.h> 428#include <asm-generic/bitops/arch_hweight.h>
429#endif 429#endif
430 430
431#include <asm-generic/bitops/const_hweight.h>
432
431#endif /* __KERNEL__ */ 433#endif /* __KERNEL__ */
432 434
433#include <asm-generic/bitops/find.h> 435#include <asm-generic/bitops/find.h>
diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h
index 6ebc229a1c51..9da3df6f1a52 100644
--- a/arch/ia64/include/asm/bitops.h
+++ b/arch/ia64/include/asm/bitops.h
@@ -437,17 +437,18 @@ __fls (unsigned long x)
437 * hweightN: returns the hamming weight (i.e. the number 437 * hweightN: returns the hamming weight (i.e. the number
438 * of bits set) of a N-bit word 438 * of bits set) of a N-bit word
439 */ 439 */
440static __inline__ unsigned long 440static __inline__ unsigned long __arch_hweight64(unsigned long x)
441hweight64 (unsigned long x)
442{ 441{
443 unsigned long result; 442 unsigned long result;
444 result = ia64_popcnt(x); 443 result = ia64_popcnt(x);
445 return result; 444 return result;
446} 445}
447 446
448#define hweight32(x) (unsigned int) hweight64((x) & 0xfffffffful) 447#define __arch_hweight32(x) ((unsigned int) __arch_hweight64((x) & 0xfffffffful))
449#define hweight16(x) (unsigned int) hweight64((x) & 0xfffful) 448#define __arch_hweight16(x) ((unsigned int) __arch_hweight64((x) & 0xfffful))
450#define hweight8(x) (unsigned int) hweight64((x) & 0xfful) 449#define __arch_hweight8(x) ((unsigned int) __arch_hweight64((x) & 0xfful))
450
451#include <asm-generic/bitops/const_hweight.h>
451 452
452#endif /* __KERNEL__ */ 453#endif /* __KERNEL__ */
453 454
diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h
index e72ac9cdfb98..766121a67a24 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -44,7 +44,7 @@ extern void change_bit(unsigned long nr, volatile unsigned long *addr);
44 44
45#ifdef ULTRA_HAS_POPULATION_COUNT 45#ifdef ULTRA_HAS_POPULATION_COUNT
46 46
47static inline unsigned int hweight64(unsigned long w) 47static inline unsigned int __arch_hweight64(unsigned long w)
48{ 48{
49 unsigned int res; 49 unsigned int res;
50 50
@@ -52,7 +52,7 @@ static inline unsigned int hweight64(unsigned long w)
52 return res; 52 return res;
53} 53}
54 54
55static inline unsigned int hweight32(unsigned int w) 55static inline unsigned int __arch_hweight32(unsigned int w)
56{ 56{
57 unsigned int res; 57 unsigned int res;
58 58
@@ -60,7 +60,7 @@ static inline unsigned int hweight32(unsigned int w)
60 return res; 60 return res;
61} 61}
62 62
63static inline unsigned int hweight16(unsigned int w) 63static inline unsigned int __arch_hweight16(unsigned int w)
64{ 64{
65 unsigned int res; 65 unsigned int res;
66 66
@@ -68,7 +68,7 @@ static inline unsigned int hweight16(unsigned int w)
68 return res; 68 return res;
69} 69}
70 70
71static inline unsigned int hweight8(unsigned int w) 71static inline unsigned int __arch_hweight8(unsigned int w)
72{ 72{
73 unsigned int res; 73 unsigned int res;
74 74
@@ -78,9 +78,10 @@ static inline unsigned int hweight8(unsigned int w)
78 78
79#else 79#else
80 80
81#include <asm-generic/bitops/hweight.h> 81#include <asm-generic/bitops/arch_hweight.h>
82 82
83#endif 83#endif
84#include <asm-generic/bitops/const_hweight.h>
84#include <asm-generic/bitops/lock.h> 85#include <asm-generic/bitops/lock.h>
85#endif /* __KERNEL__ */ 86#endif /* __KERNEL__ */
86 87
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7b7838874552..a2d3a5fbeeda 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -237,6 +237,11 @@ config X86_32_LAZY_GS
237 def_bool y 237 def_bool y
238 depends on X86_32 && !CC_STACKPROTECTOR 238 depends on X86_32 && !CC_STACKPROTECTOR
239 239
240config ARCH_HWEIGHT_CFLAGS
241 string
242 default "-fcall-saved-ecx -fcall-saved-edx" if X86_32
243 default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64
244
240config KTIME_SCALAR 245config KTIME_SCALAR
241 def_bool X86_32 246 def_bool X86_32
242source "init/Kconfig" 247source "init/Kconfig"
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 92a9033c14d1..03b6bb5394a0 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -42,9 +42,6 @@
42#define LOCK_PREFIX "" 42#define LOCK_PREFIX ""
43#endif 43#endif
44 44
45/* This must be included *after* the definition of LOCK_PREFIX */
46#include <asm/cpufeature.h>
47
48struct alt_instr { 45struct alt_instr {
49 u8 *instr; /* original instruction */ 46 u8 *instr; /* original instruction */
50 u8 *replacement; 47 u8 *replacement;
@@ -99,6 +96,12 @@ static inline int alternatives_text_reserved(void *start, void *end)
99 ".previous" 96 ".previous"
100 97
101/* 98/*
99 * This must be included *after* the definition of ALTERNATIVE due to
100 * <asm/arch_hweight.h>
101 */
102#include <asm/cpufeature.h>
103
104/*
102 * Alternative instructions for different CPU types or capabilities. 105 * Alternative instructions for different CPU types or capabilities.
103 * 106 *
104 * This allows to use optimized instructions even on generic binary 107 * This allows to use optimized instructions even on generic binary
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
new file mode 100644
index 000000000000..9686c3d9ff73
--- /dev/null
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -0,0 +1,61 @@
1#ifndef _ASM_X86_HWEIGHT_H
2#define _ASM_X86_HWEIGHT_H
3
4#ifdef CONFIG_64BIT
5/* popcnt %edi, %eax -- redundant REX prefix for alignment */
6#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"
7/* popcnt %rdi, %rax */
8#define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7"
9#define REG_IN "D"
10#define REG_OUT "a"
11#else
12/* popcnt %eax, %eax */
13#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc0"
14#define REG_IN "a"
15#define REG_OUT "a"
16#endif
17
18/*
19 * __sw_hweightXX are called from within the alternatives below
20 * and callee-clobbered registers need to be taken care of. See
21 * ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective
22 * compiler switches.
23 */
24static inline unsigned int __arch_hweight32(unsigned int w)
25{
26 unsigned int res = 0;
27
28 asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT)
29 : "="REG_OUT (res)
30 : REG_IN (w));
31
32 return res;
33}
34
35static inline unsigned int __arch_hweight16(unsigned int w)
36{
37 return __arch_hweight32(w & 0xffff);
38}
39
40static inline unsigned int __arch_hweight8(unsigned int w)
41{
42 return __arch_hweight32(w & 0xff);
43}
44
45static inline unsigned long __arch_hweight64(__u64 w)
46{
47 unsigned long res = 0;
48
49#ifdef CONFIG_X86_32
50 return __arch_hweight32((u32)w) +
51 __arch_hweight32((u32)(w >> 32));
52#else
53 asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT)
54 : "="REG_OUT (res)
55 : REG_IN (w));
56#endif /* CONFIG_X86_32 */
57
58 return res;
59}
60
61#endif
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 02b47a603fc8..545776efeb16 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -444,7 +444,9 @@ static inline int fls(int x)
444 444
445#define ARCH_HAS_FAST_MULTIPLIER 1 445#define ARCH_HAS_FAST_MULTIPLIER 1
446 446
447#include <asm-generic/bitops/hweight.h> 447#include <asm/arch_hweight.h>
448
449#include <asm-generic/bitops/const_hweight.h>
448 450
449#endif /* __KERNEL__ */ 451#endif /* __KERNEL__ */
450 452