diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2010-02-01 09:03:07 -0500 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2010-04-06 18:52:11 -0400 |
commit | 1527bc8b928dd1399c3d3467dd47d9ede210978a (patch) | |
tree | fb391da915bdae9f933b8170ff61aa43c85ef9ae | |
parent | 0fdf86754f70e813845af4abaa805165ce57a0bb (diff) |
bitops: Optimize hweight() by making use of compile-time evaluation
Rename the existing runtime hweight() implementations to
__arch_hweight(), rename the compile-time versions to __const_hweight()
and then have hweight() pick between them.
Suggested-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20100318111929.GB11152@aftab>
Acked-by: H. Peter Anvin <hpa@zytor.com>
LKML-Reference: <1265028224.24455.154.camel@laptop>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
-rw-r--r-- | arch/alpha/include/asm/bitops.h | 18 | ||||
-rw-r--r-- | arch/ia64/include/asm/bitops.h | 11 | ||||
-rw-r--r-- | arch/sparc/include/asm/bitops_64.h | 11 | ||||
-rw-r--r-- | include/asm-generic/bitops/arch_hweight.h | 11 | ||||
-rw-r--r-- | include/asm-generic/bitops/const_hweight.h | 42 | ||||
-rw-r--r-- | include/asm-generic/bitops/hweight.h | 8 | ||||
-rw-r--r-- | include/linux/bitops.h | 25 | ||||
-rw-r--r-- | lib/hweight.c | 19 |
8 files changed, 87 insertions, 58 deletions
diff --git a/arch/alpha/include/asm/bitops.h b/arch/alpha/include/asm/bitops.h index 15f3ae25c511..296da1d5ed57 100644 --- a/arch/alpha/include/asm/bitops.h +++ b/arch/alpha/include/asm/bitops.h | |||
@@ -405,29 +405,31 @@ static inline int fls(int x) | |||
405 | 405 | ||
406 | #if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67) | 406 | #if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67) |
407 | /* Whee. EV67 can calculate it directly. */ | 407 | /* Whee. EV67 can calculate it directly. */ |
408 | static inline unsigned long hweight64(unsigned long w) | 408 | static inline unsigned long __arch_hweight64(unsigned long w) |
409 | { | 409 | { |
410 | return __kernel_ctpop(w); | 410 | return __kernel_ctpop(w); |
411 | } | 411 | } |
412 | 412 | ||
413 | static inline unsigned int hweight32(unsigned int w) | 413 | static inline unsigned int __arch_hweight32(unsigned int w) |
414 | { | 414 | { |
415 | return hweight64(w); | 415 | return __arch_hweight64(w); |
416 | } | 416 | } |
417 | 417 | ||
418 | static inline unsigned int hweight16(unsigned int w) | 418 | static inline unsigned int __arch_hweight16(unsigned int w) |
419 | { | 419 | { |
420 | return hweight64(w & 0xffff); | 420 | return __arch_hweight64(w & 0xffff); |
421 | } | 421 | } |
422 | 422 | ||
423 | static inline unsigned int hweight8(unsigned int w) | 423 | static inline unsigned int __arch_hweight8(unsigned int w) |
424 | { | 424 | { |
425 | return hweight64(w & 0xff); | 425 | return __arch_hweight64(w & 0xff); |
426 | } | 426 | } |
427 | #else | 427 | #else |
428 | #include <asm-generic/bitops/hweight.h> | 428 | #include <asm-generic/bitops/arch_hweight.h> |
429 | #endif | 429 | #endif |
430 | 430 | ||
431 | #include <asm-generic/bitops/const_hweight.h> | ||
432 | |||
431 | #endif /* __KERNEL__ */ | 433 | #endif /* __KERNEL__ */ |
432 | 434 | ||
433 | #include <asm-generic/bitops/find.h> | 435 | #include <asm-generic/bitops/find.h> |
diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h index 6ebc229a1c51..9da3df6f1a52 100644 --- a/arch/ia64/include/asm/bitops.h +++ b/arch/ia64/include/asm/bitops.h | |||
@@ -437,17 +437,18 @@ __fls (unsigned long x) | |||
437 | * hweightN: returns the hamming weight (i.e. the number | 437 | * hweightN: returns the hamming weight (i.e. the number |
438 | * of bits set) of a N-bit word | 438 | * of bits set) of a N-bit word |
439 | */ | 439 | */ |
440 | static __inline__ unsigned long | 440 | static __inline__ unsigned long __arch_hweight64(unsigned long x) |
441 | hweight64 (unsigned long x) | ||
442 | { | 441 | { |
443 | unsigned long result; | 442 | unsigned long result; |
444 | result = ia64_popcnt(x); | 443 | result = ia64_popcnt(x); |
445 | return result; | 444 | return result; |
446 | } | 445 | } |
447 | 446 | ||
448 | #define hweight32(x) (unsigned int) hweight64((x) & 0xfffffffful) | 447 | #define __arch_hweight32(x) ((unsigned int) __arch_hweight64((x) & 0xfffffffful)) |
449 | #define hweight16(x) (unsigned int) hweight64((x) & 0xfffful) | 448 | #define __arch_hweight16(x) ((unsigned int) __arch_hweight64((x) & 0xfffful)) |
450 | #define hweight8(x) (unsigned int) hweight64((x) & 0xfful) | 449 | #define __arch_hweight8(x) ((unsigned int) __arch_hweight64((x) & 0xfful)) |
450 | |||
451 | #include <asm-generic/bitops/const_hweight.h> | ||
451 | 452 | ||
452 | #endif /* __KERNEL__ */ | 453 | #endif /* __KERNEL__ */ |
453 | 454 | ||
diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h index e72ac9cdfb98..766121a67a24 100644 --- a/arch/sparc/include/asm/bitops_64.h +++ b/arch/sparc/include/asm/bitops_64.h | |||
@@ -44,7 +44,7 @@ extern void change_bit(unsigned long nr, volatile unsigned long *addr); | |||
44 | 44 | ||
45 | #ifdef ULTRA_HAS_POPULATION_COUNT | 45 | #ifdef ULTRA_HAS_POPULATION_COUNT |
46 | 46 | ||
47 | static inline unsigned int hweight64(unsigned long w) | 47 | static inline unsigned int __arch_hweight64(unsigned long w) |
48 | { | 48 | { |
49 | unsigned int res; | 49 | unsigned int res; |
50 | 50 | ||
@@ -52,7 +52,7 @@ static inline unsigned int hweight64(unsigned long w) | |||
52 | return res; | 52 | return res; |
53 | } | 53 | } |
54 | 54 | ||
55 | static inline unsigned int hweight32(unsigned int w) | 55 | static inline unsigned int __arch_hweight32(unsigned int w) |
56 | { | 56 | { |
57 | unsigned int res; | 57 | unsigned int res; |
58 | 58 | ||
@@ -60,7 +60,7 @@ static inline unsigned int hweight32(unsigned int w) | |||
60 | return res; | 60 | return res; |
61 | } | 61 | } |
62 | 62 | ||
63 | static inline unsigned int hweight16(unsigned int w) | 63 | static inline unsigned int __arch_hweight16(unsigned int w) |
64 | { | 64 | { |
65 | unsigned int res; | 65 | unsigned int res; |
66 | 66 | ||
@@ -68,7 +68,7 @@ static inline unsigned int hweight16(unsigned int w) | |||
68 | return res; | 68 | return res; |
69 | } | 69 | } |
70 | 70 | ||
71 | static inline unsigned int hweight8(unsigned int w) | 71 | static inline unsigned int __arch_hweight8(unsigned int w) |
72 | { | 72 | { |
73 | unsigned int res; | 73 | unsigned int res; |
74 | 74 | ||
@@ -78,9 +78,10 @@ static inline unsigned int hweight8(unsigned int w) | |||
78 | 78 | ||
79 | #else | 79 | #else |
80 | 80 | ||
81 | #include <asm-generic/bitops/hweight.h> | 81 | #include <asm-generic/bitops/arch_hweight.h> |
82 | 82 | ||
83 | #endif | 83 | #endif |
84 | #include <asm-generic/bitops/const_hweight.h> | ||
84 | #include <asm-generic/bitops/lock.h> | 85 | #include <asm-generic/bitops/lock.h> |
85 | #endif /* __KERNEL__ */ | 86 | #endif /* __KERNEL__ */ |
86 | 87 | ||
diff --git a/include/asm-generic/bitops/arch_hweight.h b/include/asm-generic/bitops/arch_hweight.h new file mode 100644 index 000000000000..3a7be842cdce --- /dev/null +++ b/include/asm-generic/bitops/arch_hweight.h | |||
@@ -0,0 +1,11 @@ | |||
1 | #ifndef _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_ | ||
2 | #define _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_ | ||
3 | |||
4 | #include <asm/types.h> | ||
5 | |||
6 | extern unsigned int __arch_hweight32(unsigned int w); | ||
7 | extern unsigned int __arch_hweight16(unsigned int w); | ||
8 | extern unsigned int __arch_hweight8(unsigned int w); | ||
9 | extern unsigned long __arch_hweight64(__u64 w); | ||
10 | |||
11 | #endif /* _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_ */ | ||
diff --git a/include/asm-generic/bitops/const_hweight.h b/include/asm-generic/bitops/const_hweight.h new file mode 100644 index 000000000000..fa2a50b7ee66 --- /dev/null +++ b/include/asm-generic/bitops/const_hweight.h | |||
@@ -0,0 +1,42 @@ | |||
1 | #ifndef _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_ | ||
2 | #define _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_ | ||
3 | |||
4 | /* | ||
5 | * Compile time versions of __arch_hweightN() | ||
6 | */ | ||
7 | #define __const_hweight8(w) \ | ||
8 | ( (!!((w) & (1ULL << 0))) + \ | ||
9 | (!!((w) & (1ULL << 1))) + \ | ||
10 | (!!((w) & (1ULL << 2))) + \ | ||
11 | (!!((w) & (1ULL << 3))) + \ | ||
12 | (!!((w) & (1ULL << 4))) + \ | ||
13 | (!!((w) & (1ULL << 5))) + \ | ||
14 | (!!((w) & (1ULL << 6))) + \ | ||
15 | (!!((w) & (1ULL << 7))) ) | ||
16 | |||
17 | #define __const_hweight16(w) (__const_hweight8(w) + __const_hweight8((w) >> 8 )) | ||
18 | #define __const_hweight32(w) (__const_hweight16(w) + __const_hweight16((w) >> 16)) | ||
19 | #define __const_hweight64(w) (__const_hweight32(w) + __const_hweight32((w) >> 32)) | ||
20 | |||
21 | /* | ||
22 | * Generic interface. | ||
23 | */ | ||
24 | #define hweight8(w) (__builtin_constant_p(w) ? __const_hweight8(w) : __arch_hweight8(w)) | ||
25 | #define hweight16(w) (__builtin_constant_p(w) ? __const_hweight16(w) : __arch_hweight16(w)) | ||
26 | #define hweight32(w) (__builtin_constant_p(w) ? __const_hweight32(w) : __arch_hweight32(w)) | ||
27 | #define hweight64(w) (__builtin_constant_p(w) ? __const_hweight64(w) : __arch_hweight64(w)) | ||
28 | |||
29 | /* | ||
30 | * Interface for known constant arguments | ||
31 | */ | ||
32 | #define HWEIGHT8(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight8(w)) | ||
33 | #define HWEIGHT16(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight16(w)) | ||
34 | #define HWEIGHT32(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight32(w)) | ||
35 | #define HWEIGHT64(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight64(w)) | ||
36 | |||
37 | /* | ||
38 | * Type invariant interface to the compile time constant hweight functions. | ||
39 | */ | ||
40 | #define HWEIGHT(w) HWEIGHT64((u64)(w)) | ||
41 | |||
42 | #endif /* _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_ */ | ||
diff --git a/include/asm-generic/bitops/hweight.h b/include/asm-generic/bitops/hweight.h index fbbc383771da..a94d6519c7ed 100644 --- a/include/asm-generic/bitops/hweight.h +++ b/include/asm-generic/bitops/hweight.h | |||
@@ -1,11 +1,7 @@ | |||
1 | #ifndef _ASM_GENERIC_BITOPS_HWEIGHT_H_ | 1 | #ifndef _ASM_GENERIC_BITOPS_HWEIGHT_H_ |
2 | #define _ASM_GENERIC_BITOPS_HWEIGHT_H_ | 2 | #define _ASM_GENERIC_BITOPS_HWEIGHT_H_ |
3 | 3 | ||
4 | #include <asm/types.h> | 4 | #include <asm-generic/bitops/arch_hweight.h> |
5 | 5 | #include <asm-generic/bitops/const_hweight.h> | |
6 | extern unsigned int hweight32(unsigned int w); | ||
7 | extern unsigned int hweight16(unsigned int w); | ||
8 | extern unsigned int hweight8(unsigned int w); | ||
9 | extern unsigned long hweight64(__u64 w); | ||
10 | 6 | ||
11 | #endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */ | 7 | #endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */ |
diff --git a/include/linux/bitops.h b/include/linux/bitops.h index b79389879238..c55d5bc4ee58 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h | |||
@@ -47,31 +47,6 @@ static inline unsigned long hweight_long(unsigned long w) | |||
47 | return sizeof(w) == 4 ? hweight32(w) : hweight64(w); | 47 | return sizeof(w) == 4 ? hweight32(w) : hweight64(w); |
48 | } | 48 | } |
49 | 49 | ||
50 | /* | ||
51 | * Clearly slow versions of the hweightN() functions, their benefit is | ||
52 | * of course compile time evaluation of constant arguments. | ||
53 | */ | ||
54 | #define HWEIGHT8(w) \ | ||
55 | ( BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + \ | ||
56 | (!!((w) & (1ULL << 0))) + \ | ||
57 | (!!((w) & (1ULL << 1))) + \ | ||
58 | (!!((w) & (1ULL << 2))) + \ | ||
59 | (!!((w) & (1ULL << 3))) + \ | ||
60 | (!!((w) & (1ULL << 4))) + \ | ||
61 | (!!((w) & (1ULL << 5))) + \ | ||
62 | (!!((w) & (1ULL << 6))) + \ | ||
63 | (!!((w) & (1ULL << 7))) ) | ||
64 | |||
65 | #define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8((w) >> 8)) | ||
66 | #define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16((w) >> 16)) | ||
67 | #define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32((w) >> 32)) | ||
68 | |||
69 | /* | ||
70 | * Type invariant version that simply casts things to the | ||
71 | * largest type. | ||
72 | */ | ||
73 | #define HWEIGHT(w) HWEIGHT64((u64)(w)) | ||
74 | |||
75 | /** | 50 | /** |
76 | * rol32 - rotate a 32-bit value left | 51 | * rol32 - rotate a 32-bit value left |
77 | * @word: value to rotate | 52 | * @word: value to rotate |
diff --git a/lib/hweight.c b/lib/hweight.c index 63ee4eb1228d..a6927e76840f 100644 --- a/lib/hweight.c +++ b/lib/hweight.c | |||
@@ -9,7 +9,7 @@ | |||
9 | * The Hamming Weight of a number is the total number of bits set in it. | 9 | * The Hamming Weight of a number is the total number of bits set in it. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | unsigned int hweight32(unsigned int w) | 12 | unsigned int __arch_hweight32(unsigned int w) |
13 | { | 13 | { |
14 | #ifdef ARCH_HAS_FAST_MULTIPLIER | 14 | #ifdef ARCH_HAS_FAST_MULTIPLIER |
15 | w -= (w >> 1) & 0x55555555; | 15 | w -= (w >> 1) & 0x55555555; |
@@ -24,29 +24,30 @@ unsigned int hweight32(unsigned int w) | |||
24 | return (res + (res >> 16)) & 0x000000FF; | 24 | return (res + (res >> 16)) & 0x000000FF; |
25 | #endif | 25 | #endif |
26 | } | 26 | } |
27 | EXPORT_SYMBOL(hweight32); | 27 | EXPORT_SYMBOL(__arch_hweight32); |
28 | 28 | ||
29 | unsigned int hweight16(unsigned int w) | 29 | unsigned int __arch_hweight16(unsigned int w) |
30 | { | 30 | { |
31 | unsigned int res = w - ((w >> 1) & 0x5555); | 31 | unsigned int res = w - ((w >> 1) & 0x5555); |
32 | res = (res & 0x3333) + ((res >> 2) & 0x3333); | 32 | res = (res & 0x3333) + ((res >> 2) & 0x3333); |
33 | res = (res + (res >> 4)) & 0x0F0F; | 33 | res = (res + (res >> 4)) & 0x0F0F; |
34 | return (res + (res >> 8)) & 0x00FF; | 34 | return (res + (res >> 8)) & 0x00FF; |
35 | } | 35 | } |
36 | EXPORT_SYMBOL(hweight16); | 36 | EXPORT_SYMBOL(__arch_hweight16); |
37 | 37 | ||
38 | unsigned int hweight8(unsigned int w) | 38 | unsigned int __arch_hweight8(unsigned int w) |
39 | { | 39 | { |
40 | unsigned int res = w - ((w >> 1) & 0x55); | 40 | unsigned int res = w - ((w >> 1) & 0x55); |
41 | res = (res & 0x33) + ((res >> 2) & 0x33); | 41 | res = (res & 0x33) + ((res >> 2) & 0x33); |
42 | return (res + (res >> 4)) & 0x0F; | 42 | return (res + (res >> 4)) & 0x0F; |
43 | } | 43 | } |
44 | EXPORT_SYMBOL(hweight8); | 44 | EXPORT_SYMBOL(__arch_hweight8); |
45 | 45 | ||
46 | unsigned long hweight64(__u64 w) | 46 | unsigned long __arch_hweight64(__u64 w) |
47 | { | 47 | { |
48 | #if BITS_PER_LONG == 32 | 48 | #if BITS_PER_LONG == 32 |
49 | return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w); | 49 | return __arch_hweight32((unsigned int)(w >> 32)) + |
50 | __arch_hweight32((unsigned int)w); | ||
50 | #elif BITS_PER_LONG == 64 | 51 | #elif BITS_PER_LONG == 64 |
51 | #ifdef ARCH_HAS_FAST_MULTIPLIER | 52 | #ifdef ARCH_HAS_FAST_MULTIPLIER |
52 | w -= (w >> 1) & 0x5555555555555555ul; | 53 | w -= (w >> 1) & 0x5555555555555555ul; |
@@ -63,4 +64,4 @@ unsigned long hweight64(__u64 w) | |||
63 | #endif | 64 | #endif |
64 | #endif | 65 | #endif |
65 | } | 66 | } |
66 | EXPORT_SYMBOL(hweight64); | 67 | EXPORT_SYMBOL(__arch_hweight64); |