diff options
author | Akinobu Mita <akinobu.mita@gmail.com> | 2009-12-21 19:20:16 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-12-28 04:41:39 -0500 |
commit | 39d997b514e12d5aff0dca206eb8996b3957927e (patch) | |
tree | d63202847a8a421fbd1fc39e9a2433dbc86ce104 /lib/hweight.c | |
parent | 6b7b284958d47b77d06745b36bc7f36dab769d9b (diff) |
x86, core: Optimize hweight32()
Optimize hweight32 by using the same technique in hweight64.
The proof of this technique can be found in the commit log for
f9b4192923fa6e38331e88214b1fe5fc21583fcc ("bitops: hweight()
speedup").
The userspace benchmark on x86_32 showed 20% speedup with
bitmap_weight() which uses hweight32 to count bits for each
unsigned long on 32bit architectures.
int main(void)
{
#define SZ (1024 * 1024 * 512)
static DECLARE_BITMAP(bitmap, SZ) = {
[0 ... 100] = 1,
};
return bitmap_weight(bitmap, SZ);
}
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <1258603932-4590-1-git-send-email-akinobu.mita@gmail.com>
[ only x86 sets ARCH_HAS_FAST_MULTIPLIER so we do this via the x86 tree]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'lib/hweight.c')
-rw-r--r-- | lib/hweight.c | 7 |
1 files changed, 7 insertions, 0 deletions
diff --git a/lib/hweight.c b/lib/hweight.c index 389424ecb129..63ee4eb1228d 100644 --- a/lib/hweight.c +++ b/lib/hweight.c | |||
@@ -11,11 +11,18 @@ | |||
11 | 11 | ||
12 | unsigned int hweight32(unsigned int w) | 12 | unsigned int hweight32(unsigned int w) |
13 | { | 13 | { |
14 | #ifdef ARCH_HAS_FAST_MULTIPLIER | ||
15 | w -= (w >> 1) & 0x55555555; | ||
16 | w = (w & 0x33333333) + ((w >> 2) & 0x33333333); | ||
17 | w = (w + (w >> 4)) & 0x0f0f0f0f; | ||
18 | return (w * 0x01010101) >> 24; | ||
19 | #else | ||
14 | unsigned int res = w - ((w >> 1) & 0x55555555); | 20 | unsigned int res = w - ((w >> 1) & 0x55555555); |
15 | res = (res & 0x33333333) + ((res >> 2) & 0x33333333); | 21 | res = (res & 0x33333333) + ((res >> 2) & 0x33333333); |
16 | res = (res + (res >> 4)) & 0x0F0F0F0F; | 22 | res = (res + (res >> 4)) & 0x0F0F0F0F; |
17 | res = res + (res >> 8); | 23 | res = res + (res >> 8); |
18 | return (res + (res >> 16)) & 0x000000FF; | 24 | return (res + (res >> 16)) & 0x000000FF; |
25 | #endif | ||
19 | } | 26 | } |
20 | EXPORT_SYMBOL(hweight32); | 27 | EXPORT_SYMBOL(hweight32); |
21 | 28 | ||