aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorAkinobu Mita <akinobu.mita@gmail.com>2009-12-21 19:20:16 -0500
committerIngo Molnar <mingo@elte.hu>2009-12-28 04:41:39 -0500
commit39d997b514e12d5aff0dca206eb8996b3957927e (patch)
treed63202847a8a421fbd1fc39e9a2433dbc86ce104 /lib
parent6b7b284958d47b77d06745b36bc7f36dab769d9b (diff)
x86, core: Optimize hweight32()
Optimize hweight32 by using the same technique in hweight64. The proof of this technique can be found in the commit log for f9b4192923fa6e38331e88214b1fe5fc21583fcc ("bitops: hweight() speedup"). The userspace benchmark on x86_32 showed 20% speedup with bitmap_weight() which uses hweight32 to count bits for each unsigned long on 32bit architectures. int main(void) { #define SZ (1024 * 1024 * 512) static DECLARE_BITMAP(bitmap, SZ) = { [0 ... 100] = 1, }; return bitmap_weight(bitmap, SZ); } Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> LKML-Reference: <1258603932-4590-1-git-send-email-akinobu.mita@gmail.com> [ only x86 sets ARCH_HAS_FAST_MULTIPLIER so we do this via the x86 tree] Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'lib')
-rw-r--r--lib/hweight.c7
1 files changed, 7 insertions, 0 deletions
diff --git a/lib/hweight.c b/lib/hweight.c
index 389424ecb129..63ee4eb1228d 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -11,11 +11,18 @@
11 11
12unsigned int hweight32(unsigned int w) 12unsigned int hweight32(unsigned int w)
13{ 13{
14#ifdef ARCH_HAS_FAST_MULTIPLIER
15 w -= (w >> 1) & 0x55555555;
16 w = (w & 0x33333333) + ((w >> 2) & 0x33333333);
17 w = (w + (w >> 4)) & 0x0f0f0f0f;
18 return (w * 0x01010101) >> 24;
19#else
14 unsigned int res = w - ((w >> 1) & 0x55555555); 20 unsigned int res = w - ((w >> 1) & 0x55555555);
15 res = (res & 0x33333333) + ((res >> 2) & 0x33333333); 21 res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
16 res = (res + (res >> 4)) & 0x0F0F0F0F; 22 res = (res + (res >> 4)) & 0x0F0F0F0F;
17 res = res + (res >> 8); 23 res = res + (res >> 8);
18 return (res + (res >> 16)) & 0x000000FF; 24 return (res + (res >> 16)) & 0x000000FF;
25#endif
19} 26}
20EXPORT_SYMBOL(hweight32); 27EXPORT_SYMBOL(hweight32);
21 28