From 1527bc8b928dd1399c3d3467dd47d9ede210978a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 1 Feb 2010 15:03:07 +0100 Subject: bitops: Optimize hweight() by making use of compile-time evaluation Rename the extisting runtime hweight() implementations to __arch_hweight(), rename the compile-time versions to __const_hweight() and then have hweight() pick between them. Suggested-by: H. Peter Anvin Signed-off-by: Peter Zijlstra LKML-Reference: <20100318111929.GB11152@aftab> Acked-by: H. Peter Anvin LKML-Reference: <1265028224.24455.154.camel@laptop> Signed-off-by: H. Peter Anvin --- include/asm-generic/bitops/arch_hweight.h | 11 ++++++++ include/asm-generic/bitops/const_hweight.h | 42 ++++++++++++++++++++++++++++++ include/asm-generic/bitops/hweight.h | 8 ++---- include/linux/bitops.h | 25 ------------------ 4 files changed, 55 insertions(+), 31 deletions(-) create mode 100644 include/asm-generic/bitops/arch_hweight.h create mode 100644 include/asm-generic/bitops/const_hweight.h (limited to 'include') diff --git a/include/asm-generic/bitops/arch_hweight.h b/include/asm-generic/bitops/arch_hweight.h new file mode 100644 index 000000000000..3a7be842cdce --- /dev/null +++ b/include/asm-generic/bitops/arch_hweight.h @@ -0,0 +1,11 @@ +#ifndef _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_ +#define _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_ + +#include + +extern unsigned int __arch_hweight32(unsigned int w); +extern unsigned int __arch_hweight16(unsigned int w); +extern unsigned int __arch_hweight8(unsigned int w); +extern unsigned long __arch_hweight64(__u64 w); + +#endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */ diff --git a/include/asm-generic/bitops/const_hweight.h b/include/asm-generic/bitops/const_hweight.h new file mode 100644 index 000000000000..fa2a50b7ee66 --- /dev/null +++ b/include/asm-generic/bitops/const_hweight.h @@ -0,0 +1,42 @@ +#ifndef _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_ +#define _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_ + +/* + * Compile time versions of __arch_hweightN() + */ +#define __const_hweight8(w) \ + ( (!!((w) & (1ULL << 0))) + \ + (!!((w) & (1ULL << 1))) + \ + (!!((w) & (1ULL << 2))) + \ + (!!((w) & (1ULL << 3))) + \ + (!!((w) & (1ULL << 4))) + \ + (!!((w) & (1ULL << 5))) + \ + (!!((w) & (1ULL << 6))) + \ + (!!((w) & (1ULL << 7))) ) + +#define __const_hweight16(w) (__const_hweight8(w) + __const_hweight8((w) >> 8 )) +#define __const_hweight32(w) (__const_hweight16(w) + __const_hweight16((w) >> 16)) +#define __const_hweight64(w) (__const_hweight32(w) + __const_hweight32((w) >> 32)) + +/* + * Generic interface. + */ +#define hweight8(w) (__builtin_constant_p(w) ? __const_hweight8(w) : __arch_hweight8(w)) +#define hweight16(w) (__builtin_constant_p(w) ? __const_hweight16(w) : __arch_hweight16(w)) +#define hweight32(w) (__builtin_constant_p(w) ? __const_hweight32(w) : __arch_hweight32(w)) +#define hweight64(w) (__builtin_constant_p(w) ? __const_hweight64(w) : __arch_hweight64(w)) + +/* + * Interface for known constant arguments + */ +#define HWEIGHT8(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight8(w)) +#define HWEIGHT16(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight16(w)) +#define HWEIGHT32(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight32(w)) +#define HWEIGHT64(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight64(w)) + +/* + * Type invariant interface to the compile time constant hweight functions. + */ +#define HWEIGHT(w) HWEIGHT64((u64)w) + +#endif /* _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_ */ diff --git a/include/asm-generic/bitops/hweight.h b/include/asm-generic/bitops/hweight.h index fbbc383771da..a94d6519c7ed 100644 --- a/include/asm-generic/bitops/hweight.h +++ b/include/asm-generic/bitops/hweight.h @@ -1,11 +1,7 @@ #ifndef _ASM_GENERIC_BITOPS_HWEIGHT_H_ #define _ASM_GENERIC_BITOPS_HWEIGHT_H_ -#include - -extern unsigned int hweight32(unsigned int w); -extern unsigned int hweight16(unsigned int w); -extern unsigned int hweight8(unsigned int w); -extern unsigned long hweight64(__u64 w); +#include +#include #endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */ diff --git a/include/linux/bitops.h b/include/linux/bitops.h index b79389879238..c55d5bc4ee58 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -47,31 +47,6 @@ static inline unsigned long hweight_long(unsigned long w) return sizeof(w) == 4 ? hweight32(w) : hweight64(w); } -/* - * Clearly slow versions of the hweightN() functions, their benefit is - * of course compile time evaluation of constant arguments. - */ -#define HWEIGHT8(w) \ - ( BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + \ - (!!((w) & (1ULL << 0))) + \ - (!!((w) & (1ULL << 1))) + \ - (!!((w) & (1ULL << 2))) + \ - (!!((w) & (1ULL << 3))) + \ - (!!((w) & (1ULL << 4))) + \ - (!!((w) & (1ULL << 5))) + \ - (!!((w) & (1ULL << 6))) + \ - (!!((w) & (1ULL << 7))) ) - -#define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8((w) >> 8)) -#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16((w) >> 16)) -#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32((w) >> 32)) - -/* - * Type invariant version that simply casts things to the - * largest type. - */ -#define HWEIGHT(w) HWEIGHT64((u64)(w)) - /** * rol32 - rotate a 32-bit value left * @word: value to rotate -- cgit v1.2.2 From d61931d89be506372d01a90d1755f6d0a9fafe2d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 5 Mar 2010 17:34:46 +0100 Subject: x86: Add optimized popcnt variants Add support for the hardware version of the Hamming weight function, popcnt, present in CPUs which advertize it under CPUID, Function 0x0000_0001_ECX[23]. On CPUs which don't support it, we fallback to the default lib/hweight.c sw versions. A synthetic benchmark comparing popcnt with __sw_hweight64 showed almost a 3x speedup on a F10h machine. Signed-off-by: Borislav Petkov LKML-Reference: <20100318112015.GC11152@aftab> Signed-off-by: H. Peter Anvin --- include/asm-generic/bitops/arch_hweight.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/bitops/arch_hweight.h b/include/asm-generic/bitops/arch_hweight.h index 3a7be842cdce..9a81c1e9436c 100644 --- a/include/asm-generic/bitops/arch_hweight.h +++ b/include/asm-generic/bitops/arch_hweight.h @@ -3,9 +3,23 @@ #include -extern unsigned int __arch_hweight32(unsigned int w); -extern unsigned int __arch_hweight16(unsigned int w); -extern unsigned int __arch_hweight8(unsigned int w); -extern unsigned long __arch_hweight64(__u64 w); +inline unsigned int __arch_hweight32(unsigned int w) +{ + return __sw_hweight32(w); +} +inline unsigned int __arch_hweight16(unsigned int w) +{ + return __sw_hweight16(w); +} + +inline unsigned int __arch_hweight8(unsigned int w) +{ + return __sw_hweight8(w); +} + +inline unsigned long __arch_hweight64(__u64 w) +{ + return __sw_hweight64(w); +} #endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */ -- cgit v1.2.2 From 4677d4a53e0d565742277e8913e91c821453e63e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 3 May 2010 14:57:11 +0200 Subject: arch, hweight: Fix compilation errors Fix function prototype visibility issues when compiling for non-x86 architectures. Tested with crosstool (ftp://ftp.kernel.org/pub/tools/crosstool/) with alpha, ia64 and sparc targets. Signed-off-by: Borislav Petkov LKML-Reference: <20100503130736.GD26107@aftab> Signed-off-by: H. Peter Anvin --- include/asm-generic/bitops/arch_hweight.h | 8 ++++---- include/linux/bitops.h | 5 +++++ 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/bitops/arch_hweight.h b/include/asm-generic/bitops/arch_hweight.h index 9a81c1e9436c..6a211f40665c 100644 --- a/include/asm-generic/bitops/arch_hweight.h +++ b/include/asm-generic/bitops/arch_hweight.h @@ -3,22 +3,22 @@ #include -inline unsigned int __arch_hweight32(unsigned int w) +static inline unsigned int __arch_hweight32(unsigned int w) { return __sw_hweight32(w); } -inline unsigned int __arch_hweight16(unsigned int w) +static inline unsigned int __arch_hweight16(unsigned int w) { return __sw_hweight16(w); } -inline unsigned int __arch_hweight8(unsigned int w) +static inline unsigned int __arch_hweight8(unsigned int w) { return __sw_hweight8(w); } -inline unsigned long __arch_hweight64(__u64 w) +static inline unsigned long __arch_hweight64(__u64 w) { return __sw_hweight64(w); } diff --git a/include/linux/bitops.h b/include/linux/bitops.h index c55d5bc4ee58..26caa608ccd9 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -10,6 +10,11 @@ #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) #endif +extern unsigned int __sw_hweight8(unsigned int w); +extern unsigned int __sw_hweight16(unsigned int w); +extern unsigned int __sw_hweight32(unsigned int w); +extern unsigned long __sw_hweight64(__u64 w); + /* * Include this here because some architectures need generic_ffs/fls in * scope -- cgit v1.2.2