about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author David Mosberger-Tang <davidm@hpl.hp.com> 2005-04-21 14:07:59 -0400
committer Tony Luck <tony.luck@intel.com> 2005-04-21 14:07:59 -0400
commit 821376bf15e692941f9235f13a14987009fd0b10 (patch)
tree 2179380ee3eb38fb393719e6ce32b15e934c4a44
parent d8470b7c13e11c18cf14a7e3180f0b00e715e4f0 (diff)
[IA64] fix fls()
The ia64 version of fls() never worked as intended (the bit numbering was off by 1 and fls(0) was undefined). This patch fixes the problem by using a popcnt-based fls(), which on McKinley-derived cores is slightly faster than both ia64_fls() and generic_fls(). The resulting code, however, is bigger (7-8 bundles instead of about 3 bundles). Also switch ia64_popcnt() to __builtin_popcountl() for GCC v3.4 or newer since the compiler can predicate that and schedule it better. Thanks to Simon Derr and Matt Mackall for tracking down this bug.
Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r-- include/asm-ia64/bitops.h 21
-rw-r--r-- include/asm-ia64/gcc_intrin.h 10
2 files changed, 24 insertions, 7 deletions
diff --git a/include/asm-ia64/bitops.h b/include/asm-ia64/bitops.h
index 925d54cee475..7232528e2d0c 100644
--- a/include/asm-ia64/bitops.h
+++ b/include/asm-ia64/bitops.h
@@ -314,8 +314,8 @@ __ffs (unsigned long x)
314#ifdef __KERNEL__ 314#ifdef __KERNEL__
315 315
316/* 316/*
317 * find_last_zero_bit - find the last zero bit in a 64 bit quantity 317 * Return bit number of last (most-significant) bit set. Undefined
318 * @x: The value to search 318 * for x==0. Bits are numbered from 0..63 (e.g., ia64_fls(9) == 3).
319 */ 319 */
320static inline unsigned long 320static inline unsigned long
321ia64_fls (unsigned long x) 321ia64_fls (unsigned long x)
@@ -327,10 +327,23 @@ ia64_fls (unsigned long x)
327 return exp - 0xffff; 327 return exp - 0xffff;
328} 328}
329 329
330/*
331 * Find the last (most significant) bit set. Returns 0 for x==0 and
332 * bits are numbered from 1..32 (e.g., fls(9) == 4).
333 */
330static inline int 334static inline int
331fls (int x) 335fls (int t)
332{ 336{
333 return ia64_fls((unsigned int) x); 337 unsigned long x = t & 0xffffffffu;
338
339 if (!x)
340 return 0;
341 x |= x >> 1;
342 x |= x >> 2;
343 x |= x >> 4;
344 x |= x >> 8;
345 x |= x >> 16;
346 return ia64_popcnt(x);
334} 347}
335 348
336/* 349/*
diff --git a/include/asm-ia64/gcc_intrin.h b/include/asm-ia64/gcc_intrin.h
index 7c357dfbae50..4fb4e439b05c 100644
--- a/include/asm-ia64/gcc_intrin.h
+++ b/include/asm-ia64/gcc_intrin.h
@@ -133,13 +133,17 @@ register unsigned long ia64_r13 asm ("r13") __attribute_used__;
133 ia64_intri_res; \ 133 ia64_intri_res; \
134}) 134})
135 135
136#define ia64_popcnt(x) \ 136#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
137({ \ 137# define ia64_popcnt(x) __builtin_popcountl(x)
138#else
139# define ia64_popcnt(x) \
140 ({ \
138 __u64 ia64_intri_res; \ 141 __u64 ia64_intri_res; \
139 asm ("popcnt %0=%1" : "=r" (ia64_intri_res) : "r" (x)); \ 142 asm ("popcnt %0=%1" : "=r" (ia64_intri_res) : "r" (x)); \
140 \ 143 \
141 ia64_intri_res; \ 144 ia64_intri_res; \
142}) 145 })
146#endif
143 147
144#define ia64_getf_exp(x) \ 148#define ia64_getf_exp(x) \
145({ \ 149({ \