diff options
author | David Mosberger-Tang <davidm@hpl.hp.com> | 2005-04-21 14:07:59 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2005-04-21 14:07:59 -0400 |
commit | 821376bf15e692941f9235f13a14987009fd0b10 (patch) | |
tree | 2179380ee3eb38fb393719e6ce32b15e934c4a44 | |
parent | d8470b7c13e11c18cf14a7e3180f0b00e715e4f0 (diff) |
[IA64] fix fls()
The ia64 version of fls() never worked as intended (the bit numbering
was off by 1 and fls(0) was undefined). This patch fixes the problem
by using a popcnt-based fls(), which on McKinley-derived cores is
slightly faster than both ia64_fls() and generic_fls(). The resulting
code, however, is bigger (7-8 bundles instead of about 3 bundles).
Also switch ia64_popcnt() to __builtin_popcountl() for GCC v3.4 or
newer since the compiler can predicate that and schedule it better.
Thanks to Simon Derr and Matt Mackall for tracking down this bug.
Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r-- | include/asm-ia64/bitops.h | 21 | ||||
-rw-r--r-- | include/asm-ia64/gcc_intrin.h | 10 |
2 files changed, 24 insertions, 7 deletions
diff --git a/include/asm-ia64/bitops.h b/include/asm-ia64/bitops.h index 925d54cee475..7232528e2d0c 100644 --- a/include/asm-ia64/bitops.h +++ b/include/asm-ia64/bitops.h | |||
@@ -314,8 +314,8 @@ __ffs (unsigned long x) | |||
314 | #ifdef __KERNEL__ | 314 | #ifdef __KERNEL__ |
315 | 315 | ||
316 | /* | 316 | /* |
317 | * find_last_zero_bit - find the last zero bit in a 64 bit quantity | 317 | * Return bit number of last (most-significant) bit set. Undefined |
318 | * @x: The value to search | 318 | * for x==0. Bits are numbered from 0..63 (e.g., ia64_fls(9) == 3). |
319 | */ | 319 | */ |
320 | static inline unsigned long | 320 | static inline unsigned long |
321 | ia64_fls (unsigned long x) | 321 | ia64_fls (unsigned long x) |
@@ -327,10 +327,23 @@ ia64_fls (unsigned long x) | |||
327 | return exp - 0xffff; | 327 | return exp - 0xffff; |
328 | } | 328 | } |
329 | 329 | ||
330 | /* | ||
331 | * Find the last (most significant) bit set. Returns 0 for x==0 and | ||
332 | * bits are numbered from 1..32 (e.g., fls(9) == 4). | ||
333 | */ | ||
330 | static inline int | 334 | static inline int |
331 | fls (int x) | 335 | fls (int t) |
332 | { | 336 | { |
333 | return ia64_fls((unsigned int) x); | 337 | unsigned long x = t & 0xffffffffu; |
338 | |||
339 | if (!x) | ||
340 | return 0; | ||
341 | x |= x >> 1; | ||
342 | x |= x >> 2; | ||
343 | x |= x >> 4; | ||
344 | x |= x >> 8; | ||
345 | x |= x >> 16; | ||
346 | return ia64_popcnt(x); | ||
334 | } | 347 | } |
335 | 348 | ||
336 | /* | 349 | /* |
diff --git a/include/asm-ia64/gcc_intrin.h b/include/asm-ia64/gcc_intrin.h index 7c357dfbae50..4fb4e439b05c 100644 --- a/include/asm-ia64/gcc_intrin.h +++ b/include/asm-ia64/gcc_intrin.h | |||
@@ -133,13 +133,17 @@ register unsigned long ia64_r13 asm ("r13") __attribute_used__; | |||
133 | ia64_intri_res; \ | 133 | ia64_intri_res; \ |
134 | }) | 134 | }) |
135 | 135 | ||
136 | #define ia64_popcnt(x) \ | 136 | #if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) |
137 | ({ \ | 137 | # define ia64_popcnt(x) __builtin_popcountl(x) |
138 | #else | ||
139 | # define ia64_popcnt(x) \ | ||
140 | ({ \ | ||
138 | __u64 ia64_intri_res; \ | 141 | __u64 ia64_intri_res; \ |
139 | asm ("popcnt %0=%1" : "=r" (ia64_intri_res) : "r" (x)); \ | 142 | asm ("popcnt %0=%1" : "=r" (ia64_intri_res) : "r" (x)); \ |
140 | \ | 143 | \ |
141 | ia64_intri_res; \ | 144 | ia64_intri_res; \ |
142 | }) | 145 | }) |
146 | #endif | ||
143 | 147 | ||
144 | #define ia64_getf_exp(x) \ | 148 | #define ia64_getf_exp(x) \ |
145 | ({ \ | 149 | ({ \ |