diff options
author | David Howells <dhowells@redhat.com> | 2012-09-13 08:09:33 -0400 |
---|---|---|
committer | Rusty Russell <rusty@rustcorp.com.au> | 2012-10-07 23:20:11 -0400 |
commit | aacf29bf1bf133f6219e6f8969d4ebc2ac76458f (patch) | |
tree | b4379ab6617d1e963020365e03cda0b551bc3237 /lib | |
parent | cf7f601c067994f371ba77721d1e45fce61a4569 (diff) |
MPILIB: Provide count_leading/trailing_zeros() based on arch functions
Provide count_leading/trailing_zeros() macros based on extant arch bit scanning
functions rather than reimplementing from scratch in MPILIB.
Whilst we're at it, turn count_foo_zeros(n, x) into n = count_foo_zeros(x).
Also move the definition to asm-generic as other people may be interested in
using it.
Signed-off-by: David Howells <dhowells@redhat.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Cc: Arnd Bergmann <arnd@arndb.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/mpi/longlong.h | 138 | ||||
-rw-r--r-- | lib/mpi/mpi-bit.c | 2 | ||||
-rw-r--r-- | lib/mpi/mpi-pow.c | 4 |
3 files changed, 5 insertions, 139 deletions
diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h index 29f98624ef93..678ce4f1e124 100644 --- a/lib/mpi/longlong.h +++ b/lib/mpi/longlong.h | |||
@@ -19,6 +19,8 @@ | |||
19 | * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, | 19 | * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
20 | * MA 02111-1307, USA. */ | 20 | * MA 02111-1307, USA. */ |
21 | 21 | ||
22 | #include <asm-generic/bitops/count_zeros.h> | ||
23 | |||
22 | /* You have to define the following before including this file: | 24 | /* You have to define the following before including this file: |
23 | * | 25 | * |
24 | * UWtype -- An unsigned type, default type for operations (typically a "word") | 26 | * UWtype -- An unsigned type, default type for operations (typically a "word") |
@@ -146,12 +148,6 @@ do { \ | |||
146 | : "1" ((USItype)(n1)), \ | 148 | : "1" ((USItype)(n1)), \ |
147 | "r" ((USItype)(n0)), \ | 149 | "r" ((USItype)(n0)), \ |
148 | "r" ((USItype)(d))) | 150 | "r" ((USItype)(d))) |
149 | |||
150 | #define count_leading_zeros(count, x) \ | ||
151 | __asm__ ("clz %0,%1" \ | ||
152 | : "=r" ((USItype)(count)) \ | ||
153 | : "r" ((USItype)(x))) | ||
154 | #define COUNT_LEADING_ZEROS_0 32 | ||
155 | #endif /* __a29k__ */ | 151 | #endif /* __a29k__ */ |
156 | 152 | ||
157 | #if defined(__alpha) && W_TYPE_SIZE == 64 | 153 | #if defined(__alpha) && W_TYPE_SIZE == 64 |
@@ -298,11 +294,6 @@ extern UDItype __udiv_qrnnd(); | |||
298 | : "1" ((USItype)(nh)), \ | 294 | : "1" ((USItype)(nh)), \ |
299 | "0" ((USItype)(nl)), \ | 295 | "0" ((USItype)(nl)), \ |
300 | "g" ((USItype)(d))) | 296 | "g" ((USItype)(d))) |
301 | #define count_leading_zeros(count, x) \ | ||
302 | __asm__ ("bsch/1 %1,%0" \ | ||
303 | : "=g" (count) \ | ||
304 | : "g" ((USItype)(x)), \ | ||
305 | "0" ((USItype)0)) | ||
306 | #endif | 297 | #endif |
307 | 298 | ||
308 | /*************************************** | 299 | /*************************************** |
@@ -354,27 +345,6 @@ do { USItype __r; \ | |||
354 | } while (0) | 345 | } while (0) |
355 | extern USItype __udiv_qrnnd(); | 346 | extern USItype __udiv_qrnnd(); |
356 | #endif /* LONGLONG_STANDALONE */ | 347 | #endif /* LONGLONG_STANDALONE */ |
357 | #define count_leading_zeros(count, x) \ | ||
358 | do { \ | ||
359 | USItype __tmp; \ | ||
360 | __asm__ ( \ | ||
361 | "ldi 1,%0\n" \ | ||
362 | "extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \ | ||
363 | "extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n" \ | ||
364 | "ldo 16(%0),%0 ; Yes. Perform add.\n" \ | ||
365 | "extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \ | ||
366 | "extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n" \ | ||
367 | "ldo 8(%0),%0 ; Yes. Perform add.\n" \ | ||
368 | "extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \ | ||
369 | "extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n" \ | ||
370 | "ldo 4(%0),%0 ; Yes. Perform add.\n" \ | ||
371 | "extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \ | ||
372 | "extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n" \ | ||
373 | "ldo 2(%0),%0 ; Yes. Perform add.\n" \ | ||
374 | "extru %1,30,1,%1 ; Extract bit 1.\n" \ | ||
375 | "sub %0,%1,%0 ; Subtract it. " \ | ||
376 | : "=r" (count), "=r" (__tmp) : "1" (x)); \ | ||
377 | } while (0) | ||
378 | #endif /* hppa */ | 348 | #endif /* hppa */ |
379 | 349 | ||
380 | /*************************************** | 350 | /*************************************** |
@@ -457,15 +427,6 @@ do { \ | |||
457 | : "0" ((USItype)(n0)), \ | 427 | : "0" ((USItype)(n0)), \ |
458 | "1" ((USItype)(n1)), \ | 428 | "1" ((USItype)(n1)), \ |
459 | "rm" ((USItype)(d))) | 429 | "rm" ((USItype)(d))) |
460 | #define count_leading_zeros(count, x) \ | ||
461 | do { \ | ||
462 | USItype __cbtmp; \ | ||
463 | __asm__ ("bsrl %1,%0" \ | ||
464 | : "=r" (__cbtmp) : "rm" ((USItype)(x))); \ | ||
465 | (count) = __cbtmp ^ 31; \ | ||
466 | } while (0) | ||
467 | #define count_trailing_zeros(count, x) \ | ||
468 | __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x))) | ||
469 | #ifndef UMUL_TIME | 430 | #ifndef UMUL_TIME |
470 | #define UMUL_TIME 40 | 431 | #define UMUL_TIME 40 |
471 | #endif | 432 | #endif |
@@ -536,15 +497,6 @@ do { \ | |||
536 | "dI" ((USItype)(d))); \ | 497 | "dI" ((USItype)(d))); \ |
537 | (r) = __rq.__i.__l; (q) = __rq.__i.__h; \ | 498 | (r) = __rq.__i.__l; (q) = __rq.__i.__h; \ |
538 | } while (0) | 499 | } while (0) |
539 | #define count_leading_zeros(count, x) \ | ||
540 | do { \ | ||
541 | USItype __cbtmp; \ | ||
542 | __asm__ ("scanbit %1,%0" \ | ||
543 | : "=r" (__cbtmp) \ | ||
544 | : "r" ((USItype)(x))); \ | ||
545 | (count) = __cbtmp ^ 31; \ | ||
546 | } while (0) | ||
547 | #define COUNT_LEADING_ZEROS_0 (-32) /* sic */ | ||
548 | #if defined(__i960mx) /* what is the proper symbol to test??? */ | 500 | #if defined(__i960mx) /* what is the proper symbol to test??? */ |
549 | #define rshift_rhlc(r, h, l, c) \ | 501 | #define rshift_rhlc(r, h, l, c) \ |
550 | do { \ | 502 | do { \ |
@@ -603,11 +555,6 @@ do { \ | |||
603 | : "0" ((USItype)(n0)), \ | 555 | : "0" ((USItype)(n0)), \ |
604 | "1" ((USItype)(n1)), \ | 556 | "1" ((USItype)(n1)), \ |
605 | "dmi" ((USItype)(d))) | 557 | "dmi" ((USItype)(d))) |
606 | #define count_leading_zeros(count, x) \ | ||
607 | __asm__ ("bfffo %1{%b2:%b2},%0" \ | ||
608 | : "=d" ((USItype)(count)) \ | ||
609 | : "od" ((USItype)(x)), "n" (0)) | ||
610 | #define COUNT_LEADING_ZEROS_0 32 | ||
611 | #else /* not mc68020 */ | 558 | #else /* not mc68020 */ |
612 | #define umul_ppmm(xh, xl, a, b) \ | 559 | #define umul_ppmm(xh, xl, a, b) \ |
613 | do { USItype __umul_tmp1, __umul_tmp2; \ | 560 | do { USItype __umul_tmp1, __umul_tmp2; \ |
@@ -664,15 +611,6 @@ do { USItype __umul_tmp1, __umul_tmp2; \ | |||
664 | "rJ" ((USItype)(bh)), \ | 611 | "rJ" ((USItype)(bh)), \ |
665 | "rJ" ((USItype)(al)), \ | 612 | "rJ" ((USItype)(al)), \ |
666 | "rJ" ((USItype)(bl))) | 613 | "rJ" ((USItype)(bl))) |
667 | #define count_leading_zeros(count, x) \ | ||
668 | do { \ | ||
669 | USItype __cbtmp; \ | ||
670 | __asm__ ("ff1 %0,%1" \ | ||
671 | : "=r" (__cbtmp) \ | ||
672 | : "r" ((USItype)(x))); \ | ||
673 | (count) = __cbtmp ^ 31; \ | ||
674 | } while (0) | ||
675 | #define COUNT_LEADING_ZEROS_0 63 /* sic */ | ||
676 | #if defined(__m88110__) | 614 | #if defined(__m88110__) |
677 | #define umul_ppmm(wh, wl, u, v) \ | 615 | #define umul_ppmm(wh, wl, u, v) \ |
678 | do { \ | 616 | do { \ |
@@ -779,12 +717,6 @@ do { \ | |||
779 | : "0" (__xx.__ll), \ | 717 | : "0" (__xx.__ll), \ |
780 | "g" ((USItype)(d))); \ | 718 | "g" ((USItype)(d))); \ |
781 | (r) = __xx.__i.__l; (q) = __xx.__i.__h; }) | 719 | (r) = __xx.__i.__l; (q) = __xx.__i.__h; }) |
782 | #define count_trailing_zeros(count, x) \ | ||
783 | do { \ | ||
784 | __asm__("ffsd %2,%0" \ | ||
785 | : "=r"((USItype) (count)) \ | ||
786 | : "0"((USItype) 0), "r"((USItype) (x))); \ | ||
787 | } while (0) | ||
788 | #endif /* __ns32000__ */ | 720 | #endif /* __ns32000__ */ |
789 | 721 | ||
790 | /*************************************** | 722 | /*************************************** |
@@ -855,11 +787,6 @@ do { \ | |||
855 | "rI" ((USItype)(al)), \ | 787 | "rI" ((USItype)(al)), \ |
856 | "r" ((USItype)(bl))); \ | 788 | "r" ((USItype)(bl))); \ |
857 | } while (0) | 789 | } while (0) |
858 | #define count_leading_zeros(count, x) \ | ||
859 | __asm__ ("{cntlz|cntlzw} %0,%1" \ | ||
860 | : "=r" ((USItype)(count)) \ | ||
861 | : "r" ((USItype)(x))) | ||
862 | #define COUNT_LEADING_ZEROS_0 32 | ||
863 | #if defined(_ARCH_PPC) | 790 | #if defined(_ARCH_PPC) |
864 | #define umul_ppmm(ph, pl, m0, m1) \ | 791 | #define umul_ppmm(ph, pl, m0, m1) \ |
865 | do { \ | 792 | do { \ |
@@ -1001,19 +928,6 @@ do { \ | |||
1001 | } while (0) | 928 | } while (0) |
1002 | #define UMUL_TIME 20 | 929 | #define UMUL_TIME 20 |
1003 | #define UDIV_TIME 200 | 930 | #define UDIV_TIME 200 |
1004 | #define count_leading_zeros(count, x) \ | ||
1005 | do { \ | ||
1006 | if ((x) >= 0x10000) \ | ||
1007 | __asm__ ("clz %0,%1" \ | ||
1008 | : "=r" ((USItype)(count)) \ | ||
1009 | : "r" ((USItype)(x) >> 16)); \ | ||
1010 | else { \ | ||
1011 | __asm__ ("clz %0,%1" \ | ||
1012 | : "=r" ((USItype)(count)) \ | ||
1013 | : "r" ((USItype)(x))); \ | ||
1014 | (count) += 16; \ | ||
1015 | } \ | ||
1016 | } while (0) | ||
1017 | #endif /* RT/ROMP */ | 931 | #endif /* RT/ROMP */ |
1018 | 932 | ||
1019 | /*************************************** | 933 | /*************************************** |
@@ -1142,13 +1056,6 @@ do { \ | |||
1142 | "rI" ((USItype)(d)) \ | 1056 | "rI" ((USItype)(d)) \ |
1143 | : "%g1" __AND_CLOBBER_CC) | 1057 | : "%g1" __AND_CLOBBER_CC) |
1144 | #define UDIV_TIME 37 | 1058 | #define UDIV_TIME 37 |
1145 | #define count_leading_zeros(count, x) \ | ||
1146 | __asm__ ("scan %1,0,%0" \ | ||
1147 | : "=r" ((USItype)(x)) \ | ||
1148 | : "r" ((USItype)(count))) | ||
1149 | /* Early sparclites return 63 for an argument of 0, but they warn that future | ||
1150 | implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0 | ||
1151 | undefined. */ | ||
1152 | #endif /* __sparclite__ */ | 1059 | #endif /* __sparclite__ */ |
1153 | #endif /* __sparc_v8__ */ | 1060 | #endif /* __sparc_v8__ */ |
1154 | /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */ | 1061 | /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */ |
@@ -1454,47 +1361,6 @@ do { \ | |||
1454 | #define udiv_qrnnd __udiv_qrnnd_c | 1361 | #define udiv_qrnnd __udiv_qrnnd_c |
1455 | #endif | 1362 | #endif |
1456 | 1363 | ||
1457 | #undef count_leading_zeros | ||
1458 | #if !defined(count_leading_zeros) | ||
1459 | extern | ||
1460 | #ifdef __STDC__ | ||
1461 | const | ||
1462 | #endif | ||
1463 | unsigned char __clz_tab[]; | ||
1464 | #define count_leading_zeros(count, x) \ | ||
1465 | do { \ | ||
1466 | UWtype __xr = (x); \ | ||
1467 | UWtype __a; \ | ||
1468 | \ | ||
1469 | if (W_TYPE_SIZE <= 32) { \ | ||
1470 | __a = __xr < ((UWtype) 1 << 2*__BITS4) \ | ||
1471 | ? (__xr < ((UWtype) 1 << __BITS4) ? 0 : __BITS4) \ | ||
1472 | : (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 : 3*__BITS4); \ | ||
1473 | } \ | ||
1474 | else { \ | ||
1475 | for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \ | ||
1476 | if (((__xr >> __a) & 0xff) != 0) \ | ||
1477 | break; \ | ||
1478 | } \ | ||
1479 | \ | ||
1480 | (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \ | ||
1481 | } while (0) | ||
1482 | /* This version gives a well-defined value for zero. */ | ||
1483 | #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE | ||
1484 | #endif | ||
1485 | |||
1486 | #if !defined(count_trailing_zeros) | ||
1487 | /* Define count_trailing_zeros using count_leading_zeros. The latter might be | ||
1488 | defined in asm, but if it is not, the C version above is good enough. */ | ||
1489 | #define count_trailing_zeros(count, x) \ | ||
1490 | do { \ | ||
1491 | UWtype __ctz_x = (x); \ | ||
1492 | UWtype __ctz_c; \ | ||
1493 | count_leading_zeros(__ctz_c, __ctz_x & -__ctz_x); \ | ||
1494 | (count) = W_TYPE_SIZE - 1 - __ctz_c; \ | ||
1495 | } while (0) | ||
1496 | #endif | ||
1497 | |||
1498 | #ifndef UDIV_NEEDS_NORMALIZATION | 1364 | #ifndef UDIV_NEEDS_NORMALIZATION |
1499 | #define UDIV_NEEDS_NORMALIZATION 0 | 1365 | #define UDIV_NEEDS_NORMALIZATION 0 |
1500 | #endif | 1366 | #endif |
diff --git a/lib/mpi/mpi-bit.c b/lib/mpi/mpi-bit.c index 568724804f29..503537e08436 100644 --- a/lib/mpi/mpi-bit.c +++ b/lib/mpi/mpi-bit.c | |||
@@ -45,7 +45,7 @@ unsigned mpi_get_nbits(MPI a) | |||
45 | if (a->nlimbs) { | 45 | if (a->nlimbs) { |
46 | mpi_limb_t alimb = a->d[a->nlimbs - 1]; | 46 | mpi_limb_t alimb = a->d[a->nlimbs - 1]; |
47 | if (alimb) | 47 | if (alimb) |
48 | count_leading_zeros(n, alimb); | 48 | n = count_leading_zeros(alimb); |
49 | else | 49 | else |
50 | n = BITS_PER_MPI_LIMB; | 50 | n = BITS_PER_MPI_LIMB; |
51 | n = BITS_PER_MPI_LIMB - n + (a->nlimbs - 1) * BITS_PER_MPI_LIMB; | 51 | n = BITS_PER_MPI_LIMB - n + (a->nlimbs - 1) * BITS_PER_MPI_LIMB; |
diff --git a/lib/mpi/mpi-pow.c b/lib/mpi/mpi-pow.c index 67f3e79af914..5464c8744ea9 100644 --- a/lib/mpi/mpi-pow.c +++ b/lib/mpi/mpi-pow.c | |||
@@ -77,7 +77,7 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod) | |||
77 | mp = mp_marker = mpi_alloc_limb_space(msize); | 77 | mp = mp_marker = mpi_alloc_limb_space(msize); |
78 | if (!mp) | 78 | if (!mp) |
79 | goto enomem; | 79 | goto enomem; |
80 | count_leading_zeros(mod_shift_cnt, mod->d[msize - 1]); | 80 | mod_shift_cnt = count_leading_zeros(mod->d[msize - 1]); |
81 | if (mod_shift_cnt) | 81 | if (mod_shift_cnt) |
82 | mpihelp_lshift(mp, mod->d, msize, mod_shift_cnt); | 82 | mpihelp_lshift(mp, mod->d, msize, mod_shift_cnt); |
83 | else | 83 | else |
@@ -169,7 +169,7 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod) | |||
169 | 169 | ||
170 | i = esize - 1; | 170 | i = esize - 1; |
171 | e = ep[i]; | 171 | e = ep[i]; |
172 | count_leading_zeros(c, e); | 172 | c = count_leading_zeros(e); |
173 | e = (e << c) << 1; /* shift the exp bits to the left, lose msb */ | 173 | e = (e << c) << 1; /* shift the exp bits to the left, lose msb */ |
174 | c = BITS_PER_MPI_LIMB - 1 - c; | 174 | c = BITS_PER_MPI_LIMB - 1 - c; |
175 | 175 | ||