diff options
Diffstat (limited to 'arch/arm/nwfpe/softfloat.c')
-rw-r--r-- | arch/arm/nwfpe/softfloat.c | 334 |
1 files changed, 163 insertions, 171 deletions
diff --git a/arch/arm/nwfpe/softfloat.c b/arch/arm/nwfpe/softfloat.c index e038dd3be9b3..8b75a6e7cb3a 100644 --- a/arch/arm/nwfpe/softfloat.c +++ b/arch/arm/nwfpe/softfloat.c | |||
@@ -36,16 +36,6 @@ this code that are retained. | |||
36 | 36 | ||
37 | /* | 37 | /* |
38 | ------------------------------------------------------------------------------- | 38 | ------------------------------------------------------------------------------- |
39 | Floating-point rounding mode, extended double-precision rounding precision, | ||
40 | and exception flags. | ||
41 | ------------------------------------------------------------------------------- | ||
42 | */ | ||
43 | int8 float_rounding_mode = float_round_nearest_even; | ||
44 | int8 floatx80_rounding_precision = 80; | ||
45 | int8 float_exception_flags; | ||
46 | |||
47 | /* | ||
48 | ------------------------------------------------------------------------------- | ||
49 | Primitive arithmetic functions, including multi-word arithmetic, and | 39 | Primitive arithmetic functions, including multi-word arithmetic, and |
50 | division and square root approximations. (Can be specialized to target if | 40 | division and square root approximations. (Can be specialized to target if |
51 | desired.) | 41 | desired.) |
@@ -77,14 +67,14 @@ input is too large, however, the invalid exception is raised and the largest | |||
77 | positive or negative integer is returned. | 67 | positive or negative integer is returned. |
78 | ------------------------------------------------------------------------------- | 68 | ------------------------------------------------------------------------------- |
79 | */ | 69 | */ |
80 | static int32 roundAndPackInt32( flag zSign, bits64 absZ ) | 70 | static int32 roundAndPackInt32( struct roundingData *roundData, flag zSign, bits64 absZ ) |
81 | { | 71 | { |
82 | int8 roundingMode; | 72 | int8 roundingMode; |
83 | flag roundNearestEven; | 73 | flag roundNearestEven; |
84 | int8 roundIncrement, roundBits; | 74 | int8 roundIncrement, roundBits; |
85 | int32 z; | 75 | int32 z; |
86 | 76 | ||
87 | roundingMode = float_rounding_mode; | 77 | roundingMode = roundData->mode; |
88 | roundNearestEven = ( roundingMode == float_round_nearest_even ); | 78 | roundNearestEven = ( roundingMode == float_round_nearest_even ); |
89 | roundIncrement = 0x40; | 79 | roundIncrement = 0x40; |
90 | if ( ! roundNearestEven ) { | 80 | if ( ! roundNearestEven ) { |
@@ -107,10 +97,10 @@ static int32 roundAndPackInt32( flag zSign, bits64 absZ ) | |||
107 | z = absZ; | 97 | z = absZ; |
108 | if ( zSign ) z = - z; | 98 | if ( zSign ) z = - z; |
109 | if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { | 99 | if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { |
110 | float_exception_flags |= float_flag_invalid; | 100 | roundData->exception |= float_flag_invalid; |
111 | return zSign ? 0x80000000 : 0x7FFFFFFF; | 101 | return zSign ? 0x80000000 : 0x7FFFFFFF; |
112 | } | 102 | } |
113 | if ( roundBits ) float_exception_flags |= float_flag_inexact; | 103 | if ( roundBits ) roundData->exception |= float_flag_inexact; |
114 | return z; | 104 | return z; |
115 | 105 | ||
116 | } | 106 | } |
@@ -224,14 +214,14 @@ The handling of underflow and overflow follows the IEC/IEEE Standard for | |||
224 | Binary Floating-point Arithmetic. | 214 | Binary Floating-point Arithmetic. |
225 | ------------------------------------------------------------------------------- | 215 | ------------------------------------------------------------------------------- |
226 | */ | 216 | */ |
227 | static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) | 217 | static float32 roundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig ) |
228 | { | 218 | { |
229 | int8 roundingMode; | 219 | int8 roundingMode; |
230 | flag roundNearestEven; | 220 | flag roundNearestEven; |
231 | int8 roundIncrement, roundBits; | 221 | int8 roundIncrement, roundBits; |
232 | flag isTiny; | 222 | flag isTiny; |
233 | 223 | ||
234 | roundingMode = float_rounding_mode; | 224 | roundingMode = roundData->mode; |
235 | roundNearestEven = ( roundingMode == float_round_nearest_even ); | 225 | roundNearestEven = ( roundingMode == float_round_nearest_even ); |
236 | roundIncrement = 0x40; | 226 | roundIncrement = 0x40; |
237 | if ( ! roundNearestEven ) { | 227 | if ( ! roundNearestEven ) { |
@@ -254,7 +244,7 @@ static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) | |||
254 | || ( ( zExp == 0xFD ) | 244 | || ( ( zExp == 0xFD ) |
255 | && ( (sbits32) ( zSig + roundIncrement ) < 0 ) ) | 245 | && ( (sbits32) ( zSig + roundIncrement ) < 0 ) ) |
256 | ) { | 246 | ) { |
257 | float_raise( float_flag_overflow | float_flag_inexact ); | 247 | roundData->exception |= float_flag_overflow | float_flag_inexact; |
258 | return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 ); | 248 | return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 ); |
259 | } | 249 | } |
260 | if ( zExp < 0 ) { | 250 | if ( zExp < 0 ) { |
@@ -265,10 +255,10 @@ static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) | |||
265 | shift32RightJamming( zSig, - zExp, &zSig ); | 255 | shift32RightJamming( zSig, - zExp, &zSig ); |
266 | zExp = 0; | 256 | zExp = 0; |
267 | roundBits = zSig & 0x7F; | 257 | roundBits = zSig & 0x7F; |
268 | if ( isTiny && roundBits ) float_raise( float_flag_underflow ); | 258 | if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow; |
269 | } | 259 | } |
270 | } | 260 | } |
271 | if ( roundBits ) float_exception_flags |= float_flag_inexact; | 261 | if ( roundBits ) roundData->exception |= float_flag_inexact; |
272 | zSig = ( zSig + roundIncrement )>>7; | 262 | zSig = ( zSig + roundIncrement )>>7; |
273 | zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); | 263 | zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); |
274 | if ( zSig == 0 ) zExp = 0; | 264 | if ( zSig == 0 ) zExp = 0; |
@@ -287,12 +277,12 @@ point exponent. | |||
287 | ------------------------------------------------------------------------------- | 277 | ------------------------------------------------------------------------------- |
288 | */ | 278 | */ |
289 | static float32 | 279 | static float32 |
290 | normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) | 280 | normalizeRoundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig ) |
291 | { | 281 | { |
292 | int8 shiftCount; | 282 | int8 shiftCount; |
293 | 283 | ||
294 | shiftCount = countLeadingZeros32( zSig ) - 1; | 284 | shiftCount = countLeadingZeros32( zSig ) - 1; |
295 | return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount ); | 285 | return roundAndPackFloat32( roundData, zSign, zExp - shiftCount, zSig<<shiftCount ); |
296 | 286 | ||
297 | } | 287 | } |
298 | 288 | ||
@@ -395,14 +385,14 @@ The handling of underflow and overflow follows the IEC/IEEE Standard for | |||
395 | Binary Floating-point Arithmetic. | 385 | Binary Floating-point Arithmetic. |
396 | ------------------------------------------------------------------------------- | 386 | ------------------------------------------------------------------------------- |
397 | */ | 387 | */ |
398 | static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) | 388 | static float64 roundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig ) |
399 | { | 389 | { |
400 | int8 roundingMode; | 390 | int8 roundingMode; |
401 | flag roundNearestEven; | 391 | flag roundNearestEven; |
402 | int16 roundIncrement, roundBits; | 392 | int16 roundIncrement, roundBits; |
403 | flag isTiny; | 393 | flag isTiny; |
404 | 394 | ||
405 | roundingMode = float_rounding_mode; | 395 | roundingMode = roundData->mode; |
406 | roundNearestEven = ( roundingMode == float_round_nearest_even ); | 396 | roundNearestEven = ( roundingMode == float_round_nearest_even ); |
407 | roundIncrement = 0x200; | 397 | roundIncrement = 0x200; |
408 | if ( ! roundNearestEven ) { | 398 | if ( ! roundNearestEven ) { |
@@ -427,7 +417,7 @@ static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) | |||
427 | ) { | 417 | ) { |
428 | //register int lr = __builtin_return_address(0); | 418 | //register int lr = __builtin_return_address(0); |
429 | //printk("roundAndPackFloat64 called from 0x%08x\n",lr); | 419 | //printk("roundAndPackFloat64 called from 0x%08x\n",lr); |
430 | float_raise( float_flag_overflow | float_flag_inexact ); | 420 | roundData->exception |= float_flag_overflow | float_flag_inexact; |
431 | return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 ); | 421 | return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 ); |
432 | } | 422 | } |
433 | if ( zExp < 0 ) { | 423 | if ( zExp < 0 ) { |
@@ -438,10 +428,10 @@ static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) | |||
438 | shift64RightJamming( zSig, - zExp, &zSig ); | 428 | shift64RightJamming( zSig, - zExp, &zSig ); |
439 | zExp = 0; | 429 | zExp = 0; |
440 | roundBits = zSig & 0x3FF; | 430 | roundBits = zSig & 0x3FF; |
441 | if ( isTiny && roundBits ) float_raise( float_flag_underflow ); | 431 | if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow; |
442 | } | 432 | } |
443 | } | 433 | } |
444 | if ( roundBits ) float_exception_flags |= float_flag_inexact; | 434 | if ( roundBits ) roundData->exception |= float_flag_inexact; |
445 | zSig = ( zSig + roundIncrement )>>10; | 435 | zSig = ( zSig + roundIncrement )>>10; |
446 | zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); | 436 | zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); |
447 | if ( zSig == 0 ) zExp = 0; | 437 | if ( zSig == 0 ) zExp = 0; |
@@ -460,12 +450,12 @@ point exponent. | |||
460 | ------------------------------------------------------------------------------- | 450 | ------------------------------------------------------------------------------- |
461 | */ | 451 | */ |
462 | static float64 | 452 | static float64 |
463 | normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) | 453 | normalizeRoundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig ) |
464 | { | 454 | { |
465 | int8 shiftCount; | 455 | int8 shiftCount; |
466 | 456 | ||
467 | shiftCount = countLeadingZeros64( zSig ) - 1; | 457 | shiftCount = countLeadingZeros64( zSig ) - 1; |
468 | return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount ); | 458 | return roundAndPackFloat64( roundData, zSign, zExp - shiftCount, zSig<<shiftCount ); |
469 | 459 | ||
470 | } | 460 | } |
471 | 461 | ||
@@ -572,14 +562,15 @@ Floating-point Arithmetic. | |||
572 | */ | 562 | */ |
573 | static floatx80 | 563 | static floatx80 |
574 | roundAndPackFloatx80( | 564 | roundAndPackFloatx80( |
575 | int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 | 565 | struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 |
576 | ) | 566 | ) |
577 | { | 567 | { |
578 | int8 roundingMode; | 568 | int8 roundingMode, roundingPrecision; |
579 | flag roundNearestEven, increment, isTiny; | 569 | flag roundNearestEven, increment, isTiny; |
580 | int64 roundIncrement, roundMask, roundBits; | 570 | int64 roundIncrement, roundMask, roundBits; |
581 | 571 | ||
582 | roundingMode = float_rounding_mode; | 572 | roundingMode = roundData->mode; |
573 | roundingPrecision = roundData->precision; | ||
583 | roundNearestEven = ( roundingMode == float_round_nearest_even ); | 574 | roundNearestEven = ( roundingMode == float_round_nearest_even ); |
584 | if ( roundingPrecision == 80 ) goto precision80; | 575 | if ( roundingPrecision == 80 ) goto precision80; |
585 | if ( roundingPrecision == 64 ) { | 576 | if ( roundingPrecision == 64 ) { |
@@ -623,8 +614,8 @@ static floatx80 | |||
623 | shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); | 614 | shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); |
624 | zExp = 0; | 615 | zExp = 0; |
625 | roundBits = zSig0 & roundMask; | 616 | roundBits = zSig0 & roundMask; |
626 | if ( isTiny && roundBits ) float_raise( float_flag_underflow ); | 617 | if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow; |
627 | if ( roundBits ) float_exception_flags |= float_flag_inexact; | 618 | if ( roundBits ) roundData->exception |= float_flag_inexact; |
628 | zSig0 += roundIncrement; | 619 | zSig0 += roundIncrement; |
629 | if ( (sbits64) zSig0 < 0 ) zExp = 1; | 620 | if ( (sbits64) zSig0 < 0 ) zExp = 1; |
630 | roundIncrement = roundMask + 1; | 621 | roundIncrement = roundMask + 1; |
@@ -635,7 +626,7 @@ static floatx80 | |||
635 | return packFloatx80( zSign, zExp, zSig0 ); | 626 | return packFloatx80( zSign, zExp, zSig0 ); |
636 | } | 627 | } |
637 | } | 628 | } |
638 | if ( roundBits ) float_exception_flags |= float_flag_inexact; | 629 | if ( roundBits ) roundData->exception |= float_flag_inexact; |
639 | zSig0 += roundIncrement; | 630 | zSig0 += roundIncrement; |
640 | if ( zSig0 < roundIncrement ) { | 631 | if ( zSig0 < roundIncrement ) { |
641 | ++zExp; | 632 | ++zExp; |
@@ -672,7 +663,7 @@ static floatx80 | |||
672 | ) { | 663 | ) { |
673 | roundMask = 0; | 664 | roundMask = 0; |
674 | overflow: | 665 | overflow: |
675 | float_raise( float_flag_overflow | float_flag_inexact ); | 666 | roundData->exception |= float_flag_overflow | float_flag_inexact; |
676 | if ( ( roundingMode == float_round_to_zero ) | 667 | if ( ( roundingMode == float_round_to_zero ) |
677 | || ( zSign && ( roundingMode == float_round_up ) ) | 668 | || ( zSign && ( roundingMode == float_round_up ) ) |
678 | || ( ! zSign && ( roundingMode == float_round_down ) ) | 669 | || ( ! zSign && ( roundingMode == float_round_down ) ) |
@@ -689,8 +680,8 @@ static floatx80 | |||
689 | || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) ); | 680 | || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) ); |
690 | shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); | 681 | shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); |
691 | zExp = 0; | 682 | zExp = 0; |
692 | if ( isTiny && zSig1 ) float_raise( float_flag_underflow ); | 683 | if ( isTiny && zSig1 ) roundData->exception |= float_flag_underflow; |
693 | if ( zSig1 ) float_exception_flags |= float_flag_inexact; | 684 | if ( zSig1 ) roundData->exception |= float_flag_inexact; |
694 | if ( roundNearestEven ) { | 685 | if ( roundNearestEven ) { |
695 | increment = ( (sbits64) zSig1 < 0 ); | 686 | increment = ( (sbits64) zSig1 < 0 ); |
696 | } | 687 | } |
@@ -710,7 +701,7 @@ static floatx80 | |||
710 | return packFloatx80( zSign, zExp, zSig0 ); | 701 | return packFloatx80( zSign, zExp, zSig0 ); |
711 | } | 702 | } |
712 | } | 703 | } |
713 | if ( zSig1 ) float_exception_flags |= float_flag_inexact; | 704 | if ( zSig1 ) roundData->exception |= float_flag_inexact; |
714 | if ( increment ) { | 705 | if ( increment ) { |
715 | ++zSig0; | 706 | ++zSig0; |
716 | if ( zSig0 == 0 ) { | 707 | if ( zSig0 == 0 ) { |
@@ -740,7 +731,7 @@ normalized. | |||
740 | */ | 731 | */ |
741 | static floatx80 | 732 | static floatx80 |
742 | normalizeRoundAndPackFloatx80( | 733 | normalizeRoundAndPackFloatx80( |
743 | int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 | 734 | struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 |
744 | ) | 735 | ) |
745 | { | 736 | { |
746 | int8 shiftCount; | 737 | int8 shiftCount; |
@@ -754,7 +745,7 @@ static floatx80 | |||
754 | shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); | 745 | shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); |
755 | zExp -= shiftCount; | 746 | zExp -= shiftCount; |
756 | return | 747 | return |
757 | roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 ); | 748 | roundAndPackFloatx80( roundData, zSign, zExp, zSig0, zSig1 ); |
758 | 749 | ||
759 | } | 750 | } |
760 | 751 | ||
@@ -767,14 +758,14 @@ the single-precision floating-point format. The conversion is performed | |||
767 | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. | 758 | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. |
768 | ------------------------------------------------------------------------------- | 759 | ------------------------------------------------------------------------------- |
769 | */ | 760 | */ |
770 | float32 int32_to_float32( int32 a ) | 761 | float32 int32_to_float32(struct roundingData *roundData, int32 a) |
771 | { | 762 | { |
772 | flag zSign; | 763 | flag zSign; |
773 | 764 | ||
774 | if ( a == 0 ) return 0; | 765 | if ( a == 0 ) return 0; |
775 | if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); | 766 | if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); |
776 | zSign = ( a < 0 ); | 767 | zSign = ( a < 0 ); |
777 | return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a ); | 768 | return normalizeRoundAndPackFloat32( roundData, zSign, 0x9C, zSign ? - a : a ); |
778 | 769 | ||
779 | } | 770 | } |
780 | 771 | ||
@@ -840,7 +831,7 @@ positive integer is returned. Otherwise, if the conversion overflows, the | |||
840 | largest integer with the same sign as `a' is returned. | 831 | largest integer with the same sign as `a' is returned. |
841 | ------------------------------------------------------------------------------- | 832 | ------------------------------------------------------------------------------- |
842 | */ | 833 | */ |
843 | int32 float32_to_int32( float32 a ) | 834 | int32 float32_to_int32( struct roundingData *roundData, float32 a ) |
844 | { | 835 | { |
845 | flag aSign; | 836 | flag aSign; |
846 | int16 aExp, shiftCount; | 837 | int16 aExp, shiftCount; |
@@ -856,7 +847,7 @@ int32 float32_to_int32( float32 a ) | |||
856 | zSig = aSig; | 847 | zSig = aSig; |
857 | zSig <<= 32; | 848 | zSig <<= 32; |
858 | if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig ); | 849 | if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig ); |
859 | return roundAndPackInt32( aSign, zSig ); | 850 | return roundAndPackInt32( roundData, aSign, zSig ); |
860 | 851 | ||
861 | } | 852 | } |
862 | 853 | ||
@@ -889,13 +880,13 @@ int32 float32_to_int32_round_to_zero( float32 a ) | |||
889 | return 0x80000000; | 880 | return 0x80000000; |
890 | } | 881 | } |
891 | else if ( aExp <= 0x7E ) { | 882 | else if ( aExp <= 0x7E ) { |
892 | if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; | 883 | if ( aExp | aSig ) float_raise( float_flag_inexact ); |
893 | return 0; | 884 | return 0; |
894 | } | 885 | } |
895 | aSig = ( aSig | 0x00800000 )<<8; | 886 | aSig = ( aSig | 0x00800000 )<<8; |
896 | z = aSig>>( - shiftCount ); | 887 | z = aSig>>( - shiftCount ); |
897 | if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { | 888 | if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { |
898 | float_exception_flags |= float_flag_inexact; | 889 | float_raise( float_flag_inexact ); |
899 | } | 890 | } |
900 | return aSign ? - z : z; | 891 | return aSign ? - z : z; |
901 | 892 | ||
@@ -973,7 +964,7 @@ operation is performed according to the IEC/IEEE Standard for Binary | |||
973 | Floating-point Arithmetic. | 964 | Floating-point Arithmetic. |
974 | ------------------------------------------------------------------------------- | 965 | ------------------------------------------------------------------------------- |
975 | */ | 966 | */ |
976 | float32 float32_round_to_int( float32 a ) | 967 | float32 float32_round_to_int( struct roundingData *roundData, float32 a ) |
977 | { | 968 | { |
978 | flag aSign; | 969 | flag aSign; |
979 | int16 aExp; | 970 | int16 aExp; |
@@ -988,11 +979,12 @@ float32 float32_round_to_int( float32 a ) | |||
988 | } | 979 | } |
989 | return a; | 980 | return a; |
990 | } | 981 | } |
982 | roundingMode = roundData->mode; | ||
991 | if ( aExp <= 0x7E ) { | 983 | if ( aExp <= 0x7E ) { |
992 | if ( (bits32) ( a<<1 ) == 0 ) return a; | 984 | if ( (bits32) ( a<<1 ) == 0 ) return a; |
993 | float_exception_flags |= float_flag_inexact; | 985 | roundData->exception |= float_flag_inexact; |
994 | aSign = extractFloat32Sign( a ); | 986 | aSign = extractFloat32Sign( a ); |
995 | switch ( float_rounding_mode ) { | 987 | switch ( roundingMode ) { |
996 | case float_round_nearest_even: | 988 | case float_round_nearest_even: |
997 | if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { | 989 | if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { |
998 | return packFloat32( aSign, 0x7F, 0 ); | 990 | return packFloat32( aSign, 0x7F, 0 ); |
@@ -1009,7 +1001,6 @@ float32 float32_round_to_int( float32 a ) | |||
1009 | lastBitMask <<= 0x96 - aExp; | 1001 | lastBitMask <<= 0x96 - aExp; |
1010 | roundBitsMask = lastBitMask - 1; | 1002 | roundBitsMask = lastBitMask - 1; |
1011 | z = a; | 1003 | z = a; |
1012 | roundingMode = float_rounding_mode; | ||
1013 | if ( roundingMode == float_round_nearest_even ) { | 1004 | if ( roundingMode == float_round_nearest_even ) { |
1014 | z += lastBitMask>>1; | 1005 | z += lastBitMask>>1; |
1015 | if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; | 1006 | if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; |
@@ -1020,7 +1011,7 @@ float32 float32_round_to_int( float32 a ) | |||
1020 | } | 1011 | } |
1021 | } | 1012 | } |
1022 | z &= ~ roundBitsMask; | 1013 | z &= ~ roundBitsMask; |
1023 | if ( z != a ) float_exception_flags |= float_flag_inexact; | 1014 | if ( z != a ) roundData->exception |= float_flag_inexact; |
1024 | return z; | 1015 | return z; |
1025 | 1016 | ||
1026 | } | 1017 | } |
@@ -1034,7 +1025,7 @@ addition is performed according to the IEC/IEEE Standard for Binary | |||
1034 | Floating-point Arithmetic. | 1025 | Floating-point Arithmetic. |
1035 | ------------------------------------------------------------------------------- | 1026 | ------------------------------------------------------------------------------- |
1036 | */ | 1027 | */ |
1037 | static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) | 1028 | static float32 addFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign ) |
1038 | { | 1029 | { |
1039 | int16 aExp, bExp, zExp; | 1030 | int16 aExp, bExp, zExp; |
1040 | bits32 aSig, bSig, zSig; | 1031 | bits32 aSig, bSig, zSig; |
@@ -1093,7 +1084,7 @@ static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) | |||
1093 | ++zExp; | 1084 | ++zExp; |
1094 | } | 1085 | } |
1095 | roundAndPack: | 1086 | roundAndPack: |
1096 | return roundAndPackFloat32( zSign, zExp, zSig ); | 1087 | return roundAndPackFloat32( roundData, zSign, zExp, zSig ); |
1097 | 1088 | ||
1098 | } | 1089 | } |
1099 | 1090 | ||
@@ -1106,7 +1097,7 @@ result is a NaN. The subtraction is performed according to the IEC/IEEE | |||
1106 | Standard for Binary Floating-point Arithmetic. | 1097 | Standard for Binary Floating-point Arithmetic. |
1107 | ------------------------------------------------------------------------------- | 1098 | ------------------------------------------------------------------------------- |
1108 | */ | 1099 | */ |
1109 | static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) | 1100 | static float32 subFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign ) |
1110 | { | 1101 | { |
1111 | int16 aExp, bExp, zExp; | 1102 | int16 aExp, bExp, zExp; |
1112 | bits32 aSig, bSig, zSig; | 1103 | bits32 aSig, bSig, zSig; |
@@ -1123,7 +1114,7 @@ static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) | |||
1123 | if ( expDiff < 0 ) goto bExpBigger; | 1114 | if ( expDiff < 0 ) goto bExpBigger; |
1124 | if ( aExp == 0xFF ) { | 1115 | if ( aExp == 0xFF ) { |
1125 | if ( aSig | bSig ) return propagateFloat32NaN( a, b ); | 1116 | if ( aSig | bSig ) return propagateFloat32NaN( a, b ); |
1126 | float_raise( float_flag_invalid ); | 1117 | roundData->exception |= float_flag_invalid; |
1127 | return float32_default_nan; | 1118 | return float32_default_nan; |
1128 | } | 1119 | } |
1129 | if ( aExp == 0 ) { | 1120 | if ( aExp == 0 ) { |
@@ -1132,7 +1123,7 @@ static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) | |||
1132 | } | 1123 | } |
1133 | if ( bSig < aSig ) goto aBigger; | 1124 | if ( bSig < aSig ) goto aBigger; |
1134 | if ( aSig < bSig ) goto bBigger; | 1125 | if ( aSig < bSig ) goto bBigger; |
1135 | return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); | 1126 | return packFloat32( roundData->mode == float_round_down, 0, 0 ); |
1136 | bExpBigger: | 1127 | bExpBigger: |
1137 | if ( bExp == 0xFF ) { | 1128 | if ( bExp == 0xFF ) { |
1138 | if ( bSig ) return propagateFloat32NaN( a, b ); | 1129 | if ( bSig ) return propagateFloat32NaN( a, b ); |
@@ -1169,7 +1160,7 @@ static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) | |||
1169 | zExp = aExp; | 1160 | zExp = aExp; |
1170 | normalizeRoundAndPack: | 1161 | normalizeRoundAndPack: |
1171 | --zExp; | 1162 | --zExp; |
1172 | return normalizeRoundAndPackFloat32( zSign, zExp, zSig ); | 1163 | return normalizeRoundAndPackFloat32( roundData, zSign, zExp, zSig ); |
1173 | 1164 | ||
1174 | } | 1165 | } |
1175 | 1166 | ||
@@ -1180,17 +1171,17 @@ and `b'. The operation is performed according to the IEC/IEEE Standard for | |||
1180 | Binary Floating-point Arithmetic. | 1171 | Binary Floating-point Arithmetic. |
1181 | ------------------------------------------------------------------------------- | 1172 | ------------------------------------------------------------------------------- |
1182 | */ | 1173 | */ |
1183 | float32 float32_add( float32 a, float32 b ) | 1174 | float32 float32_add( struct roundingData *roundData, float32 a, float32 b ) |
1184 | { | 1175 | { |
1185 | flag aSign, bSign; | 1176 | flag aSign, bSign; |
1186 | 1177 | ||
1187 | aSign = extractFloat32Sign( a ); | 1178 | aSign = extractFloat32Sign( a ); |
1188 | bSign = extractFloat32Sign( b ); | 1179 | bSign = extractFloat32Sign( b ); |
1189 | if ( aSign == bSign ) { | 1180 | if ( aSign == bSign ) { |
1190 | return addFloat32Sigs( a, b, aSign ); | 1181 | return addFloat32Sigs( roundData, a, b, aSign ); |
1191 | } | 1182 | } |
1192 | else { | 1183 | else { |
1193 | return subFloat32Sigs( a, b, aSign ); | 1184 | return subFloat32Sigs( roundData, a, b, aSign ); |
1194 | } | 1185 | } |
1195 | 1186 | ||
1196 | } | 1187 | } |
@@ -1202,17 +1193,17 @@ Returns the result of subtracting the single-precision floating-point values | |||
1202 | for Binary Floating-point Arithmetic. | 1193 | for Binary Floating-point Arithmetic. |
1203 | ------------------------------------------------------------------------------- | 1194 | ------------------------------------------------------------------------------- |
1204 | */ | 1195 | */ |
1205 | float32 float32_sub( float32 a, float32 b ) | 1196 | float32 float32_sub( struct roundingData *roundData, float32 a, float32 b ) |
1206 | { | 1197 | { |
1207 | flag aSign, bSign; | 1198 | flag aSign, bSign; |
1208 | 1199 | ||
1209 | aSign = extractFloat32Sign( a ); | 1200 | aSign = extractFloat32Sign( a ); |
1210 | bSign = extractFloat32Sign( b ); | 1201 | bSign = extractFloat32Sign( b ); |
1211 | if ( aSign == bSign ) { | 1202 | if ( aSign == bSign ) { |
1212 | return subFloat32Sigs( a, b, aSign ); | 1203 | return subFloat32Sigs( roundData, a, b, aSign ); |
1213 | } | 1204 | } |
1214 | else { | 1205 | else { |
1215 | return addFloat32Sigs( a, b, aSign ); | 1206 | return addFloat32Sigs( roundData, a, b, aSign ); |
1216 | } | 1207 | } |
1217 | 1208 | ||
1218 | } | 1209 | } |
@@ -1224,7 +1215,7 @@ Returns the result of multiplying the single-precision floating-point values | |||
1224 | for Binary Floating-point Arithmetic. | 1215 | for Binary Floating-point Arithmetic. |
1225 | ------------------------------------------------------------------------------- | 1216 | ------------------------------------------------------------------------------- |
1226 | */ | 1217 | */ |
1227 | float32 float32_mul( float32 a, float32 b ) | 1218 | float32 float32_mul( struct roundingData *roundData, float32 a, float32 b ) |
1228 | { | 1219 | { |
1229 | flag aSign, bSign, zSign; | 1220 | flag aSign, bSign, zSign; |
1230 | int16 aExp, bExp, zExp; | 1221 | int16 aExp, bExp, zExp; |
@@ -1244,7 +1235,7 @@ float32 float32_mul( float32 a, float32 b ) | |||
1244 | return propagateFloat32NaN( a, b ); | 1235 | return propagateFloat32NaN( a, b ); |
1245 | } | 1236 | } |
1246 | if ( ( bExp | bSig ) == 0 ) { | 1237 | if ( ( bExp | bSig ) == 0 ) { |
1247 | float_raise( float_flag_invalid ); | 1238 | roundData->exception |= float_flag_invalid; |
1248 | return float32_default_nan; | 1239 | return float32_default_nan; |
1249 | } | 1240 | } |
1250 | return packFloat32( zSign, 0xFF, 0 ); | 1241 | return packFloat32( zSign, 0xFF, 0 ); |
@@ -1252,7 +1243,7 @@ float32 float32_mul( float32 a, float32 b ) | |||
1252 | if ( bExp == 0xFF ) { | 1243 | if ( bExp == 0xFF ) { |
1253 | if ( bSig ) return propagateFloat32NaN( a, b ); | 1244 | if ( bSig ) return propagateFloat32NaN( a, b ); |
1254 | if ( ( aExp | aSig ) == 0 ) { | 1245 | if ( ( aExp | aSig ) == 0 ) { |
1255 | float_raise( float_flag_invalid ); | 1246 | roundData->exception |= float_flag_invalid; |
1256 | return float32_default_nan; | 1247 | return float32_default_nan; |
1257 | } | 1248 | } |
1258 | return packFloat32( zSign, 0xFF, 0 ); | 1249 | return packFloat32( zSign, 0xFF, 0 ); |
@@ -1274,7 +1265,7 @@ float32 float32_mul( float32 a, float32 b ) | |||
1274 | zSig <<= 1; | 1265 | zSig <<= 1; |
1275 | --zExp; | 1266 | --zExp; |
1276 | } | 1267 | } |
1277 | return roundAndPackFloat32( zSign, zExp, zSig ); | 1268 | return roundAndPackFloat32( roundData, zSign, zExp, zSig ); |
1278 | 1269 | ||
1279 | } | 1270 | } |
1280 | 1271 | ||
@@ -1285,7 +1276,7 @@ by the corresponding value `b'. The operation is performed according to the | |||
1285 | IEC/IEEE Standard for Binary Floating-point Arithmetic. | 1276 | IEC/IEEE Standard for Binary Floating-point Arithmetic. |
1286 | ------------------------------------------------------------------------------- | 1277 | ------------------------------------------------------------------------------- |
1287 | */ | 1278 | */ |
1288 | float32 float32_div( float32 a, float32 b ) | 1279 | float32 float32_div( struct roundingData *roundData, float32 a, float32 b ) |
1289 | { | 1280 | { |
1290 | flag aSign, bSign, zSign; | 1281 | flag aSign, bSign, zSign; |
1291 | int16 aExp, bExp, zExp; | 1282 | int16 aExp, bExp, zExp; |
@@ -1302,7 +1293,7 @@ float32 float32_div( float32 a, float32 b ) | |||
1302 | if ( aSig ) return propagateFloat32NaN( a, b ); | 1293 | if ( aSig ) return propagateFloat32NaN( a, b ); |
1303 | if ( bExp == 0xFF ) { | 1294 | if ( bExp == 0xFF ) { |
1304 | if ( bSig ) return propagateFloat32NaN( a, b ); | 1295 | if ( bSig ) return propagateFloat32NaN( a, b ); |
1305 | float_raise( float_flag_invalid ); | 1296 | roundData->exception |= float_flag_invalid; |
1306 | return float32_default_nan; | 1297 | return float32_default_nan; |
1307 | } | 1298 | } |
1308 | return packFloat32( zSign, 0xFF, 0 ); | 1299 | return packFloat32( zSign, 0xFF, 0 ); |
@@ -1314,10 +1305,10 @@ float32 float32_div( float32 a, float32 b ) | |||
1314 | if ( bExp == 0 ) { | 1305 | if ( bExp == 0 ) { |
1315 | if ( bSig == 0 ) { | 1306 | if ( bSig == 0 ) { |
1316 | if ( ( aExp | aSig ) == 0 ) { | 1307 | if ( ( aExp | aSig ) == 0 ) { |
1317 | float_raise( float_flag_invalid ); | 1308 | roundData->exception |= float_flag_invalid; |
1318 | return float32_default_nan; | 1309 | return float32_default_nan; |
1319 | } | 1310 | } |
1320 | float_raise( float_flag_divbyzero ); | 1311 | roundData->exception |= float_flag_divbyzero; |
1321 | return packFloat32( zSign, 0xFF, 0 ); | 1312 | return packFloat32( zSign, 0xFF, 0 ); |
1322 | } | 1313 | } |
1323 | normalizeFloat32Subnormal( bSig, &bExp, &bSig ); | 1314 | normalizeFloat32Subnormal( bSig, &bExp, &bSig ); |
@@ -1341,7 +1332,7 @@ float32 float32_div( float32 a, float32 b ) | |||
1341 | if ( ( zSig & 0x3F ) == 0 ) { | 1332 | if ( ( zSig & 0x3F ) == 0 ) { |
1342 | zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 ); | 1333 | zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 ); |
1343 | } | 1334 | } |
1344 | return roundAndPackFloat32( zSign, zExp, zSig ); | 1335 | return roundAndPackFloat32( roundData, zSign, zExp, zSig ); |
1345 | 1336 | ||
1346 | } | 1337 | } |
1347 | 1338 | ||
@@ -1352,7 +1343,7 @@ with respect to the corresponding value `b'. The operation is performed | |||
1352 | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. | 1343 | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. |
1353 | ------------------------------------------------------------------------------- | 1344 | ------------------------------------------------------------------------------- |
1354 | */ | 1345 | */ |
1355 | float32 float32_rem( float32 a, float32 b ) | 1346 | float32 float32_rem( struct roundingData *roundData, float32 a, float32 b ) |
1356 | { | 1347 | { |
1357 | flag aSign, bSign, zSign; | 1348 | flag aSign, bSign, zSign; |
1358 | int16 aExp, bExp, expDiff; | 1349 | int16 aExp, bExp, expDiff; |
@@ -1372,7 +1363,7 @@ float32 float32_rem( float32 a, float32 b ) | |||
1372 | if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { | 1363 | if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { |
1373 | return propagateFloat32NaN( a, b ); | 1364 | return propagateFloat32NaN( a, b ); |
1374 | } | 1365 | } |
1375 | float_raise( float_flag_invalid ); | 1366 | roundData->exception |= float_flag_invalid; |
1376 | return float32_default_nan; | 1367 | return float32_default_nan; |
1377 | } | 1368 | } |
1378 | if ( bExp == 0xFF ) { | 1369 | if ( bExp == 0xFF ) { |
@@ -1381,7 +1372,7 @@ float32 float32_rem( float32 a, float32 b ) | |||
1381 | } | 1372 | } |
1382 | if ( bExp == 0 ) { | 1373 | if ( bExp == 0 ) { |
1383 | if ( bSig == 0 ) { | 1374 | if ( bSig == 0 ) { |
1384 | float_raise( float_flag_invalid ); | 1375 | roundData->exception |= float_flag_invalid; |
1385 | return float32_default_nan; | 1376 | return float32_default_nan; |
1386 | } | 1377 | } |
1387 | normalizeFloat32Subnormal( bSig, &bExp, &bSig ); | 1378 | normalizeFloat32Subnormal( bSig, &bExp, &bSig ); |
@@ -1444,7 +1435,7 @@ float32 float32_rem( float32 a, float32 b ) | |||
1444 | } | 1435 | } |
1445 | zSign = ( (sbits32) aSig < 0 ); | 1436 | zSign = ( (sbits32) aSig < 0 ); |
1446 | if ( zSign ) aSig = - aSig; | 1437 | if ( zSign ) aSig = - aSig; |
1447 | return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig ); | 1438 | return normalizeRoundAndPackFloat32( roundData, aSign ^ zSign, bExp, aSig ); |
1448 | 1439 | ||
1449 | } | 1440 | } |
1450 | 1441 | ||
@@ -1455,7 +1446,7 @@ The operation is performed according to the IEC/IEEE Standard for Binary | |||
1455 | Floating-point Arithmetic. | 1446 | Floating-point Arithmetic. |
1456 | ------------------------------------------------------------------------------- | 1447 | ------------------------------------------------------------------------------- |
1457 | */ | 1448 | */ |
1458 | float32 float32_sqrt( float32 a ) | 1449 | float32 float32_sqrt( struct roundingData *roundData, float32 a ) |
1459 | { | 1450 | { |
1460 | flag aSign; | 1451 | flag aSign; |
1461 | int16 aExp, zExp; | 1452 | int16 aExp, zExp; |
@@ -1468,12 +1459,12 @@ float32 float32_sqrt( float32 a ) | |||
1468 | if ( aExp == 0xFF ) { | 1459 | if ( aExp == 0xFF ) { |
1469 | if ( aSig ) return propagateFloat32NaN( a, 0 ); | 1460 | if ( aSig ) return propagateFloat32NaN( a, 0 ); |
1470 | if ( ! aSign ) return a; | 1461 | if ( ! aSign ) return a; |
1471 | float_raise( float_flag_invalid ); | 1462 | roundData->exception |= float_flag_invalid; |
1472 | return float32_default_nan; | 1463 | return float32_default_nan; |
1473 | } | 1464 | } |
1474 | if ( aSign ) { | 1465 | if ( aSign ) { |
1475 | if ( ( aExp | aSig ) == 0 ) return a; | 1466 | if ( ( aExp | aSig ) == 0 ) return a; |
1476 | float_raise( float_flag_invalid ); | 1467 | roundData->exception |= float_flag_invalid; |
1477 | return float32_default_nan; | 1468 | return float32_default_nan; |
1478 | } | 1469 | } |
1479 | if ( aExp == 0 ) { | 1470 | if ( aExp == 0 ) { |
@@ -1499,7 +1490,7 @@ float32 float32_sqrt( float32 a ) | |||
1499 | } | 1490 | } |
1500 | } | 1491 | } |
1501 | shift32RightJamming( zSig, 1, &zSig ); | 1492 | shift32RightJamming( zSig, 1, &zSig ); |
1502 | return roundAndPackFloat32( 0, zExp, zSig ); | 1493 | return roundAndPackFloat32( roundData, 0, zExp, zSig ); |
1503 | 1494 | ||
1504 | } | 1495 | } |
1505 | 1496 | ||
@@ -1661,7 +1652,7 @@ positive integer is returned. Otherwise, if the conversion overflows, the | |||
1661 | largest integer with the same sign as `a' is returned. | 1652 | largest integer with the same sign as `a' is returned. |
1662 | ------------------------------------------------------------------------------- | 1653 | ------------------------------------------------------------------------------- |
1663 | */ | 1654 | */ |
1664 | int32 float64_to_int32( float64 a ) | 1655 | int32 float64_to_int32( struct roundingData *roundData, float64 a ) |
1665 | { | 1656 | { |
1666 | flag aSign; | 1657 | flag aSign; |
1667 | int16 aExp, shiftCount; | 1658 | int16 aExp, shiftCount; |
@@ -1674,7 +1665,7 @@ int32 float64_to_int32( float64 a ) | |||
1674 | if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); | 1665 | if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); |
1675 | shiftCount = 0x42C - aExp; | 1666 | shiftCount = 0x42C - aExp; |
1676 | if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); | 1667 | if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); |
1677 | return roundAndPackInt32( aSign, aSig ); | 1668 | return roundAndPackInt32( roundData, aSign, aSig ); |
1678 | 1669 | ||
1679 | } | 1670 | } |
1680 | 1671 | ||
@@ -1705,7 +1696,7 @@ int32 float64_to_int32_round_to_zero( float64 a ) | |||
1705 | goto invalid; | 1696 | goto invalid; |
1706 | } | 1697 | } |
1707 | else if ( 52 < shiftCount ) { | 1698 | else if ( 52 < shiftCount ) { |
1708 | if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; | 1699 | if ( aExp || aSig ) float_raise( float_flag_inexact ); |
1709 | return 0; | 1700 | return 0; |
1710 | } | 1701 | } |
1711 | aSig |= LIT64( 0x0010000000000000 ); | 1702 | aSig |= LIT64( 0x0010000000000000 ); |
@@ -1715,11 +1706,11 @@ int32 float64_to_int32_round_to_zero( float64 a ) | |||
1715 | if ( aSign ) z = - z; | 1706 | if ( aSign ) z = - z; |
1716 | if ( ( z < 0 ) ^ aSign ) { | 1707 | if ( ( z < 0 ) ^ aSign ) { |
1717 | invalid: | 1708 | invalid: |
1718 | float_exception_flags |= float_flag_invalid; | 1709 | float_raise( float_flag_invalid ); |
1719 | return aSign ? 0x80000000 : 0x7FFFFFFF; | 1710 | return aSign ? 0x80000000 : 0x7FFFFFFF; |
1720 | } | 1711 | } |
1721 | if ( ( aSig<<shiftCount ) != savedASig ) { | 1712 | if ( ( aSig<<shiftCount ) != savedASig ) { |
1722 | float_exception_flags |= float_flag_inexact; | 1713 | float_raise( float_flag_inexact ); |
1723 | } | 1714 | } |
1724 | return z; | 1715 | return z; |
1725 | 1716 | ||
@@ -1736,7 +1727,7 @@ positive integer is returned. Otherwise, if the conversion overflows, the | |||
1736 | largest positive integer is returned. | 1727 | largest positive integer is returned. |
1737 | ------------------------------------------------------------------------------- | 1728 | ------------------------------------------------------------------------------- |
1738 | */ | 1729 | */ |
1739 | int32 float64_to_uint32( float64 a ) | 1730 | int32 float64_to_uint32( struct roundingData *roundData, float64 a ) |
1740 | { | 1731 | { |
1741 | flag aSign; | 1732 | flag aSign; |
1742 | int16 aExp, shiftCount; | 1733 | int16 aExp, shiftCount; |
@@ -1749,7 +1740,7 @@ int32 float64_to_uint32( float64 a ) | |||
1749 | if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); | 1740 | if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); |
1750 | shiftCount = 0x42C - aExp; | 1741 | shiftCount = 0x42C - aExp; |
1751 | if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); | 1742 | if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); |
1752 | return roundAndPackInt32( aSign, aSig ); | 1743 | return roundAndPackInt32( roundData, aSign, aSig ); |
1753 | } | 1744 | } |
1754 | 1745 | ||
1755 | /* | 1746 | /* |
@@ -1778,7 +1769,7 @@ int32 float64_to_uint32_round_to_zero( float64 a ) | |||
1778 | goto invalid; | 1769 | goto invalid; |
1779 | } | 1770 | } |
1780 | else if ( 52 < shiftCount ) { | 1771 | else if ( 52 < shiftCount ) { |
1781 | if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; | 1772 | if ( aExp || aSig ) float_raise( float_flag_inexact ); |
1782 | return 0; | 1773 | return 0; |
1783 | } | 1774 | } |
1784 | aSig |= LIT64( 0x0010000000000000 ); | 1775 | aSig |= LIT64( 0x0010000000000000 ); |
@@ -1788,11 +1779,11 @@ int32 float64_to_uint32_round_to_zero( float64 a ) | |||
1788 | if ( aSign ) z = - z; | 1779 | if ( aSign ) z = - z; |
1789 | if ( ( z < 0 ) ^ aSign ) { | 1780 | if ( ( z < 0 ) ^ aSign ) { |
1790 | invalid: | 1781 | invalid: |
1791 | float_exception_flags |= float_flag_invalid; | 1782 | float_raise( float_flag_invalid ); |
1792 | return aSign ? 0x80000000 : 0x7FFFFFFF; | 1783 | return aSign ? 0x80000000 : 0x7FFFFFFF; |
1793 | } | 1784 | } |
1794 | if ( ( aSig<<shiftCount ) != savedASig ) { | 1785 | if ( ( aSig<<shiftCount ) != savedASig ) { |
1795 | float_exception_flags |= float_flag_inexact; | 1786 | float_raise( float_flag_inexact ); |
1796 | } | 1787 | } |
1797 | return z; | 1788 | return z; |
1798 | } | 1789 | } |
@@ -1805,7 +1796,7 @@ performed according to the IEC/IEEE Standard for Binary Floating-point | |||
1805 | Arithmetic. | 1796 | Arithmetic. |
1806 | ------------------------------------------------------------------------------- | 1797 | ------------------------------------------------------------------------------- |
1807 | */ | 1798 | */ |
1808 | float32 float64_to_float32( float64 a ) | 1799 | float32 float64_to_float32( struct roundingData *roundData, float64 a ) |
1809 | { | 1800 | { |
1810 | flag aSign; | 1801 | flag aSign; |
1811 | int16 aExp; | 1802 | int16 aExp; |
@@ -1825,7 +1816,7 @@ float32 float64_to_float32( float64 a ) | |||
1825 | zSig |= 0x40000000; | 1816 | zSig |= 0x40000000; |
1826 | aExp -= 0x381; | 1817 | aExp -= 0x381; |
1827 | } | 1818 | } |
1828 | return roundAndPackFloat32( aSign, aExp, zSig ); | 1819 | return roundAndPackFloat32( roundData, aSign, aExp, zSig ); |
1829 | 1820 | ||
1830 | } | 1821 | } |
1831 | 1822 | ||
@@ -1872,7 +1863,7 @@ operation is performed according to the IEC/IEEE Standard for Binary | |||
1872 | Floating-point Arithmetic. | 1863 | Floating-point Arithmetic. |
1873 | ------------------------------------------------------------------------------- | 1864 | ------------------------------------------------------------------------------- |
1874 | */ | 1865 | */ |
1875 | float64 float64_round_to_int( float64 a ) | 1866 | float64 float64_round_to_int( struct roundingData *roundData, float64 a ) |
1876 | { | 1867 | { |
1877 | flag aSign; | 1868 | flag aSign; |
1878 | int16 aExp; | 1869 | int16 aExp; |
@@ -1889,9 +1880,9 @@ float64 float64_round_to_int( float64 a ) | |||
1889 | } | 1880 | } |
1890 | if ( aExp <= 0x3FE ) { | 1881 | if ( aExp <= 0x3FE ) { |
1891 | if ( (bits64) ( a<<1 ) == 0 ) return a; | 1882 | if ( (bits64) ( a<<1 ) == 0 ) return a; |
1892 | float_exception_flags |= float_flag_inexact; | 1883 | roundData->exception |= float_flag_inexact; |
1893 | aSign = extractFloat64Sign( a ); | 1884 | aSign = extractFloat64Sign( a ); |
1894 | switch ( float_rounding_mode ) { | 1885 | switch ( roundData->mode ) { |
1895 | case float_round_nearest_even: | 1886 | case float_round_nearest_even: |
1896 | if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) { | 1887 | if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) { |
1897 | return packFloat64( aSign, 0x3FF, 0 ); | 1888 | return packFloat64( aSign, 0x3FF, 0 ); |
@@ -1909,7 +1900,7 @@ float64 float64_round_to_int( float64 a ) | |||
1909 | lastBitMask <<= 0x433 - aExp; | 1900 | lastBitMask <<= 0x433 - aExp; |
1910 | roundBitsMask = lastBitMask - 1; | 1901 | roundBitsMask = lastBitMask - 1; |
1911 | z = a; | 1902 | z = a; |
1912 | roundingMode = float_rounding_mode; | 1903 | roundingMode = roundData->mode; |
1913 | if ( roundingMode == float_round_nearest_even ) { | 1904 | if ( roundingMode == float_round_nearest_even ) { |
1914 | z += lastBitMask>>1; | 1905 | z += lastBitMask>>1; |
1915 | if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; | 1906 | if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; |
@@ -1920,7 +1911,7 @@ float64 float64_round_to_int( float64 a ) | |||
1920 | } | 1911 | } |
1921 | } | 1912 | } |
1922 | z &= ~ roundBitsMask; | 1913 | z &= ~ roundBitsMask; |
1923 | if ( z != a ) float_exception_flags |= float_flag_inexact; | 1914 | if ( z != a ) roundData->exception |= float_flag_inexact; |
1924 | return z; | 1915 | return z; |
1925 | 1916 | ||
1926 | } | 1917 | } |
@@ -1934,7 +1925,7 @@ addition is performed according to the IEC/IEEE Standard for Binary | |||
1934 | Floating-point Arithmetic. | 1925 | Floating-point Arithmetic. |
1935 | ------------------------------------------------------------------------------- | 1926 | ------------------------------------------------------------------------------- |
1936 | */ | 1927 | */ |
1937 | static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) | 1928 | static float64 addFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign ) |
1938 | { | 1929 | { |
1939 | int16 aExp, bExp, zExp; | 1930 | int16 aExp, bExp, zExp; |
1940 | bits64 aSig, bSig, zSig; | 1931 | bits64 aSig, bSig, zSig; |
@@ -1993,7 +1984,7 @@ static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) | |||
1993 | ++zExp; | 1984 | ++zExp; |
1994 | } | 1985 | } |
1995 | roundAndPack: | 1986 | roundAndPack: |
1996 | return roundAndPackFloat64( zSign, zExp, zSig ); | 1987 | return roundAndPackFloat64( roundData, zSign, zExp, zSig ); |
1997 | 1988 | ||
1998 | } | 1989 | } |
1999 | 1990 | ||
@@ -2006,7 +1997,7 @@ result is a NaN. The subtraction is performed according to the IEC/IEEE | |||
2006 | Standard for Binary Floating-point Arithmetic. | 1997 | Standard for Binary Floating-point Arithmetic. |
2007 | ------------------------------------------------------------------------------- | 1998 | ------------------------------------------------------------------------------- |
2008 | */ | 1999 | */ |
2009 | static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) | 2000 | static float64 subFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign ) |
2010 | { | 2001 | { |
2011 | int16 aExp, bExp, zExp; | 2002 | int16 aExp, bExp, zExp; |
2012 | bits64 aSig, bSig, zSig; | 2003 | bits64 aSig, bSig, zSig; |
@@ -2023,7 +2014,7 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) | |||
2023 | if ( expDiff < 0 ) goto bExpBigger; | 2014 | if ( expDiff < 0 ) goto bExpBigger; |
2024 | if ( aExp == 0x7FF ) { | 2015 | if ( aExp == 0x7FF ) { |
2025 | if ( aSig | bSig ) return propagateFloat64NaN( a, b ); | 2016 | if ( aSig | bSig ) return propagateFloat64NaN( a, b ); |
2026 | float_raise( float_flag_invalid ); | 2017 | roundData->exception |= float_flag_invalid; |
2027 | return float64_default_nan; | 2018 | return float64_default_nan; |
2028 | } | 2019 | } |
2029 | if ( aExp == 0 ) { | 2020 | if ( aExp == 0 ) { |
@@ -2032,7 +2023,7 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) | |||
2032 | } | 2023 | } |
2033 | if ( bSig < aSig ) goto aBigger; | 2024 | if ( bSig < aSig ) goto aBigger; |
2034 | if ( aSig < bSig ) goto bBigger; | 2025 | if ( aSig < bSig ) goto bBigger; |
2035 | return packFloat64( float_rounding_mode == float_round_down, 0, 0 ); | 2026 | return packFloat64( roundData->mode == float_round_down, 0, 0 ); |
2036 | bExpBigger: | 2027 | bExpBigger: |
2037 | if ( bExp == 0x7FF ) { | 2028 | if ( bExp == 0x7FF ) { |
2038 | if ( bSig ) return propagateFloat64NaN( a, b ); | 2029 | if ( bSig ) return propagateFloat64NaN( a, b ); |
@@ -2069,7 +2060,7 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) | |||
2069 | zExp = aExp; | 2060 | zExp = aExp; |
2070 | normalizeRoundAndPack: | 2061 | normalizeRoundAndPack: |
2071 | --zExp; | 2062 | --zExp; |
2072 | return normalizeRoundAndPackFloat64( zSign, zExp, zSig ); | 2063 | return normalizeRoundAndPackFloat64( roundData, zSign, zExp, zSig ); |
2073 | 2064 | ||
2074 | } | 2065 | } |
2075 | 2066 | ||
@@ -2080,17 +2071,17 @@ and `b'. The operation is performed according to the IEC/IEEE Standard for | |||
2080 | Binary Floating-point Arithmetic. | 2071 | Binary Floating-point Arithmetic. |
2081 | ------------------------------------------------------------------------------- | 2072 | ------------------------------------------------------------------------------- |
2082 | */ | 2073 | */ |
2083 | float64 float64_add( float64 a, float64 b ) | 2074 | float64 float64_add( struct roundingData *roundData, float64 a, float64 b ) |
2084 | { | 2075 | { |
2085 | flag aSign, bSign; | 2076 | flag aSign, bSign; |
2086 | 2077 | ||
2087 | aSign = extractFloat64Sign( a ); | 2078 | aSign = extractFloat64Sign( a ); |
2088 | bSign = extractFloat64Sign( b ); | 2079 | bSign = extractFloat64Sign( b ); |
2089 | if ( aSign == bSign ) { | 2080 | if ( aSign == bSign ) { |
2090 | return addFloat64Sigs( a, b, aSign ); | 2081 | return addFloat64Sigs( roundData, a, b, aSign ); |
2091 | } | 2082 | } |
2092 | else { | 2083 | else { |
2093 | return subFloat64Sigs( a, b, aSign ); | 2084 | return subFloat64Sigs( roundData, a, b, aSign ); |
2094 | } | 2085 | } |
2095 | 2086 | ||
2096 | } | 2087 | } |
@@ -2102,17 +2093,17 @@ Returns the result of subtracting the double-precision floating-point values | |||
2102 | for Binary Floating-point Arithmetic. | 2093 | for Binary Floating-point Arithmetic. |
2103 | ------------------------------------------------------------------------------- | 2094 | ------------------------------------------------------------------------------- |
2104 | */ | 2095 | */ |
2105 | float64 float64_sub( float64 a, float64 b ) | 2096 | float64 float64_sub( struct roundingData *roundData, float64 a, float64 b ) |
2106 | { | 2097 | { |
2107 | flag aSign, bSign; | 2098 | flag aSign, bSign; |
2108 | 2099 | ||
2109 | aSign = extractFloat64Sign( a ); | 2100 | aSign = extractFloat64Sign( a ); |
2110 | bSign = extractFloat64Sign( b ); | 2101 | bSign = extractFloat64Sign( b ); |
2111 | if ( aSign == bSign ) { | 2102 | if ( aSign == bSign ) { |
2112 | return subFloat64Sigs( a, b, aSign ); | 2103 | return subFloat64Sigs( roundData, a, b, aSign ); |
2113 | } | 2104 | } |
2114 | else { | 2105 | else { |
2115 | return addFloat64Sigs( a, b, aSign ); | 2106 | return addFloat64Sigs( roundData, a, b, aSign ); |
2116 | } | 2107 | } |
2117 | 2108 | ||
2118 | } | 2109 | } |
@@ -2124,7 +2115,7 @@ Returns the result of multiplying the double-precision floating-point values | |||
2124 | for Binary Floating-point Arithmetic. | 2115 | for Binary Floating-point Arithmetic. |
2125 | ------------------------------------------------------------------------------- | 2116 | ------------------------------------------------------------------------------- |
2126 | */ | 2117 | */ |
2127 | float64 float64_mul( float64 a, float64 b ) | 2118 | float64 float64_mul( struct roundingData *roundData, float64 a, float64 b ) |
2128 | { | 2119 | { |
2129 | flag aSign, bSign, zSign; | 2120 | flag aSign, bSign, zSign; |
2130 | int16 aExp, bExp, zExp; | 2121 | int16 aExp, bExp, zExp; |
@@ -2142,7 +2133,7 @@ float64 float64_mul( float64 a, float64 b ) | |||
2142 | return propagateFloat64NaN( a, b ); | 2133 | return propagateFloat64NaN( a, b ); |
2143 | } | 2134 | } |
2144 | if ( ( bExp | bSig ) == 0 ) { | 2135 | if ( ( bExp | bSig ) == 0 ) { |
2145 | float_raise( float_flag_invalid ); | 2136 | roundData->exception |= float_flag_invalid; |
2146 | return float64_default_nan; | 2137 | return float64_default_nan; |
2147 | } | 2138 | } |
2148 | return packFloat64( zSign, 0x7FF, 0 ); | 2139 | return packFloat64( zSign, 0x7FF, 0 ); |
@@ -2150,7 +2141,7 @@ float64 float64_mul( float64 a, float64 b ) | |||
2150 | if ( bExp == 0x7FF ) { | 2141 | if ( bExp == 0x7FF ) { |
2151 | if ( bSig ) return propagateFloat64NaN( a, b ); | 2142 | if ( bSig ) return propagateFloat64NaN( a, b ); |
2152 | if ( ( aExp | aSig ) == 0 ) { | 2143 | if ( ( aExp | aSig ) == 0 ) { |
2153 | float_raise( float_flag_invalid ); | 2144 | roundData->exception |= float_flag_invalid; |
2154 | return float64_default_nan; | 2145 | return float64_default_nan; |
2155 | } | 2146 | } |
2156 | return packFloat64( zSign, 0x7FF, 0 ); | 2147 | return packFloat64( zSign, 0x7FF, 0 ); |
@@ -2172,7 +2163,7 @@ float64 float64_mul( float64 a, float64 b ) | |||
2172 | zSig0 <<= 1; | 2163 | zSig0 <<= 1; |
2173 | --zExp; | 2164 | --zExp; |
2174 | } | 2165 | } |
2175 | return roundAndPackFloat64( zSign, zExp, zSig0 ); | 2166 | return roundAndPackFloat64( roundData, zSign, zExp, zSig0 ); |
2176 | 2167 | ||
2177 | } | 2168 | } |
2178 | 2169 | ||
@@ -2183,7 +2174,7 @@ by the corresponding value `b'. The operation is performed according to | |||
2183 | the IEC/IEEE Standard for Binary Floating-point Arithmetic. | 2174 | the IEC/IEEE Standard for Binary Floating-point Arithmetic. |
2184 | ------------------------------------------------------------------------------- | 2175 | ------------------------------------------------------------------------------- |
2185 | */ | 2176 | */ |
2186 | float64 float64_div( float64 a, float64 b ) | 2177 | float64 float64_div( struct roundingData *roundData, float64 a, float64 b ) |
2187 | { | 2178 | { |
2188 | flag aSign, bSign, zSign; | 2179 | flag aSign, bSign, zSign; |
2189 | int16 aExp, bExp, zExp; | 2180 | int16 aExp, bExp, zExp; |
@@ -2202,7 +2193,7 @@ float64 float64_div( float64 a, float64 b ) | |||
2202 | if ( aSig ) return propagateFloat64NaN( a, b ); | 2193 | if ( aSig ) return propagateFloat64NaN( a, b ); |
2203 | if ( bExp == 0x7FF ) { | 2194 | if ( bExp == 0x7FF ) { |
2204 | if ( bSig ) return propagateFloat64NaN( a, b ); | 2195 | if ( bSig ) return propagateFloat64NaN( a, b ); |
2205 | float_raise( float_flag_invalid ); | 2196 | roundData->exception |= float_flag_invalid; |
2206 | return float64_default_nan; | 2197 | return float64_default_nan; |
2207 | } | 2198 | } |
2208 | return packFloat64( zSign, 0x7FF, 0 ); | 2199 | return packFloat64( zSign, 0x7FF, 0 ); |
@@ -2214,10 +2205,10 @@ float64 float64_div( float64 a, float64 b ) | |||
2214 | if ( bExp == 0 ) { | 2205 | if ( bExp == 0 ) { |
2215 | if ( bSig == 0 ) { | 2206 | if ( bSig == 0 ) { |
2216 | if ( ( aExp | aSig ) == 0 ) { | 2207 | if ( ( aExp | aSig ) == 0 ) { |
2217 | float_raise( float_flag_invalid ); | 2208 | roundData->exception |= float_flag_invalid; |
2218 | return float64_default_nan; | 2209 | return float64_default_nan; |
2219 | } | 2210 | } |
2220 | float_raise( float_flag_divbyzero ); | 2211 | roundData->exception |= float_flag_divbyzero; |
2221 | return packFloat64( zSign, 0x7FF, 0 ); | 2212 | return packFloat64( zSign, 0x7FF, 0 ); |
2222 | } | 2213 | } |
2223 | normalizeFloat64Subnormal( bSig, &bExp, &bSig ); | 2214 | normalizeFloat64Subnormal( bSig, &bExp, &bSig ); |
@@ -2243,7 +2234,7 @@ float64 float64_div( float64 a, float64 b ) | |||
2243 | } | 2234 | } |
2244 | zSig |= ( rem1 != 0 ); | 2235 | zSig |= ( rem1 != 0 ); |
2245 | } | 2236 | } |
2246 | return roundAndPackFloat64( zSign, zExp, zSig ); | 2237 | return roundAndPackFloat64( roundData, zSign, zExp, zSig ); |
2247 | 2238 | ||
2248 | } | 2239 | } |
2249 | 2240 | ||
@@ -2254,7 +2245,7 @@ with respect to the corresponding value `b'. The operation is performed | |||
2254 | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. | 2245 | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. |
2255 | ------------------------------------------------------------------------------- | 2246 | ------------------------------------------------------------------------------- |
2256 | */ | 2247 | */ |
2257 | float64 float64_rem( float64 a, float64 b ) | 2248 | float64 float64_rem( struct roundingData *roundData, float64 a, float64 b ) |
2258 | { | 2249 | { |
2259 | flag aSign, bSign, zSign; | 2250 | flag aSign, bSign, zSign; |
2260 | int16 aExp, bExp, expDiff; | 2251 | int16 aExp, bExp, expDiff; |
@@ -2272,7 +2263,7 @@ float64 float64_rem( float64 a, float64 b ) | |||
2272 | if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { | 2263 | if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { |
2273 | return propagateFloat64NaN( a, b ); | 2264 | return propagateFloat64NaN( a, b ); |
2274 | } | 2265 | } |
2275 | float_raise( float_flag_invalid ); | 2266 | roundData->exception |= float_flag_invalid; |
2276 | return float64_default_nan; | 2267 | return float64_default_nan; |
2277 | } | 2268 | } |
2278 | if ( bExp == 0x7FF ) { | 2269 | if ( bExp == 0x7FF ) { |
@@ -2281,7 +2272,7 @@ float64 float64_rem( float64 a, float64 b ) | |||
2281 | } | 2272 | } |
2282 | if ( bExp == 0 ) { | 2273 | if ( bExp == 0 ) { |
2283 | if ( bSig == 0 ) { | 2274 | if ( bSig == 0 ) { |
2284 | float_raise( float_flag_invalid ); | 2275 | roundData->exception |= float_flag_invalid; |
2285 | return float64_default_nan; | 2276 | return float64_default_nan; |
2286 | } | 2277 | } |
2287 | normalizeFloat64Subnormal( bSig, &bExp, &bSig ); | 2278 | normalizeFloat64Subnormal( bSig, &bExp, &bSig ); |
@@ -2329,7 +2320,7 @@ float64 float64_rem( float64 a, float64 b ) | |||
2329 | } | 2320 | } |
2330 | zSign = ( (sbits64) aSig < 0 ); | 2321 | zSign = ( (sbits64) aSig < 0 ); |
2331 | if ( zSign ) aSig = - aSig; | 2322 | if ( zSign ) aSig = - aSig; |
2332 | return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig ); | 2323 | return normalizeRoundAndPackFloat64( roundData, aSign ^ zSign, bExp, aSig ); |
2333 | 2324 | ||
2334 | } | 2325 | } |
2335 | 2326 | ||
@@ -2340,7 +2331,7 @@ The operation is performed according to the IEC/IEEE Standard for Binary | |||
2340 | Floating-point Arithmetic. | 2331 | Floating-point Arithmetic. |
2341 | ------------------------------------------------------------------------------- | 2332 | ------------------------------------------------------------------------------- |
2342 | */ | 2333 | */ |
2343 | float64 float64_sqrt( float64 a ) | 2334 | float64 float64_sqrt( struct roundingData *roundData, float64 a ) |
2344 | { | 2335 | { |
2345 | flag aSign; | 2336 | flag aSign; |
2346 | int16 aExp, zExp; | 2337 | int16 aExp, zExp; |
@@ -2354,12 +2345,12 @@ float64 float64_sqrt( float64 a ) | |||
2354 | if ( aExp == 0x7FF ) { | 2345 | if ( aExp == 0x7FF ) { |
2355 | if ( aSig ) return propagateFloat64NaN( a, a ); | 2346 | if ( aSig ) return propagateFloat64NaN( a, a ); |
2356 | if ( ! aSign ) return a; | 2347 | if ( ! aSign ) return a; |
2357 | float_raise( float_flag_invalid ); | 2348 | roundData->exception |= float_flag_invalid; |
2358 | return float64_default_nan; | 2349 | return float64_default_nan; |
2359 | } | 2350 | } |
2360 | if ( aSign ) { | 2351 | if ( aSign ) { |
2361 | if ( ( aExp | aSig ) == 0 ) return a; | 2352 | if ( ( aExp | aSig ) == 0 ) return a; |
2362 | float_raise( float_flag_invalid ); | 2353 | roundData->exception |= float_flag_invalid; |
2363 | return float64_default_nan; | 2354 | return float64_default_nan; |
2364 | } | 2355 | } |
2365 | if ( aExp == 0 ) { | 2356 | if ( aExp == 0 ) { |
@@ -2390,7 +2381,7 @@ float64 float64_sqrt( float64 a ) | |||
2390 | } | 2381 | } |
2391 | } | 2382 | } |
2392 | shift64RightJamming( zSig, 1, &zSig ); | 2383 | shift64RightJamming( zSig, 1, &zSig ); |
2393 | return roundAndPackFloat64( 0, zExp, zSig ); | 2384 | return roundAndPackFloat64( roundData, 0, zExp, zSig ); |
2394 | 2385 | ||
2395 | } | 2386 | } |
2396 | 2387 | ||
@@ -2554,7 +2545,7 @@ largest positive integer is returned. Otherwise, if the conversion | |||
2554 | overflows, the largest integer with the same sign as `a' is returned. | 2545 | overflows, the largest integer with the same sign as `a' is returned. |
2555 | ------------------------------------------------------------------------------- | 2546 | ------------------------------------------------------------------------------- |
2556 | */ | 2547 | */ |
2557 | int32 floatx80_to_int32( floatx80 a ) | 2548 | int32 floatx80_to_int32( struct roundingData *roundData, floatx80 a ) |
2558 | { | 2549 | { |
2559 | flag aSign; | 2550 | flag aSign; |
2560 | int32 aExp, shiftCount; | 2551 | int32 aExp, shiftCount; |
@@ -2567,7 +2558,7 @@ int32 floatx80_to_int32( floatx80 a ) | |||
2567 | shiftCount = 0x4037 - aExp; | 2558 | shiftCount = 0x4037 - aExp; |
2568 | if ( shiftCount <= 0 ) shiftCount = 1; | 2559 | if ( shiftCount <= 0 ) shiftCount = 1; |
2569 | shift64RightJamming( aSig, shiftCount, &aSig ); | 2560 | shift64RightJamming( aSig, shiftCount, &aSig ); |
2570 | return roundAndPackInt32( aSign, aSig ); | 2561 | return roundAndPackInt32( roundData, aSign, aSig ); |
2571 | 2562 | ||
2572 | } | 2563 | } |
2573 | 2564 | ||
@@ -2598,7 +2589,7 @@ int32 floatx80_to_int32_round_to_zero( floatx80 a ) | |||
2598 | goto invalid; | 2589 | goto invalid; |
2599 | } | 2590 | } |
2600 | else if ( 63 < shiftCount ) { | 2591 | else if ( 63 < shiftCount ) { |
2601 | if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; | 2592 | if ( aExp || aSig ) float_raise( float_flag_inexact ); |
2602 | return 0; | 2593 | return 0; |
2603 | } | 2594 | } |
2604 | savedASig = aSig; | 2595 | savedASig = aSig; |
@@ -2607,11 +2598,11 @@ int32 floatx80_to_int32_round_to_zero( floatx80 a ) | |||
2607 | if ( aSign ) z = - z; | 2598 | if ( aSign ) z = - z; |
2608 | if ( ( z < 0 ) ^ aSign ) { | 2599 | if ( ( z < 0 ) ^ aSign ) { |
2609 | invalid: | 2600 | invalid: |
2610 | float_exception_flags |= float_flag_invalid; | 2601 | float_raise( float_flag_invalid ); |
2611 | return aSign ? 0x80000000 : 0x7FFFFFFF; | 2602 | return aSign ? 0x80000000 : 0x7FFFFFFF; |
2612 | } | 2603 | } |
2613 | if ( ( aSig<<shiftCount ) != savedASig ) { | 2604 | if ( ( aSig<<shiftCount ) != savedASig ) { |
2614 | float_exception_flags |= float_flag_inexact; | 2605 | float_raise( float_flag_inexact ); |
2615 | } | 2606 | } |
2616 | return z; | 2607 | return z; |
2617 | 2608 | ||
@@ -2625,7 +2616,7 @@ conversion is performed according to the IEC/IEEE Standard for Binary | |||
2625 | Floating-point Arithmetic. | 2616 | Floating-point Arithmetic. |
2626 | ------------------------------------------------------------------------------- | 2617 | ------------------------------------------------------------------------------- |
2627 | */ | 2618 | */ |
2628 | float32 floatx80_to_float32( floatx80 a ) | 2619 | float32 floatx80_to_float32( struct roundingData *roundData, floatx80 a ) |
2629 | { | 2620 | { |
2630 | flag aSign; | 2621 | flag aSign; |
2631 | int32 aExp; | 2622 | int32 aExp; |
@@ -2642,7 +2633,7 @@ float32 floatx80_to_float32( floatx80 a ) | |||
2642 | } | 2633 | } |
2643 | shift64RightJamming( aSig, 33, &aSig ); | 2634 | shift64RightJamming( aSig, 33, &aSig ); |
2644 | if ( aExp || aSig ) aExp -= 0x3F81; | 2635 | if ( aExp || aSig ) aExp -= 0x3F81; |
2645 | return roundAndPackFloat32( aSign, aExp, aSig ); | 2636 | return roundAndPackFloat32( roundData, aSign, aExp, aSig ); |
2646 | 2637 | ||
2647 | } | 2638 | } |
2648 | 2639 | ||
@@ -2654,7 +2645,7 @@ conversion is performed according to the IEC/IEEE Standard for Binary | |||
2654 | Floating-point Arithmetic. | 2645 | Floating-point Arithmetic. |
2655 | ------------------------------------------------------------------------------- | 2646 | ------------------------------------------------------------------------------- |
2656 | */ | 2647 | */ |
2657 | float64 floatx80_to_float64( floatx80 a ) | 2648 | float64 floatx80_to_float64( struct roundingData *roundData, floatx80 a ) |
2658 | { | 2649 | { |
2659 | flag aSign; | 2650 | flag aSign; |
2660 | int32 aExp; | 2651 | int32 aExp; |
@@ -2671,7 +2662,7 @@ float64 floatx80_to_float64( floatx80 a ) | |||
2671 | } | 2662 | } |
2672 | shift64RightJamming( aSig, 1, &zSig ); | 2663 | shift64RightJamming( aSig, 1, &zSig ); |
2673 | if ( aExp || aSig ) aExp -= 0x3C01; | 2664 | if ( aExp || aSig ) aExp -= 0x3C01; |
2674 | return roundAndPackFloat64( aSign, aExp, zSig ); | 2665 | return roundAndPackFloat64( roundData, aSign, aExp, zSig ); |
2675 | 2666 | ||
2676 | } | 2667 | } |
2677 | 2668 | ||
@@ -2683,7 +2674,7 @@ value. The operation is performed according to the IEC/IEEE Standard for | |||
2683 | Binary Floating-point Arithmetic. | 2674 | Binary Floating-point Arithmetic. |
2684 | ------------------------------------------------------------------------------- | 2675 | ------------------------------------------------------------------------------- |
2685 | */ | 2676 | */ |
2686 | floatx80 floatx80_round_to_int( floatx80 a ) | 2677 | floatx80 floatx80_round_to_int( struct roundingData *roundData, floatx80 a ) |
2687 | { | 2678 | { |
2688 | flag aSign; | 2679 | flag aSign; |
2689 | int32 aExp; | 2680 | int32 aExp; |
@@ -2703,9 +2694,9 @@ floatx80 floatx80_round_to_int( floatx80 a ) | |||
2703 | && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { | 2694 | && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { |
2704 | return a; | 2695 | return a; |
2705 | } | 2696 | } |
2706 | float_exception_flags |= float_flag_inexact; | 2697 | roundData->exception |= float_flag_inexact; |
2707 | aSign = extractFloatx80Sign( a ); | 2698 | aSign = extractFloatx80Sign( a ); |
2708 | switch ( float_rounding_mode ) { | 2699 | switch ( roundData->mode ) { |
2709 | case float_round_nearest_even: | 2700 | case float_round_nearest_even: |
2710 | if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 ) | 2701 | if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 ) |
2711 | ) { | 2702 | ) { |
@@ -2729,7 +2720,7 @@ floatx80 floatx80_round_to_int( floatx80 a ) | |||
2729 | lastBitMask <<= 0x403E - aExp; | 2720 | lastBitMask <<= 0x403E - aExp; |
2730 | roundBitsMask = lastBitMask - 1; | 2721 | roundBitsMask = lastBitMask - 1; |
2731 | z = a; | 2722 | z = a; |
2732 | roundingMode = float_rounding_mode; | 2723 | roundingMode = roundData->mode; |
2733 | if ( roundingMode == float_round_nearest_even ) { | 2724 | if ( roundingMode == float_round_nearest_even ) { |
2734 | z.low += lastBitMask>>1; | 2725 | z.low += lastBitMask>>1; |
2735 | if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; | 2726 | if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; |
@@ -2744,7 +2735,7 @@ floatx80 floatx80_round_to_int( floatx80 a ) | |||
2744 | ++z.high; | 2735 | ++z.high; |
2745 | z.low = LIT64( 0x8000000000000000 ); | 2736 | z.low = LIT64( 0x8000000000000000 ); |
2746 | } | 2737 | } |
2747 | if ( z.low != a.low ) float_exception_flags |= float_flag_inexact; | 2738 | if ( z.low != a.low ) roundData->exception |= float_flag_inexact; |
2748 | return z; | 2739 | return z; |
2749 | 2740 | ||
2750 | } | 2741 | } |
@@ -2758,7 +2749,7 @@ The addition is performed according to the IEC/IEEE Standard for Binary | |||
2758 | Floating-point Arithmetic. | 2749 | Floating-point Arithmetic. |
2759 | ------------------------------------------------------------------------------- | 2750 | ------------------------------------------------------------------------------- |
2760 | */ | 2751 | */ |
2761 | static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) | 2752 | static floatx80 addFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign ) |
2762 | { | 2753 | { |
2763 | int32 aExp, bExp, zExp; | 2754 | int32 aExp, bExp, zExp; |
2764 | bits64 aSig, bSig, zSig0, zSig1; | 2755 | bits64 aSig, bSig, zSig0, zSig1; |
@@ -2814,7 +2805,7 @@ static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) | |||
2814 | roundAndPack: | 2805 | roundAndPack: |
2815 | return | 2806 | return |
2816 | roundAndPackFloatx80( | 2807 | roundAndPackFloatx80( |
2817 | floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); | 2808 | roundData, zSign, zExp, zSig0, zSig1 ); |
2818 | 2809 | ||
2819 | } | 2810 | } |
2820 | 2811 | ||
@@ -2827,7 +2818,7 @@ result is a NaN. The subtraction is performed according to the IEC/IEEE | |||
2827 | Standard for Binary Floating-point Arithmetic. | 2818 | Standard for Binary Floating-point Arithmetic. |
2828 | ------------------------------------------------------------------------------- | 2819 | ------------------------------------------------------------------------------- |
2829 | */ | 2820 | */ |
2830 | static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) | 2821 | static floatx80 subFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign ) |
2831 | { | 2822 | { |
2832 | int32 aExp, bExp, zExp; | 2823 | int32 aExp, bExp, zExp; |
2833 | bits64 aSig, bSig, zSig0, zSig1; | 2824 | bits64 aSig, bSig, zSig0, zSig1; |
@@ -2845,7 +2836,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) | |||
2845 | if ( (bits64) ( ( aSig | bSig )<<1 ) ) { | 2836 | if ( (bits64) ( ( aSig | bSig )<<1 ) ) { |
2846 | return propagateFloatx80NaN( a, b ); | 2837 | return propagateFloatx80NaN( a, b ); |
2847 | } | 2838 | } |
2848 | float_raise( float_flag_invalid ); | 2839 | roundData->exception |= float_flag_invalid; |
2849 | z.low = floatx80_default_nan_low; | 2840 | z.low = floatx80_default_nan_low; |
2850 | z.high = floatx80_default_nan_high; | 2841 | z.high = floatx80_default_nan_high; |
2851 | return z; | 2842 | return z; |
@@ -2857,7 +2848,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) | |||
2857 | zSig1 = 0; | 2848 | zSig1 = 0; |
2858 | if ( bSig < aSig ) goto aBigger; | 2849 | if ( bSig < aSig ) goto aBigger; |
2859 | if ( aSig < bSig ) goto bBigger; | 2850 | if ( aSig < bSig ) goto bBigger; |
2860 | return packFloatx80( float_rounding_mode == float_round_down, 0, 0 ); | 2851 | return packFloatx80( roundData->mode == float_round_down, 0, 0 ); |
2861 | bExpBigger: | 2852 | bExpBigger: |
2862 | if ( bExp == 0x7FFF ) { | 2853 | if ( bExp == 0x7FFF ) { |
2863 | if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); | 2854 | if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); |
@@ -2883,7 +2874,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) | |||
2883 | normalizeRoundAndPack: | 2874 | normalizeRoundAndPack: |
2884 | return | 2875 | return |
2885 | normalizeRoundAndPackFloatx80( | 2876 | normalizeRoundAndPackFloatx80( |
2886 | floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); | 2877 | roundData, zSign, zExp, zSig0, zSig1 ); |
2887 | 2878 | ||
2888 | } | 2879 | } |
2889 | 2880 | ||
@@ -2894,17 +2885,17 @@ values `a' and `b'. The operation is performed according to the IEC/IEEE | |||
2894 | Standard for Binary Floating-point Arithmetic. | 2885 | Standard for Binary Floating-point Arithmetic. |
2895 | ------------------------------------------------------------------------------- | 2886 | ------------------------------------------------------------------------------- |
2896 | */ | 2887 | */ |
2897 | floatx80 floatx80_add( floatx80 a, floatx80 b ) | 2888 | floatx80 floatx80_add( struct roundingData *roundData, floatx80 a, floatx80 b ) |
2898 | { | 2889 | { |
2899 | flag aSign, bSign; | 2890 | flag aSign, bSign; |
2900 | 2891 | ||
2901 | aSign = extractFloatx80Sign( a ); | 2892 | aSign = extractFloatx80Sign( a ); |
2902 | bSign = extractFloatx80Sign( b ); | 2893 | bSign = extractFloatx80Sign( b ); |
2903 | if ( aSign == bSign ) { | 2894 | if ( aSign == bSign ) { |
2904 | return addFloatx80Sigs( a, b, aSign ); | 2895 | return addFloatx80Sigs( roundData, a, b, aSign ); |
2905 | } | 2896 | } |
2906 | else { | 2897 | else { |
2907 | return subFloatx80Sigs( a, b, aSign ); | 2898 | return subFloatx80Sigs( roundData, a, b, aSign ); |
2908 | } | 2899 | } |
2909 | 2900 | ||
2910 | } | 2901 | } |
@@ -2916,17 +2907,17 @@ point values `a' and `b'. The operation is performed according to the | |||
2916 | IEC/IEEE Standard for Binary Floating-point Arithmetic. | 2907 | IEC/IEEE Standard for Binary Floating-point Arithmetic. |
2917 | ------------------------------------------------------------------------------- | 2908 | ------------------------------------------------------------------------------- |
2918 | */ | 2909 | */ |
2919 | floatx80 floatx80_sub( floatx80 a, floatx80 b ) | 2910 | floatx80 floatx80_sub( struct roundingData *roundData, floatx80 a, floatx80 b ) |
2920 | { | 2911 | { |
2921 | flag aSign, bSign; | 2912 | flag aSign, bSign; |
2922 | 2913 | ||
2923 | aSign = extractFloatx80Sign( a ); | 2914 | aSign = extractFloatx80Sign( a ); |
2924 | bSign = extractFloatx80Sign( b ); | 2915 | bSign = extractFloatx80Sign( b ); |
2925 | if ( aSign == bSign ) { | 2916 | if ( aSign == bSign ) { |
2926 | return subFloatx80Sigs( a, b, aSign ); | 2917 | return subFloatx80Sigs( roundData, a, b, aSign ); |
2927 | } | 2918 | } |
2928 | else { | 2919 | else { |
2929 | return addFloatx80Sigs( a, b, aSign ); | 2920 | return addFloatx80Sigs( roundData, a, b, aSign ); |
2930 | } | 2921 | } |
2931 | 2922 | ||
2932 | } | 2923 | } |
@@ -2938,7 +2929,7 @@ point values `a' and `b'. The operation is performed according to the | |||
2938 | IEC/IEEE Standard for Binary Floating-point Arithmetic. | 2929 | IEC/IEEE Standard for Binary Floating-point Arithmetic. |
2939 | ------------------------------------------------------------------------------- | 2930 | ------------------------------------------------------------------------------- |
2940 | */ | 2931 | */ |
2941 | floatx80 floatx80_mul( floatx80 a, floatx80 b ) | 2932 | floatx80 floatx80_mul( struct roundingData *roundData, floatx80 a, floatx80 b ) |
2942 | { | 2933 | { |
2943 | flag aSign, bSign, zSign; | 2934 | flag aSign, bSign, zSign; |
2944 | int32 aExp, bExp, zExp; | 2935 | int32 aExp, bExp, zExp; |
@@ -2964,7 +2955,7 @@ floatx80 floatx80_mul( floatx80 a, floatx80 b ) | |||
2964 | if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); | 2955 | if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); |
2965 | if ( ( aExp | aSig ) == 0 ) { | 2956 | if ( ( aExp | aSig ) == 0 ) { |
2966 | invalid: | 2957 | invalid: |
2967 | float_raise( float_flag_invalid ); | 2958 | roundData->exception |= float_flag_invalid; |
2968 | z.low = floatx80_default_nan_low; | 2959 | z.low = floatx80_default_nan_low; |
2969 | z.high = floatx80_default_nan_high; | 2960 | z.high = floatx80_default_nan_high; |
2970 | return z; | 2961 | return z; |
@@ -2987,7 +2978,7 @@ floatx80 floatx80_mul( floatx80 a, floatx80 b ) | |||
2987 | } | 2978 | } |
2988 | return | 2979 | return |
2989 | roundAndPackFloatx80( | 2980 | roundAndPackFloatx80( |
2990 | floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); | 2981 | roundData, zSign, zExp, zSig0, zSig1 ); |
2991 | 2982 | ||
2992 | } | 2983 | } |
2993 | 2984 | ||
@@ -2998,7 +2989,7 @@ value `a' by the corresponding value `b'. The operation is performed | |||
2998 | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. | 2989 | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. |
2999 | ------------------------------------------------------------------------------- | 2990 | ------------------------------------------------------------------------------- |
3000 | */ | 2991 | */ |
3001 | floatx80 floatx80_div( floatx80 a, floatx80 b ) | 2992 | floatx80 floatx80_div( struct roundingData *roundData, floatx80 a, floatx80 b ) |
3002 | { | 2993 | { |
3003 | flag aSign, bSign, zSign; | 2994 | flag aSign, bSign, zSign; |
3004 | int32 aExp, bExp, zExp; | 2995 | int32 aExp, bExp, zExp; |
@@ -3029,12 +3020,12 @@ floatx80 floatx80_div( floatx80 a, floatx80 b ) | |||
3029 | if ( bSig == 0 ) { | 3020 | if ( bSig == 0 ) { |
3030 | if ( ( aExp | aSig ) == 0 ) { | 3021 | if ( ( aExp | aSig ) == 0 ) { |
3031 | invalid: | 3022 | invalid: |
3032 | float_raise( float_flag_invalid ); | 3023 | roundData->exception |= float_flag_invalid; |
3033 | z.low = floatx80_default_nan_low; | 3024 | z.low = floatx80_default_nan_low; |
3034 | z.high = floatx80_default_nan_high; | 3025 | z.high = floatx80_default_nan_high; |
3035 | return z; | 3026 | return z; |
3036 | } | 3027 | } |
3037 | float_raise( float_flag_divbyzero ); | 3028 | roundData->exception |= float_flag_divbyzero; |
3038 | return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); | 3029 | return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); |
3039 | } | 3030 | } |
3040 | normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); | 3031 | normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); |
@@ -3068,7 +3059,7 @@ floatx80 floatx80_div( floatx80 a, floatx80 b ) | |||
3068 | } | 3059 | } |
3069 | return | 3060 | return |
3070 | roundAndPackFloatx80( | 3061 | roundAndPackFloatx80( |
3071 | floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); | 3062 | roundData, zSign, zExp, zSig0, zSig1 ); |
3072 | 3063 | ||
3073 | } | 3064 | } |
3074 | 3065 | ||
@@ -3079,7 +3070,7 @@ Returns the remainder of the extended double-precision floating-point value | |||
3079 | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. | 3070 | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. |
3080 | ------------------------------------------------------------------------------- | 3071 | ------------------------------------------------------------------------------- |
3081 | */ | 3072 | */ |
3082 | floatx80 floatx80_rem( floatx80 a, floatx80 b ) | 3073 | floatx80 floatx80_rem( struct roundingData *roundData, floatx80 a, floatx80 b ) |
3083 | { | 3074 | { |
3084 | flag aSign, bSign, zSign; | 3075 | flag aSign, bSign, zSign; |
3085 | int32 aExp, bExp, expDiff; | 3076 | int32 aExp, bExp, expDiff; |
@@ -3107,7 +3098,7 @@ floatx80 floatx80_rem( floatx80 a, floatx80 b ) | |||
3107 | if ( bExp == 0 ) { | 3098 | if ( bExp == 0 ) { |
3108 | if ( bSig == 0 ) { | 3099 | if ( bSig == 0 ) { |
3109 | invalid: | 3100 | invalid: |
3110 | float_raise( float_flag_invalid ); | 3101 | roundData->exception |= float_flag_invalid; |
3111 | z.low = floatx80_default_nan_low; | 3102 | z.low = floatx80_default_nan_low; |
3112 | z.high = floatx80_default_nan_high; | 3103 | z.high = floatx80_default_nan_high; |
3113 | return z; | 3104 | return z; |
@@ -3164,9 +3155,10 @@ floatx80 floatx80_rem( floatx80 a, floatx80 b ) | |||
3164 | aSig1 = alternateASig1; | 3155 | aSig1 = alternateASig1; |
3165 | zSign = ! zSign; | 3156 | zSign = ! zSign; |
3166 | } | 3157 | } |
3158 | |||
3167 | return | 3159 | return |
3168 | normalizeRoundAndPackFloatx80( | 3160 | normalizeRoundAndPackFloatx80( |
3169 | 80, zSign, bExp + expDiff, aSig0, aSig1 ); | 3161 | roundData, zSign, bExp + expDiff, aSig0, aSig1 ); |
3170 | 3162 | ||
3171 | } | 3163 | } |
3172 | 3164 | ||
@@ -3177,7 +3169,7 @@ value `a'. The operation is performed according to the IEC/IEEE Standard | |||
3177 | for Binary Floating-point Arithmetic. | 3169 | for Binary Floating-point Arithmetic. |
3178 | ------------------------------------------------------------------------------- | 3170 | ------------------------------------------------------------------------------- |
3179 | */ | 3171 | */ |
3180 | floatx80 floatx80_sqrt( floatx80 a ) | 3172 | floatx80 floatx80_sqrt( struct roundingData *roundData, floatx80 a ) |
3181 | { | 3173 | { |
3182 | flag aSign; | 3174 | flag aSign; |
3183 | int32 aExp, zExp; | 3175 | int32 aExp, zExp; |
@@ -3197,7 +3189,7 @@ floatx80 floatx80_sqrt( floatx80 a ) | |||
3197 | if ( aSign ) { | 3189 | if ( aSign ) { |
3198 | if ( ( aExp | aSig0 ) == 0 ) return a; | 3190 | if ( ( aExp | aSig0 ) == 0 ) return a; |
3199 | invalid: | 3191 | invalid: |
3200 | float_raise( float_flag_invalid ); | 3192 | roundData->exception |= float_flag_invalid; |
3201 | z.low = floatx80_default_nan_low; | 3193 | z.low = floatx80_default_nan_low; |
3202 | z.high = floatx80_default_nan_high; | 3194 | z.high = floatx80_default_nan_high; |
3203 | return z; | 3195 | return z; |
@@ -3242,7 +3234,7 @@ floatx80 floatx80_sqrt( floatx80 a ) | |||
3242 | } | 3234 | } |
3243 | return | 3235 | return |
3244 | roundAndPackFloatx80( | 3236 | roundAndPackFloatx80( |
3245 | floatx80_rounding_precision, 0, zExp, zSig0, zSig1 ); | 3237 | roundData, 0, zExp, zSig0, zSig1 ); |
3246 | 3238 | ||
3247 | } | 3239 | } |
3248 | 3240 | ||
@@ -3264,7 +3256,7 @@ flag floatx80_eq( floatx80 a, floatx80 b ) | |||
3264 | ) { | 3256 | ) { |
3265 | if ( floatx80_is_signaling_nan( a ) | 3257 | if ( floatx80_is_signaling_nan( a ) |
3266 | || floatx80_is_signaling_nan( b ) ) { | 3258 | || floatx80_is_signaling_nan( b ) ) { |
3267 | float_raise( float_flag_invalid ); | 3259 | float_raise( float_flag_invalid ); |
3268 | } | 3260 | } |
3269 | return 0; | 3261 | return 0; |
3270 | } | 3262 | } |
@@ -3294,7 +3286,7 @@ flag floatx80_le( floatx80 a, floatx80 b ) | |||
3294 | || ( ( extractFloatx80Exp( b ) == 0x7FFF ) | 3286 | || ( ( extractFloatx80Exp( b ) == 0x7FFF ) |
3295 | && (bits64) ( extractFloatx80Frac( b )<<1 ) ) | 3287 | && (bits64) ( extractFloatx80Frac( b )<<1 ) ) |
3296 | ) { | 3288 | ) { |
3297 | float_raise( float_flag_invalid ); | 3289 | float_raise( float_flag_invalid ); |
3298 | return 0; | 3290 | return 0; |
3299 | } | 3291 | } |
3300 | aSign = extractFloatx80Sign( a ); | 3292 | aSign = extractFloatx80Sign( a ); |
@@ -3328,7 +3320,7 @@ flag floatx80_lt( floatx80 a, floatx80 b ) | |||
3328 | || ( ( extractFloatx80Exp( b ) == 0x7FFF ) | 3320 | || ( ( extractFloatx80Exp( b ) == 0x7FFF ) |
3329 | && (bits64) ( extractFloatx80Frac( b )<<1 ) ) | 3321 | && (bits64) ( extractFloatx80Frac( b )<<1 ) ) |
3330 | ) { | 3322 | ) { |
3331 | float_raise( float_flag_invalid ); | 3323 | float_raise( float_flag_invalid ); |
3332 | return 0; | 3324 | return 0; |
3333 | } | 3325 | } |
3334 | aSign = extractFloatx80Sign( a ); | 3326 | aSign = extractFloatx80Sign( a ); |
@@ -3361,7 +3353,7 @@ flag floatx80_eq_signaling( floatx80 a, floatx80 b ) | |||
3361 | || ( ( extractFloatx80Exp( b ) == 0x7FFF ) | 3353 | || ( ( extractFloatx80Exp( b ) == 0x7FFF ) |
3362 | && (bits64) ( extractFloatx80Frac( b )<<1 ) ) | 3354 | && (bits64) ( extractFloatx80Frac( b )<<1 ) ) |
3363 | ) { | 3355 | ) { |
3364 | float_raise( float_flag_invalid ); | 3356 | float_raise( float_flag_invalid ); |
3365 | return 0; | 3357 | return 0; |
3366 | } | 3358 | } |
3367 | return | 3359 | return |
@@ -3392,7 +3384,7 @@ flag floatx80_le_quiet( floatx80 a, floatx80 b ) | |||
3392 | ) { | 3384 | ) { |
3393 | if ( floatx80_is_signaling_nan( a ) | 3385 | if ( floatx80_is_signaling_nan( a ) |
3394 | || floatx80_is_signaling_nan( b ) ) { | 3386 | || floatx80_is_signaling_nan( b ) ) { |
3395 | float_raise( float_flag_invalid ); | 3387 | float_raise( float_flag_invalid ); |
3396 | } | 3388 | } |
3397 | return 0; | 3389 | return 0; |
3398 | } | 3390 | } |
@@ -3429,7 +3421,7 @@ flag floatx80_lt_quiet( floatx80 a, floatx80 b ) | |||
3429 | ) { | 3421 | ) { |
3430 | if ( floatx80_is_signaling_nan( a ) | 3422 | if ( floatx80_is_signaling_nan( a ) |
3431 | || floatx80_is_signaling_nan( b ) ) { | 3423 | || floatx80_is_signaling_nan( b ) ) { |
3432 | float_raise( float_flag_invalid ); | 3424 | float_raise( float_flag_invalid ); |
3433 | } | 3425 | } |
3434 | return 0; | 3426 | return 0; |
3435 | } | 3427 | } |