diff options
author | Richard Purdie <rpurdie@rpsys.net> | 2005-08-03 14:49:17 -0400 |
---|---|---|
committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2005-08-03 14:49:17 -0400 |
commit | f148af2593ef76ac705d1cc6abe48f455c9912cc (patch) | |
tree | cd1e0b0959624234ca3489df8888434ffea5050e /arch/arm/nwfpe/softfloat.c | |
parent | 1fcf844861eb08ee05e05dba13b5436f2f2e29ed (diff) |
[PATCH] ARM: 2837/2: Re: ARM: Make NWFPE preempt safe
Patch from Richard Purdie
NWFPE used global variables, which meant it wasn't safe for use with
preemptive kernels. This patch removes them and communicates the
information between functions in a preempt-safe manner. Generation
of some exceptions was broken and this has also been corrected.
Tests with glibc's maths test suite show no change in the results
before/after this patch.
Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch/arm/nwfpe/softfloat.c')
-rw-r--r-- | arch/arm/nwfpe/softfloat.c | 334 |
1 files changed, 163 insertions, 171 deletions
diff --git a/arch/arm/nwfpe/softfloat.c b/arch/arm/nwfpe/softfloat.c
index e038dd3be9b3..8b75a6e7cb3a 100644
--- a/arch/arm/nwfpe/softfloat.c
+++ b/arch/arm/nwfpe/softfloat.c
@@ -36,16 +36,6 @@ this code that are retained. | |||
36 | 36 | ||
37 | /* | 37 | /* |
38 | ------------------------------------------------------------------------------- | 38 | ------------------------------------------------------------------------------- |
39 | Floating-point rounding mode, extended double-precision rounding precision, | ||
40 | and exception flags. | ||
41 | ------------------------------------------------------------------------------- | ||
42 | */ | ||
43 | int8 float_rounding_mode = float_round_nearest_even; | ||
44 | int8 floatx80_rounding_precision = 80; | ||
45 | int8 float_exception_flags; | ||
46 | |||
47 | /* | ||
48 | ------------------------------------------------------------------------------- | ||
49 | Primitive arithmetic functions, including multi-word arithmetic, and | 39 | Primitive arithmetic functions, including multi-word arithmetic, and |
50 | division and square root approximations. (Can be specialized to target if | 40 | division and square root approximations. (Can be specialized to target if |
51 | desired.) | 41 | desired.) |
@@ -77,14 +67,14 @@ input is too large, however, the invalid exception is raised and the largest | |||
77 | positive or negative integer is returned. | 67 | positive or negative integer is returned. |
78 | ------------------------------------------------------------------------------- | 68 | ------------------------------------------------------------------------------- |
79 | */ | 69 | */ |
80 | static int32 roundAndPackInt32( flag zSign, bits64 absZ ) | 70 | static int32 roundAndPackInt32( struct roundingData *roundData, flag zSign, bits64 absZ ) |
81 | { | 71 | { |
82 | int8 roundingMode; | 72 | int8 roundingMode; |
83 | flag roundNearestEven; | 73 | flag roundNearestEven; |
84 | int8 roundIncrement, roundBits; | 74 | int8 roundIncrement, roundBits; |
85 | int32 z; | 75 | int32 z; |
86 | 76 | ||
87 | roundingMode = float_rounding_mode; | 77 | roundingMode = roundData->mode; |
88 | roundNearestEven = ( roundingMode == float_round_nearest_even ); | 78 | roundNearestEven = ( roundingMode == float_round_nearest_even ); |
89 | roundIncrement = 0x40; | 79 | roundIncrement = 0x40; |
90 | if ( ! roundNearestEven ) { | 80 | if ( ! roundNearestEven ) { |
@@ -107,10 +97,10 @@ static int32 roundAndPackInt32( flag zSign, bits64 absZ ) | |||
107 | z = absZ; | 97 | z = absZ; |
108 | if ( zSign ) z = - z; | 98 | if ( zSign ) z = - z; |
109 | if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { | 99 | if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { |
110 | float_exception_flags |= float_flag_invalid; | 100 | roundData->exception |= float_flag_invalid; |
111 | return zSign ? 0x80000000 : 0x7FFFFFFF; | 101 | return zSign ? 0x80000000 : 0x7FFFFFFF; |
112 | } | 102 | } |
113 | if ( roundBits ) float_exception_flags |= float_flag_inexact; | 103 | if ( roundBits ) roundData->exception |= float_flag_inexact; |
114 | return z; | 104 | return z; |
115 | 105 | ||
116 | } | 106 | } |
@@ -224,14 +214,14 @@ The handling of underflow and overflow follows the IEC/IEEE Standard for | |||
224 | Binary Floating-point Arithmetic. | 214 | Binary Floating-point Arithmetic. |
225 | ------------------------------------------------------------------------------- | 215 | ------------------------------------------------------------------------------- |
226 | */ | 216 | */ |
227 | static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) | 217 | static float32 roundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig ) |
228 | { | 218 | { |
229 | int8 roundingMode; | 219 | int8 roundingMode; |
230 | flag roundNearestEven; | 220 | flag roundNearestEven; |
231 | int8 roundIncrement, roundBits; | 221 | int8 roundIncrement, roundBits; |
232 | flag isTiny; | 222 | flag isTiny; |
233 | 223 | ||
234 | roundingMode = float_rounding_mode; | 224 | roundingMode = roundData->mode; |
235 | roundNearestEven = ( roundingMode == float_round_nearest_even ); | 225 | roundNearestEven = ( roundingMode == float_round_nearest_even ); |
236 | roundIncrement = 0x40; | 226 | roundIncrement = 0x40; |
237 | if ( ! roundNearestEven ) { | 227 | if ( ! roundNearestEven ) { |
@@ -254,7 +244,7 @@ static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) | |||
254 | || ( ( zExp == 0xFD ) | 244 | || ( ( zExp == 0xFD ) |
255 | && ( (sbits32) ( zSig + roundIncrement ) < 0 ) ) | 245 | && ( (sbits32) ( zSig + roundIncrement ) < 0 ) ) |
256 | ) { | 246 | ) { |
257 | float_raise( float_flag_overflow | float_flag_inexact ); | 247 | roundData->exception |= float_flag_overflow | float_flag_inexact; |
258 | return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 ); | 248 | return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 ); |
259 | } | 249 | } |
260 | if ( zExp < 0 ) { | 250 | if ( zExp < 0 ) { |
@@ -265,10 +255,10 @@ static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) | |||
265 | shift32RightJamming( zSig, - zExp, &zSig ); | 255 | shift32RightJamming( zSig, - zExp, &zSig ); |
266 | zExp = 0; | 256 | zExp = 0; |
267 | roundBits = zSig & 0x7F; | 257 | roundBits = zSig & 0x7F; |
268 | if ( isTiny && roundBits ) float_raise( float_flag_underflow ); | 258 | if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow; |
269 | } | 259 | } |
270 | } | 260 | } |
271 | if ( roundBits ) float_exception_flags |= float_flag_inexact; | 261 | if ( roundBits ) roundData->exception |= float_flag_inexact; |
272 | zSig = ( zSig + roundIncrement )>>7; | 262 | zSig = ( zSig + roundIncrement )>>7; |
273 | zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); | 263 | zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); |
274 | if ( zSig == 0 ) zExp = 0; | 264 | if ( zSig == 0 ) zExp = 0; |
@@ -287,12 +277,12 @@ point exponent. | |||
287 | ------------------------------------------------------------------------------- | 277 | ------------------------------------------------------------------------------- |
288 | */ | 278 | */ |
289 | static float32 | 279 | static float32 |
290 | normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) | 280 | normalizeRoundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig ) |
291 | { | 281 | { |
292 | int8 shiftCount; | 282 | int8 shiftCount; |
293 | 283 | ||
294 | shiftCount = countLeadingZeros32( zSig ) - 1; | 284 | shiftCount = countLeadingZeros32( zSig ) - 1; |
295 | return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount ); | 285 | return roundAndPackFloat32( roundData, zSign, zExp - shiftCount, zSig<<shiftCount ); |
296 | 286 | ||
297 | } | 287 | } |
298 | 288 | ||
@@ -395,14 +385,14 @@ The handling of underflow and overflow follows the IEC/IEEE Standard for | |||
395 | Binary Floating-point Arithmetic. | 385 | Binary Floating-point Arithmetic. |
396 | ------------------------------------------------------------------------------- | 386 | ------------------------------------------------------------------------------- |
397 | */ | 387 | */ |
398 | static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) | 388 | static float64 roundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig ) |
399 | { | 389 | { |
400 | int8 roundingMode; | 390 | int8 roundingMode; |
401 | flag roundNearestEven; | 391 | flag roundNearestEven; |
402 | int16 roundIncrement, roundBits; | 392 | int16 roundIncrement, roundBits; |
403 | flag isTiny; | 393 | flag isTiny; |
404 | 394 | ||
405 | roundingMode = float_rounding_mode; | 395 | roundingMode = roundData->mode; |
406 | roundNearestEven = ( roundingMode == float_round_nearest_even ); | 396 | roundNearestEven = ( roundingMode == float_round_nearest_even ); |
407 | roundIncrement = 0x200; | 397 | roundIncrement = 0x200; |
408 | if ( ! roundNearestEven ) { | 398 | if ( ! roundNearestEven ) { |
@@ -427,7 +417,7 @@ static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) | |||
427 | ) { | 417 | ) { |
428 | //register int lr = __builtin_return_address(0); | 418 | //register int lr = __builtin_return_address(0); |
429 | //printk("roundAndPackFloat64 called from 0x%08x\n",lr); | 419 | //printk("roundAndPackFloat64 called from 0x%08x\n",lr); |
430 | float_raise( float_flag_overflow | float_flag_inexact ); | 420 | roundData->exception |= float_flag_overflow | float_flag_inexact; |
431 | return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 ); | 421 | return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 ); |
432 | } | 422 | } |
433 | if ( zExp < 0 ) { | 423 | if ( zExp < 0 ) { |
@@ -438,10 +428,10 @@ static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) | |||
438 | shift64RightJamming( zSig, - zExp, &zSig ); | 428 | shift64RightJamming( zSig, - zExp, &zSig ); |
439 | zExp = 0; | 429 | zExp = 0; |
440 | roundBits = zSig & 0x3FF; | 430 | roundBits = zSig & 0x3FF; |
441 | if ( isTiny && roundBits ) float_raise( float_flag_underflow ); | 431 | if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow; |
442 | } | 432 | } |
443 | } | 433 | } |
444 | if ( roundBits ) float_exception_flags |= float_flag_inexact; | 434 | if ( roundBits ) roundData->exception |= float_flag_inexact; |
445 | zSig = ( zSig + roundIncrement )>>10; | 435 | zSig = ( zSig + roundIncrement )>>10; |
446 | zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); | 436 | zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); |
447 | if ( zSig == 0 ) zExp = 0; | 437 | if ( zSig == 0 ) zExp = 0; |
@@ -460,12 +450,12 @@ point exponent. | |||
460 | ------------------------------------------------------------------------------- | 450 | ------------------------------------------------------------------------------- |
461 | */ | 451 | */ |
462 | static float64 | 452 | static float64 |
463 | normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) | 453 | normalizeRoundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig ) |
464 | { | 454 | { |
465 | int8 shiftCount; | 455 | int8 shiftCount; |
466 | 456 | ||
467 | shiftCount = countLeadingZeros64( zSig ) - 1; | 457 | shiftCount = countLeadingZeros64( zSig ) - 1; |
468 | return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount ); | 458 | return roundAndPackFloat64( roundData, zSign, zExp - shiftCount, zSig<<shiftCount ); |
469 | 459 | ||
470 | } | 460 | } |
471 | 461 | ||
@@ -572,14 +562,15 @@ Floating-point Arithmetic. | |||
572 | */ | 562 | */ |
573 | static floatx80 | 563 | static floatx80 |
574 | roundAndPackFloatx80( | 564 | roundAndPackFloatx80( |
575 | int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 | 565 | struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 |
576 | ) | 566 | ) |
577 | { | 567 | { |
578 | int8 roundingMode; | 568 | int8 roundingMode, roundingPrecision; |
579 | flag roundNearestEven, increment, isTiny; | 569 | flag roundNearestEven, increment, isTiny; |
580 | int64 roundIncrement, roundMask, roundBits; | 570 | int64 roundIncrement, roundMask, roundBits; |
581 | 571 | ||
582 | roundingMode = float_rounding_mode; | 572 | roundingMode = roundData->mode; |
573 | roundingPrecision = roundData->precision; | ||
583 | roundNearestEven = ( roundingMode == float_round_nearest_even ); | 574 | roundNearestEven = ( roundingMode == float_round_nearest_even ); |
584 | if ( roundingPrecision == 80 ) goto precision80; | 575 | if ( roundingPrecision == 80 ) goto precision80; |
585 | if ( roundingPrecision == 64 ) { | 576 | if ( roundingPrecision == 64 ) { |
@@ -623,8 +614,8 @@ static floatx80 | |||
623 | shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); | 614 | shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); |
624 | zExp = 0; | 615 | zExp = 0; |
625 | roundBits = zSig0 & roundMask; | 616 | roundBits = zSig0 & roundMask; |
626 | if ( isTiny && roundBits ) float_raise( float_flag_underflow ); | 617 | if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow; |
627 | if ( roundBits ) float_exception_flags |= float_flag_inexact; | 618 | if ( roundBits ) roundData->exception |= float_flag_inexact; |
628 | zSig0 += roundIncrement; | 619 | zSig0 += roundIncrement; |
629 | if ( (sbits64) zSig0 < 0 ) zExp = 1; | 620 | if ( (sbits64) zSig0 < 0 ) zExp = 1; |
630 | roundIncrement = roundMask + 1; | 621 | roundIncrement = roundMask + 1; |
@@ -635,7 +626,7 @@ static floatx80 | |||
635 | return packFloatx80( zSign, zExp, zSig0 ); | 626 | return packFloatx80( zSign, zExp, zSig0 ); |
636 | } | 627 | } |
637 | } | 628 | } |
638 | if ( roundBits ) float_exception_flags |= float_flag_inexact; | 629 | if ( roundBits ) roundData->exception |= float_flag_inexact; |
639 | zSig0 += roundIncrement; | 630 | zSig0 += roundIncrement; |
640 | if ( zSig0 < roundIncrement ) { | 631 | if ( zSig0 < roundIncrement ) { |
641 | ++zExp; | 632 | ++zExp; |
@@ -672,7 +663,7 @@ static floatx80 | |||
672 | ) { | 663 | ) { |
673 | roundMask = 0; | 664 | roundMask = 0; |
674 | overflow: | 665 | overflow: |
675 | float_raise( float_flag_overflow | float_flag_inexact ); | 666 | roundData->exception |= float_flag_overflow | float_flag_inexact; |
676 | if ( ( roundingMode == float_round_to_zero ) | 667 | if ( ( roundingMode == float_round_to_zero ) |
677 | || ( zSign && ( roundingMode == float_round_up ) ) | 668 | || ( zSign && ( roundingMode == float_round_up ) ) |
678 | || ( ! zSign && ( roundingMode == float_round_down ) ) | 669 | || ( ! zSign && ( roundingMode == float_round_down ) ) |
@@ -689,8 +680,8 @@ static floatx80 | |||
689 | || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) ); | 680 | || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) ); |
690 | shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); | 681 | shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); |
691 | zExp = 0; | 682 | zExp = 0; |
692 | if ( isTiny && zSig1 ) float_raise( float_flag_underflow ); | 683 | if ( isTiny && zSig1 ) roundData->exception |= float_flag_underflow; |
693 | if ( zSig1 ) float_exception_flags |= float_flag_inexact; | 684 | if ( zSig1 ) roundData->exception |= float_flag_inexact; |
694 | if ( roundNearestEven ) { | 685 | if ( roundNearestEven ) { |
695 | increment = ( (sbits64) zSig1 < 0 ); | 686 | increment = ( (sbits64) zSig1 < 0 ); |
696 | } | 687 | } |
@@ -710,7 +701,7 @@ static floatx80 | |||
710 | return packFloatx80( zSign, zExp, zSig0 ); | 701 | return packFloatx80( zSign, zExp, zSig0 ); |
711 | } | 702 | } |
712 | } | 703 | } |
713 | if ( zSig1 ) float_exception_flags |= float_flag_inexact; | 704 | if ( zSig1 ) roundData->exception |= float_flag_inexact; |
714 | if ( increment ) { | 705 | if ( increment ) { |
715 | ++zSig0; | 706 | ++zSig0; |
716 | if ( zSig0 == 0 ) { | 707 | if ( zSig0 == 0 ) { |
@@ -740,7 +731,7 @@ normalized. | |||
740 | */ | 731 | */ |
741 | static floatx80 | 732 | static floatx80 |
742 | normalizeRoundAndPackFloatx80( | 733 | normalizeRoundAndPackFloatx80( |
743 | int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 | 734 | struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 |
744 | ) | 735 | ) |
745 | { | 736 | { |
746 | int8 shiftCount; | 737 | int8 shiftCount; |
@@ -754,7 +745,7 @@ static floatx80 | |||
754 | shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); | 745 | shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); |
755 | zExp -= shiftCount; | 746 | zExp -= shiftCount; |
756 | return | 747 | return |
757 | roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 ); | 748 | roundAndPackFloatx80( roundData, zSign, zExp, zSig0, zSig1 ); |
758 | 749 | ||
759 | } | 750 | } |
760 | 751 | ||
@@ -767,14 +758,14 @@ the single-precision floating-point format. The conversion is performed | |||
767 | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. | 758 | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. |
768 | ------------------------------------------------------------------------------- | 759 | ------------------------------------------------------------------------------- |
769 | */ | 760 | */ |
770 | float32 int32_to_float32( int32 a ) | 761 | float32 int32_to_float32(struct roundingData *roundData, int32 a) |
771 | { | 762 | { |
772 | flag zSign; | 763 | flag zSign; |
773 | 764 | ||
774 | if ( a == 0 ) return 0; | 765 | if ( a == 0 ) return 0; |
775 | if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); | 766 | if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); |
776 | zSign = ( a < 0 ); | 767 | zSign = ( a < 0 ); |
777 | return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a ); | 768 | return normalizeRoundAndPackFloat32( roundData, zSign, 0x9C, zSign ? - a : a ); |
778 | 769 | ||
779 | } | 770 | } |
780 | 771 | ||
@@ -840,7 +831,7 @@ positive integer is returned. Otherwise, if the conversion overflows, the | |||
840 | largest integer with the same sign as `a' is returned. | 831 | largest integer with the same sign as `a' is returned. |
841 | ------------------------------------------------------------------------------- | 832 | ------------------------------------------------------------------------------- |
842 | */ | 833 | */ |
843 | int32 float32_to_int32( float32 a ) | 834 | int32 float32_to_int32( struct roundingData *roundData, float32 a ) |
844 | { | 835 | { |
845 | flag aSign; | 836 | flag aSign; |
846 | int16 aExp, shiftCount; | 837 | int16 aExp, shiftCount; |
@@ -856,7 +847,7 @@ int32 float32_to_int32( float32 a ) | |||
856 | zSig = aSig; | 847 | zSig = aSig; |
857 | zSig <<= 32; | 848 | zSig <<= 32; |
858 | if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig ); | 849 | if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig ); |
859 | return roundAndPackInt32( aSign, zSig ); | 850 | return roundAndPackInt32( roundData, aSign, zSig ); |
860 | 851 | ||
861 | } | 852 | } |
862 | 853 | ||
@@ -889,13 +880,13 @@ int32 float32_to_int32_round_to_zero( float32 a ) | |||
889 | return 0x80000000; | 880 | return 0x80000000; |
890 | } | 881 | } |
891 | else if ( aExp <= 0x7E ) { | 882 | else if ( aExp <= 0x7E ) { |
892 | if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; | 883 | if ( aExp | aSig ) float_raise( float_flag_inexact ); |
893 | return 0; | 884 | return 0; |
894 | } | 885 | } |
895 | aSig = ( aSig | 0x00800000 )<<8; | 886 | aSig = ( aSig | 0x00800000 )<<8; |
896 | z = aSig>>( - shiftCount ); | 887 | z = aSig>>( - shiftCount ); |
897 | if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { | 888 | if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { |
898 | float_exception_flags |= float_flag_inexact; | 889 | float_raise( float_flag_inexact ); |
899 | } | 890 | } |
900 | return aSign ? - z : z; | 891 | return aSign ? - z : z; |
901 | 892 | ||
@@ -973,7 +964,7 @@ operation is performed according to the IEC/IEEE Standard for Binary | |||
973 | Floating-point Arithmetic. | 964 | Floating-point Arithmetic. |
974 | ------------------------------------------------------------------------------- | 965 | ------------------------------------------------------------------------------- |
975 | */ | 966 | */ |
976 | float32 float32_round_to_int( float32 a ) | 967 | float32 float32_round_to_int( struct roundingData *roundData, float32 a ) |
977 | { | 968 | { |
978 | flag aSign; | 969 | flag aSign; |
979 | int16 aExp; | 970 | int16 aExp; |
@@ -988,11 +979,12 @@ float32 float32_round_to_int( float32 a ) | |||
988 | } | 979 | } |
989 | return a; | 980 | return a; |
990 | } | 981 | } |
982 | roundingMode = roundData->mode; | ||
991 | if ( aExp <= 0x7E ) { | 983 | if ( aExp <= 0x7E ) { |
992 | if ( (bits32) ( a<<1 ) == 0 ) return a; | 984 | if ( (bits32) ( a<<1 ) == 0 ) return a; |
993 | float_exception_flags |= float_flag_inexact; | 985 | roundData->exception |= float_flag_inexact; |
994 | aSign = extractFloat32Sign( a ); | 986 | aSign = extractFloat32Sign( a ); |
995 | switch ( float_rounding_mode ) { | 987 | switch ( roundingMode ) { |
996 | case float_round_nearest_even: | 988 | case float_round_nearest_even: |
997 | if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { | 989 | if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { |
998 | return packFloat32( aSign, 0x7F, 0 ); | 990 | return packFloat32( aSign, 0x7F, 0 ); |
@@ -1009,7 +1001,6 @@ float32 float32_round_to_int( float32 a ) | |||
1009 | lastBitMask <<= 0x96 - aExp; | 1001 | lastBitMask <<= 0x96 - aExp; |
1010 | roundBitsMask = lastBitMask - 1; | 1002 | roundBitsMask = lastBitMask - 1; |
1011 | z = a; | 1003 | z = a; |
1012 | roundingMode = float_rounding_mode; | ||
1013 | if ( roundingMode == float_round_nearest_even ) { | 1004 | if ( roundingMode == float_round_nearest_even ) { |
1014 | z += lastBitMask>>1; | 1005 | z += lastBitMask>>1; |
1015 | if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; | 1006 | if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; |
@@ -1020,7 +1011,7 @@ float32 float32_round_to_int( float32 a ) | |||
1020 | } | 1011 | } |
1021 | } | 1012 | } |
1022 | z &= ~ roundBitsMask; | 1013 | z &= ~ roundBitsMask; |
1023 | if ( z != a ) float_exception_flags |= float_flag_inexact; | 1014 | if ( z != a ) roundData->exception |= float_flag_inexact; |
1024 | return z; | 1015 | return z; |
1025 | 1016 | ||
1026 | } | 1017 | } |
@@ -1034,7 +1025,7 @@ addition is performed according to the IEC/IEEE Standard for Binary | |||
1034 | Floating-point Arithmetic. | 1025 | Floating-point Arithmetic. |
1035 | ------------------------------------------------------------------------------- | 1026 | ------------------------------------------------------------------------------- |
1036 | */ | 1027 | */ |
1037 | static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) | 1028 | static float32 addFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign ) |
1038 | { | 1029 | { |
1039 | int16 aExp, bExp, zExp; | 1030 | int16 aExp, bExp, zExp; |
1040 | bits32 aSig, bSig, zSig; | 1031 | bits32 aSig, bSig, zSig; |
@@ -1093,7 +1084,7 @@ static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) | |||
1093 | ++zExp; | 1084 | ++zExp; |
1094 | } | 1085 | } |
1095 | roundAndPack: | 1086 | roundAndPack: |
1096 | return roundAndPackFloat32( zSign, zExp, zSig ); | 1087 | return roundAndPackFloat32( roundData, zSign, zExp, zSig ); |
1097 | 1088 | ||
1098 | } | 1089 | } |
1099 | 1090 | ||
@@ -1106,7 +1097,7 @@ result is a NaN. The subtraction is performed according to the IEC/IEEE | |||
1106 | Standard for Binary Floating-point Arithmetic. | 1097 | Standard for Binary Floating-point Arithmetic. |
1107 | ------------------------------------------------------------------------------- | 1098 | ------------------------------------------------------------------------------- |
1108 | */ | 1099 | */ |
1109 | static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) | 1100 | static float32 subFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign ) |
1110 | { | 1101 | { |
1111 | int16 aExp, bExp, zExp; | 1102 | int16 aExp, bExp, zExp; |
1112 | bits32 aSig, bSig, zSig; | 1103 | bits32 aSig, bSig, zSig; |
@@ -1123,7 +1114,7 @@ static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) | |||
1123 | if ( expDiff < 0 ) goto bExpBigger; | 1114 | if ( expDiff < 0 ) goto bExpBigger; |
1124 | if ( aExp == 0xFF ) { | 1115 | if ( aExp == 0xFF ) { |
1125 | if ( aSig | bSig ) return propagateFloat32NaN( a, b ); | 1116 | if ( aSig | bSig ) return propagateFloat32NaN( a, b ); |
1126 | float_raise( float_flag_invalid ); | 1117 | roundData->exception |= float_flag_invalid; |
1127 | return float32_default_nan; | 1118 | return float32_default_nan; |
1128 | } | 1119 | } |
1129 | if ( aExp == 0 ) { | 1120 | if ( aExp == 0 ) { |
@@ -1132,7 +1123,7 @@ static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) | |||
1132 | } | 1123 | } |
1133 | if ( bSig < aSig ) goto aBigger; | 1124 | if ( bSig < aSig ) goto aBigger; |
1134 | if ( aSig < bSig ) goto bBigger; | 1125 | if ( aSig < bSig ) goto bBigger; |
1135 | return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); | 1126 | return packFloat32( roundData->mode == float_round_down, 0, 0 ); |
1136 | bExpBigger: | 1127 | bExpBigger: |
1137 | if ( bExp == 0xFF ) { | 1128 | if ( bExp == 0xFF ) { |
1138 | if ( bSig ) return propagateFloat32NaN( a, b ); | 1129 | if ( bSig ) return propagateFloat32NaN( a, b ); |
@@ -1169,7 +1160,7 @@ static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) | |||
1169 | zExp = aExp; | 1160 | zExp = aExp; |
1170 | normalizeRoundAndPack: | 1161 | normalizeRoundAndPack: |
1171 | --zExp; | 1162 | --zExp; |
1172 | return normalizeRoundAndPackFloat32( zSign, zExp, zSig ); | 1163 | return normalizeRoundAndPackFloat32( roundData, zSign, zExp, zSig ); |
1173 | 1164 | ||
1174 | } | 1165 | } |
1175 | 1166 | ||
@@ -1180,17 +1171,17 @@ and `b'. The operation is performed according to the IEC/IEEE Standard for | |||
1180 | Binary Floating-point Arithmetic. | 1171 | Binary Floating-point Arithmetic. |
1181 | ------------------------------------------------------------------------------- | 1172 | ------------------------------------------------------------------------------- |
1182 | */ | 1173 | */ |
1183 | float32 float32_add( float32 a, float32 b ) | 1174 | float32 float32_add( struct roundingData *roundData, float32 a, float32 b ) |
1184 | { | 1175 | { |
1185 | flag aSign, bSign; | 1176 | flag aSign, bSign; |
1186 | 1177 | ||
1187 | aSign = extractFloat32Sign( a ); | 1178 | aSign = extractFloat32Sign( a ); |
1188 | bSign = extractFloat32Sign( b ); | 1179 | bSign = extractFloat32Sign( b ); |
1189 | if ( aSign == bSign ) { | 1180 | if ( aSign == bSign ) { |
1190 | return addFloat32Sigs( a, b, aSign ); | 1181 | return addFloat32Sigs( roundData, a, b, aSign ); |
1191 | } | 1182 | } |
1192 | else { | 1183 | else { |
1193 | return subFloat32Sigs( a, b, aSign ); | 1184 | return subFloat32Sigs( roundData, a, b, aSign ); |
1194 | } | 1185 | } |
1195 | 1186 | ||
1196 | } | 1187 | } |
@@ -1202,17 +1193,17 @@ Returns the result of subtracting the single-precision floating-point values | |||
1202 | for Binary Floating-point Arithmetic. | 1193 | for Binary Floating-point Arithmetic. |
1203 | ------------------------------------------------------------------------------- | 1194 | ------------------------------------------------------------------------------- |
1204 | */ | 1195 | */ |
1205 | float32 float32_sub( float32 a, float32 b ) | 1196 | float32 float32_sub( struct roundingData *roundData, float32 a, float32 b ) |
1206 | { | 1197 | { |
1207 | flag aSign, bSign; | 1198 | flag aSign, bSign; |
1208 | 1199 | ||
1209 | aSign = extractFloat32Sign( a ); | 1200 | aSign = extractFloat32Sign( a ); |
1210 | bSign = extractFloat32Sign( b ); | 1201 | bSign = extractFloat32Sign( b ); |
1211 | if ( aSign == bSign ) { | 1202 | if ( aSign == bSign ) { |
1212 | return subFloat32Sigs( a, b, aSign ); | 1203 | return subFloat32Sigs( roundData, a, b, aSign ); |
1213 | } | 1204 | } |
1214 | else { | 1205 | else { |
1215 | return addFloat32Sigs( a, b, aSign ); | 1206 | return addFloat32Sigs( roundData, a, b, aSign ); |
1216 | } | 1207 | } |
1217 | 1208 | ||
1218 | } | 1209 | } |
@@ -1224,7 +1215,7 @@ Returns the result of multiplying the single-precision floating-point values | |||
1224 | for Binary Floating-point Arithmetic. | 1215 | for Binary Floating-point Arithmetic. |
1225 | ------------------------------------------------------------------------------- | 1216 | ------------------------------------------------------------------------------- |
1226 | */ | 1217 | */ |
1227 | float32 float32_mul( float32 a, float32 b ) | 1218 | float32 float32_mul( struct roundingData *roundData, float32 a, float32 b ) |
1228 | { | 1219 | { |
1229 | flag aSign, bSign, zSign; | 1220 | flag aSign, bSign, zSign; |
1230 | int16 aExp, bExp, zExp; | 1221 | int16 aExp, bExp, zExp; |
@@ -1244,7 +1235,7 @@ float32 float32_mul( float32 a, float32 b ) | |||
1244 | return propagateFloat32NaN( a, b ); | 1235 | return propagateFloat32NaN( a, b ); |
1245 | } | 1236 | } |
1246 | if ( ( bExp | bSig ) == 0 ) { | 1237 | if ( ( bExp | bSig ) == 0 ) { |
1247 | float_raise( float_flag_invalid ); | 1238 | roundData->exception |= float_flag_invalid; |
1248 | return float32_default_nan; | 1239 | return float32_default_nan; |
1249 | } | 1240 | } |
1250 | return packFloat32( zSign, 0xFF, 0 ); | 1241 | return packFloat32( zSign, 0xFF, 0 ); |
@@ -1252,7 +1243,7 @@ float32 float32_mul( float32 a, float32 b ) | |||
1252 | if ( bExp == 0xFF ) { | 1243 | if ( bExp == 0xFF ) { |
1253 | if ( bSig ) return propagateFloat32NaN( a, b ); | 1244 | if ( bSig ) return propagateFloat32NaN( a, b ); |
1254 | if ( ( aExp | aSig ) == 0 ) { | 1245 | if ( ( aExp | aSig ) == 0 ) { |
1255 | float_raise( float_flag_invalid ); | 1246 | roundData->exception |= float_flag_invalid; |
1256 | return float32_default_nan; | 1247 | return float32_default_nan; |
1257 | } | 1248 | } |
1258 | return packFloat32( zSign, 0xFF, 0 ); | 1249 | return packFloat32( zSign, 0xFF, 0 ); |
@@ -1274,7 +1265,7 @@ float32 float32_mul( float32 a, float32 b ) | |||
1274 | zSig <<= 1; | 1265 | zSig <<= 1; |
1275 | --zExp; | 1266 | --zExp; |
1276 | } | 1267 | } |
1277 | return roundAndPackFloat32( zSign, zExp, zSig ); | 1268 | return roundAndPackFloat32( roundData, zSign, zExp, zSig ); |
1278 | 1269 | ||
1279 | } | 1270 | } |
1280 | 1271 | ||
@@ -1285,7 +1276,7 @@ by the corresponding value `b'. The operation is performed according to the | |||
1285 | IEC/IEEE Standard for Binary Floating-point Arithmetic. | 1276 | IEC/IEEE Standard for Binary Floating-point Arithmetic. |
1286 | ------------------------------------------------------------------------------- | 1277 | ------------------------------------------------------------------------------- |
1287 | */ | 1278 | */ |
1288 | float32 float32_div( float32 a, float32 b ) | 1279 | float32 float32_div( struct roundingData *roundData, float32 a, float32 b ) |
1289 | { | 1280 | { |
1290 | flag aSign, bSign, zSign; | 1281 | flag aSign, bSign, zSign; |
1291 | int16 aExp, bExp, zExp; | 1282 | int16 aExp, bExp, zExp; |
@@ -1302,7 +1293,7 @@ float32 float32_div( float32 a, float32 b ) | |||
1302 | if ( aSig ) return propagateFloat32NaN( a, b ); | 1293 | if ( aSig ) return propagateFloat32NaN( a, b ); |
1303 | if ( bExp == 0xFF ) { | 1294 | if ( bExp == 0xFF ) { |
1304 | if ( bSig ) return propagateFloat32NaN( a, b ); | 1295 | if ( bSig ) return propagateFloat32NaN( a, b ); |
1305 | float_raise( float_flag_invalid ); | 1296 | roundData->exception |= float_flag_invalid; |
1306 | return float32_default_nan; | 1297 | return float32_default_nan; |
1307 | } | 1298 | } |
1308 | return packFloat32( zSign, 0xFF, 0 ); | 1299 | return packFloat32( zSign, 0xFF, 0 ); |
@@ -1314,10 +1305,10 @@ float32 float32_div( float32 a, float32 b ) | |||
1314 | if ( bExp == 0 ) { | 1305 | if ( bExp == 0 ) { |
1315 | if ( bSig == 0 ) { | 1306 | if ( bSig == 0 ) { |
1316 | if ( ( aExp | aSig ) == 0 ) { | 1307 | if ( ( aExp | aSig ) == 0 ) { |
1317 | float_raise( float_flag_invalid ); | 1308 | roundData->exception |= float_flag_invalid; |
1318 | return float32_default_nan; | 1309 | return float32_default_nan; |
1319 | } | 1310 | } |
1320 | float_raise( float_flag_divbyzero ); | 1311 | roundData->exception |= float_flag_divbyzero; |
1321 | return packFloat32( zSign, 0xFF, 0 ); | 1312 | return packFloat32( zSign, 0xFF, 0 ); |
1322 | } | 1313 | } |
1323 | normalizeFloat32Subnormal( bSig, &bExp, &bSig ); | 1314 | normalizeFloat32Subnormal( bSig, &bExp, &bSig ); |
@@ -1341,7 +1332,7 @@ float32 float32_div( float32 a, float32 b ) | |||
1341 | if ( ( zSig & 0x3F ) == 0 ) { | 1332 | if ( ( zSig & 0x3F ) == 0 ) { |
1342 | zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 ); | 1333 | zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 ); |
1343 | } | 1334 | } |
1344 | return roundAndPackFloat32( zSign, zExp, zSig ); | 1335 | return roundAndPackFloat32( roundData, zSign, zExp, zSig ); |
1345 | 1336 | ||
1346 | } | 1337 | } |
1347 | 1338 | ||
@@ -1352,7 +1343,7 @@ with respect to the corresponding value `b'. The operation is performed | |||
1352 | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. | 1343 | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. |
1353 | ------------------------------------------------------------------------------- | 1344 | ------------------------------------------------------------------------------- |
1354 | */ | 1345 | */ |
1355 | float32 float32_rem( float32 a, float32 b ) | 1346 | float32 float32_rem( struct roundingData *roundData, float32 a, float32 b ) |
1356 | { | 1347 | { |
1357 | flag aSign, bSign, zSign; | 1348 | flag aSign, bSign, zSign; |
1358 | int16 aExp, bExp, expDiff; | 1349 | int16 aExp, bExp, expDiff; |
@@ -1372,7 +1363,7 @@ float32 float32_rem( float32 a, float32 b ) | |||
1372 | if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { | 1363 | if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { |
1373 | return propagateFloat32NaN( a, b ); | 1364 | return propagateFloat32NaN( a, b ); |
1374 | } | 1365 | } |
1375 | float_raise( float_flag_invalid ); | 1366 | roundData->exception |= float_flag_invalid; |
1376 | return float32_default_nan; | 1367 | return float32_default_nan; |
1377 | } | 1368 | } |
1378 | if ( bExp == 0xFF ) { | 1369 | if ( bExp == 0xFF ) { |
@@ -1381,7 +1372,7 @@ float32 float32_rem( float32 a, float32 b ) | |||
1381 | } | 1372 | } |
1382 | if ( bExp == 0 ) { | 1373 | if ( bExp == 0 ) { |
1383 | if ( bSig == 0 ) { | 1374 | if ( bSig == 0 ) { |
1384 | float_raise( float_flag_invalid ); | 1375 | roundData->exception |= float_flag_invalid; |
1385 | return float32_default_nan; | 1376 | return float32_default_nan; |
1386 | } | 1377 | } |
1387 | normalizeFloat32Subnormal( bSig, &bExp, &bSig ); | 1378 | normalizeFloat32Subnormal( bSig, &bExp, &bSig ); |
@@ -1444,7 +1435,7 @@ float32 float32_rem( float32 a, float32 b ) | |||
1444 | } | 1435 | } |
1445 | zSign = ( (sbits32) aSig < 0 ); | 1436 | zSign = ( (sbits32) aSig < 0 ); |
1446 | if ( zSign ) aSig = - aSig; | 1437 | if ( zSign ) aSig = - aSig; |
1447 | return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig ); | 1438 | return normalizeRoundAndPackFloat32( roundData, aSign ^ zSign, bExp, aSig ); |
1448 | 1439 | ||
1449 | } | 1440 | } |
1450 | 1441 | ||
@@ -1455,7 +1446,7 @@ The operation is performed according to the IEC/IEEE Standard for Binary | |||
1455 | Floating-point Arithmetic. | 1446 | Floating-point Arithmetic. |
1456 | ------------------------------------------------------------------------------- | 1447 | ------------------------------------------------------------------------------- |
1457 | */ | 1448 | */ |
1458 | float32 float32_sqrt( float32 a ) | 1449 | float32 float32_sqrt( struct roundingData *roundData, float32 a ) |
1459 | { | 1450 | { |
1460 | flag aSign; | 1451 | flag aSign; |
1461 | int16 aExp, zExp; | 1452 | int16 aExp, zExp; |
@@ -1468,12 +1459,12 @@ float32 float32_sqrt( float32 a ) | |||
1468 | if ( aExp == 0xFF ) { | 1459 | if ( aExp == 0xFF ) { |
1469 | if ( aSig ) return propagateFloat32NaN( a, 0 ); | 1460 | if ( aSig ) return propagateFloat32NaN( a, 0 ); |
1470 | if ( ! aSign ) return a; | 1461 | if ( ! aSign ) return a; |
1471 | float_raise( float_flag_invalid ); | 1462 | roundData->exception |= float_flag_invalid; |
1472 | return float32_default_nan; | 1463 | return float32_default_nan; |
1473 | } | 1464 | } |
1474 | if ( aSign ) { | 1465 | if ( aSign ) { |
1475 | if ( ( aExp | aSig ) == 0 ) return a; | 1466 | if ( ( aExp | aSig ) == 0 ) return a; |
1476 | float_raise( float_flag_invalid ); | 1467 | roundData->exception |= float_flag_invalid; |
1477 | return float32_default_nan; | 1468 | return float32_default_nan; |
1478 | } | 1469 | } |
1479 | if ( aExp == 0 ) { | 1470 | if ( aExp == 0 ) { |
@@ -1499,7 +1490,7 @@ float32 float32_sqrt( float32 a ) | |||
1499 | } | 1490 | } |
1500 | } | 1491 | } |
1501 | shift32RightJamming( zSig, 1, &zSig ); | 1492 | shift32RightJamming( zSig, 1, &zSig ); |
1502 | return roundAndPackFloat32( 0, zExp, zSig ); | 1493 | return roundAndPackFloat32( roundData, 0, zExp, zSig ); |
1503 | 1494 | ||
1504 | } | 1495 | } |
1505 | 1496 | ||
@@ -1661,7 +1652,7 @@ positive integer is returned. Otherwise, if the conversion overflows, the | |||
1661 | largest integer with the same sign as `a' is returned. | 1652 | largest integer with the same sign as `a' is returned. |
1662 | ------------------------------------------------------------------------------- | 1653 | ------------------------------------------------------------------------------- |
1663 | */ | 1654 | */ |
1664 | int32 float64_to_int32( float64 a ) | 1655 | int32 float64_to_int32( struct roundingData *roundData, float64 a ) |
1665 | { | 1656 | { |
1666 | flag aSign; | 1657 | flag aSign; |
1667 | int16 aExp, shiftCount; | 1658 | int16 aExp, shiftCount; |
@@ -1674,7 +1665,7 @@ int32 float64_to_int32( float64 a ) | |||
1674 | if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); | 1665 | if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); |
1675 | shiftCount = 0x42C - aExp; | 1666 | shiftCount = 0x42C - aExp; |
1676 | if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); | 1667 | if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); |
1677 | return roundAndPackInt32( aSign, aSig ); | 1668 | return roundAndPackInt32( roundData, aSign, aSig ); |
1678 | 1669 | ||
1679 | } | 1670 | } |
1680 | 1671 | ||
@@ -1705,7 +1696,7 @@ int32 float64_to_int32_round_to_zero( float64 a ) | |||
1705 | goto invalid; | 1696 | goto invalid; |
1706 | } | 1697 | } |
1707 | else if ( 52 < shiftCount ) { | 1698 | else if ( 52 < shiftCount ) { |
1708 | if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; | 1699 | if ( aExp || aSig ) float_raise( float_flag_inexact ); |
1709 | return 0; | 1700 | return 0; |
1710 | } | 1701 | } |
1711 | aSig |= LIT64( 0x0010000000000000 ); | 1702 | aSig |= LIT64( 0x0010000000000000 ); |
@@ -1715,11 +1706,11 @@ int32 float64_to_int32_round_to_zero( float64 a ) | |||
1715 | if ( aSign ) z = - z; | 1706 | if ( aSign ) z = - z; |
1716 | if ( ( z < 0 ) ^ aSign ) { | 1707 | if ( ( z < 0 ) ^ aSign ) { |
1717 | invalid: | 1708 | invalid: |
1718 | float_exception_flags |= float_flag_invalid; | 1709 | float_raise( float_flag_invalid ); |
1719 | return aSign ? 0x80000000 : 0x7FFFFFFF; | 1710 | return aSign ? 0x80000000 : 0x7FFFFFFF; |
1720 | } | 1711 | } |
1721 | if ( ( aSig<<shiftCount ) != savedASig ) { | 1712 | if ( ( aSig<<shiftCount ) != savedASig ) { |
1722 | float_exception_flags |= float_flag_inexact; | 1713 | float_raise( float_flag_inexact ); |
1723 | } | 1714 | } |
1724 | return z; | 1715 | return z; |
1725 | 1716 | ||
@@ -1736,7 +1727,7 @@ positive integer is returned. Otherwise, if the conversion overflows, the | |||
1736 | largest positive integer is returned. | 1727 | largest positive integer is returned. |
1737 | ------------------------------------------------------------------------------- | 1728 | ------------------------------------------------------------------------------- |
1738 | */ | 1729 | */ |
1739 | int32 float64_to_uint32( float64 a ) | 1730 | int32 float64_to_uint32( struct roundingData *roundData, float64 a ) |
1740 | { | 1731 | { |
1741 | flag aSign; | 1732 | flag aSign; |
1742 | int16 aExp, shiftCount; | 1733 | int16 aExp, shiftCount; |
@@ -1749,7 +1740,7 @@ int32 float64_to_uint32( float64 a ) | |||
1749 | if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); | 1740 | if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); |
1750 | shiftCount = 0x42C - aExp; | 1741 | shiftCount = 0x42C - aExp; |
1751 | if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); | 1742 | if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); |
1752 | return roundAndPackInt32( aSign, aSig ); | 1743 | return roundAndPackInt32( roundData, aSign, aSig ); |
1753 | } | 1744 | } |
1754 | 1745 | ||
1755 | /* | 1746 | /* |
@@ -1778,7 +1769,7 @@ int32 float64_to_uint32_round_to_zero( float64 a ) | |||
1778 | goto invalid; | 1769 | goto invalid; |
1779 | } | 1770 | } |
1780 | else if ( 52 < shiftCount ) { | 1771 | else if ( 52 < shiftCount ) { |
1781 | if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; | 1772 | if ( aExp || aSig ) float_raise( float_flag_inexact ); |
1782 | return 0; | 1773 | return 0; |
1783 | } | 1774 | } |
1784 | aSig |= LIT64( 0x0010000000000000 ); | 1775 | aSig |= LIT64( 0x0010000000000000 ); |
@@ -1788,11 +1779,11 @@ int32 float64_to_uint32_round_to_zero( float64 a ) | |||
1788 | if ( aSign ) z = - z; | 1779 | if ( aSign ) z = - z; |
1789 | if ( ( z < 0 ) ^ aSign ) { | 1780 | if ( ( z < 0 ) ^ aSign ) { |
1790 | invalid: | 1781 | invalid: |
1791 | float_exception_flags |= float_flag_invalid; | 1782 | float_raise( float_flag_invalid ); |
1792 | return aSign ? 0x80000000 : 0x7FFFFFFF; | 1783 | return aSign ? 0x80000000 : 0x7FFFFFFF; |
1793 | } | 1784 | } |
1794 | if ( ( aSig<<shiftCount ) != savedASig ) { | 1785 | if ( ( aSig<<shiftCount ) != savedASig ) { |
1795 | float_exception_flags |= float_flag_inexact; | 1786 | float_raise( float_flag_inexact ); |
1796 | } | 1787 | } |
1797 | return z; | 1788 | return z; |
1798 | } | 1789 | } |
@@ -1805,7 +1796,7 @@ performed according to the IEC/IEEE Standard for Binary Floating-point | |||
1805 | Arithmetic. | 1796 | Arithmetic. |
1806 | ------------------------------------------------------------------------------- | 1797 | ------------------------------------------------------------------------------- |
1807 | */ | 1798 | */ |
1808 | float32 float64_to_float32( float64 a ) | 1799 | float32 float64_to_float32( struct roundingData *roundData, float64 a ) |
1809 | { | 1800 | { |
1810 | flag aSign; | 1801 | flag aSign; |
1811 | int16 aExp; | 1802 | int16 aExp; |
@@ -1825,7 +1816,7 @@ float32 float64_to_float32( float64 a ) | |||
1825 | zSig |= 0x40000000; | 1816 | zSig |= 0x40000000; |
1826 | aExp -= 0x381; | 1817 | aExp -= 0x381; |
1827 | } | 1818 | } |
1828 | return roundAndPackFloat32( aSign, aExp, zSig ); | 1819 | return roundAndPackFloat32( roundData, aSign, aExp, zSig ); |
1829 | 1820 | ||
1830 | } | 1821 | } |
1831 | 1822 | ||
@@ -1872,7 +1863,7 @@ operation is performed according to the IEC/IEEE Standard for Binary | |||
1872 | Floating-point Arithmetic. | 1863 | Floating-point Arithmetic. |
1873 | ------------------------------------------------------------------------------- | 1864 | ------------------------------------------------------------------------------- |
1874 | */ | 1865 | */ |
1875 | float64 float64_round_to_int( float64 a ) | 1866 | float64 float64_round_to_int( struct roundingData *roundData, float64 a ) |
1876 | { | 1867 | { |
1877 | flag aSign; | 1868 | flag aSign; |
1878 | int16 aExp; | 1869 | int16 aExp; |
@@ -1889,9 +1880,9 @@ float64 float64_round_to_int( float64 a ) | |||
1889 | } | 1880 | } |
1890 | if ( aExp <= 0x3FE ) { | 1881 | if ( aExp <= 0x3FE ) { |
1891 | if ( (bits64) ( a<<1 ) == 0 ) return a; | 1882 | if ( (bits64) ( a<<1 ) == 0 ) return a; |
1892 | float_exception_flags |= float_flag_inexact; | 1883 | roundData->exception |= float_flag_inexact; |
1893 | aSign = extractFloat64Sign( a ); | 1884 | aSign = extractFloat64Sign( a ); |
1894 | switch ( float_rounding_mode ) { | 1885 | switch ( roundData->mode ) { |
1895 | case float_round_nearest_even: | 1886 | case float_round_nearest_even: |
1896 | if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) { | 1887 | if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) { |
1897 | return packFloat64( aSign, 0x3FF, 0 ); | 1888 | return packFloat64( aSign, 0x3FF, 0 ); |
@@ -1909,7 +1900,7 @@ float64 float64_round_to_int( float64 a ) | |||
1909 | lastBitMask <<= 0x433 - aExp; | 1900 | lastBitMask <<= 0x433 - aExp; |
1910 | roundBitsMask = lastBitMask - 1; | 1901 | roundBitsMask = lastBitMask - 1; |
1911 | z = a; | 1902 | z = a; |
1912 | roundingMode = float_rounding_mode; | 1903 | roundingMode = roundData->mode; |
1913 | if ( roundingMode == float_round_nearest_even ) { | 1904 | if ( roundingMode == float_round_nearest_even ) { |
1914 | z += lastBitMask>>1; | 1905 | z += lastBitMask>>1; |
1915 | if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; | 1906 | if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; |
@@ -1920,7 +1911,7 @@ float64 float64_round_to_int( float64 a ) | |||
1920 | } | 1911 | } |
1921 | } | 1912 | } |
1922 | z &= ~ roundBitsMask; | 1913 | z &= ~ roundBitsMask; |
1923 | if ( z != a ) float_exception_flags |= float_flag_inexact; | 1914 | if ( z != a ) roundData->exception |= float_flag_inexact; |
1924 | return z; | 1915 | return z; |
1925 | 1916 | ||
1926 | } | 1917 | } |
@@ -1934,7 +1925,7 @@ addition is performed according to the IEC/IEEE Standard for Binary | |||
1934 | Floating-point Arithmetic. | 1925 | Floating-point Arithmetic. |
1935 | ------------------------------------------------------------------------------- | 1926 | ------------------------------------------------------------------------------- |
1936 | */ | 1927 | */ |
1937 | static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) | 1928 | static float64 addFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign ) |
1938 | { | 1929 | { |
1939 | int16 aExp, bExp, zExp; | 1930 | int16 aExp, bExp, zExp; |
1940 | bits64 aSig, bSig, zSig; | 1931 | bits64 aSig, bSig, zSig; |
@@ -1993,7 +1984,7 @@ static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) | |||
1993 | ++zExp; | 1984 | ++zExp; |
1994 | } | 1985 | } |
1995 | roundAndPack: | 1986 | roundAndPack: |
1996 | return roundAndPackFloat64( zSign, zExp, zSig ); | 1987 | return roundAndPackFloat64( roundData, zSign, zExp, zSig ); |
1997 | 1988 | ||
1998 | } | 1989 | } |
1999 | 1990 | ||
@@ -2006,7 +1997,7 @@ result is a NaN. The subtraction is performed according to the IEC/IEEE | |||
2006 | Standard for Binary Floating-point Arithmetic. | 1997 | Standard for Binary Floating-point Arithmetic. |
2007 | ------------------------------------------------------------------------------- | 1998 | ------------------------------------------------------------------------------- |
2008 | */ | 1999 | */ |
2009 | static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) | 2000 | static float64 subFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign ) |
2010 | { | 2001 | { |
2011 | int16 aExp, bExp, zExp; | 2002 | int16 aExp, bExp, zExp; |
2012 | bits64 aSig, bSig, zSig; | 2003 | bits64 aSig, bSig, zSig; |
@@ -2023,7 +2014,7 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) | |||
2023 | if ( expDiff < 0 ) goto bExpBigger; | 2014 | if ( expDiff < 0 ) goto bExpBigger; |
2024 | if ( aExp == 0x7FF ) { | 2015 | if ( aExp == 0x7FF ) { |
2025 | if ( aSig | bSig ) return propagateFloat64NaN( a, b ); | 2016 | if ( aSig | bSig ) return propagateFloat64NaN( a, b ); |
2026 | float_raise( float_flag_invalid ); | 2017 | roundData->exception |= float_flag_invalid; |
2027 | return float64_default_nan; | 2018 | return float64_default_nan; |
2028 | } | 2019 | } |
2029 | if ( aExp == 0 ) { | 2020 | if ( aExp == 0 ) { |
@@ -2032,7 +2023,7 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) | |||
2032 | } | 2023 | } |
2033 | if ( bSig < aSig ) goto aBigger; | 2024 | if ( bSig < aSig ) goto aBigger; |
2034 | if ( aSig < bSig ) goto bBigger; | 2025 | if ( aSig < bSig ) goto bBigger; |
2035 | return packFloat64( float_rounding_mode == float_round_down, 0, 0 ); | 2026 | return packFloat64( roundData->mode == float_round_down, 0, 0 ); |
2036 | bExpBigger: | 2027 | bExpBigger: |
2037 | if ( bExp == 0x7FF ) { | 2028 | if ( bExp == 0x7FF ) { |
2038 | if ( bSig ) return propagateFloat64NaN( a, b ); | 2029 | if ( bSig ) return propagateFloat64NaN( a, b ); |
@@ -2069,7 +2060,7 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) | |||
2069 | zExp = aExp; | 2060 | zExp = aExp; |
2070 | normalizeRoundAndPack: | 2061 | normalizeRoundAndPack: |
2071 | --zExp; | 2062 | --zExp; |
2072 | return normalizeRoundAndPackFloat64( zSign, zExp, zSig ); | 2063 | return normalizeRoundAndPackFloat64( roundData, zSign, zExp, zSig ); |
2073 | 2064 | ||
2074 | } | 2065 | } |
2075 | 2066 | ||
@@ -2080,17 +2071,17 @@ and `b'. The operation is performed according to the IEC/IEEE Standard for | |||
2080 | Binary Floating-point Arithmetic. | 2071 | Binary Floating-point Arithmetic. |
2081 | ------------------------------------------------------------------------------- | 2072 | ------------------------------------------------------------------------------- |
2082 | */ | 2073 | */ |
2083 | float64 float64_add( float64 a, float64 b ) | 2074 | float64 float64_add( struct roundingData *roundData, float64 a, float64 b ) |
2084 | { | 2075 | { |
2085 | flag aSign, bSign; | 2076 | flag aSign, bSign; |
2086 | 2077 | ||
2087 | aSign = extractFloat64Sign( a ); | 2078 | aSign = extractFloat64Sign( a ); |
2088 | bSign = extractFloat64Sign( b ); | 2079 | bSign = extractFloat64Sign( b ); |
2089 | if ( aSign == bSign ) { | 2080 | if ( aSign == bSign ) { |
2090 | return addFloat64Sigs( a, b, aSign ); | 2081 | return addFloat64Sigs( roundData, a, b, aSign ); |
2091 | } | 2082 | } |
2092 | else { | 2083 | else { |
2093 | return subFloat64Sigs( a, b, aSign ); | 2084 | return subFloat64Sigs( roundData, a, b, aSign ); |
2094 | } | 2085 | } |
2095 | 2086 | ||
2096 | } | 2087 | } |
@@ -2102,17 +2093,17 @@ Returns the result of subtracting the double-precision floating-point values | |||
2102 | for Binary Floating-point Arithmetic. | 2093 | for Binary Floating-point Arithmetic. |
2103 | ------------------------------------------------------------------------------- | 2094 | ------------------------------------------------------------------------------- |
2104 | */ | 2095 | */ |
2105 | float64 float64_sub( float64 a, float64 b ) | 2096 | float64 float64_sub( struct roundingData *roundData, float64 a, float64 b ) |
2106 | { | 2097 | { |
2107 | flag aSign, bSign; | 2098 | flag aSign, bSign; |
2108 | 2099 | ||
2109 | aSign = extractFloat64Sign( a ); | 2100 | aSign = extractFloat64Sign( a ); |
2110 | bSign = extractFloat64Sign( b ); | 2101 | bSign = extractFloat64Sign( b ); |
2111 | if ( aSign == bSign ) { | 2102 | if ( aSign == bSign ) { |
2112 | return subFloat64Sigs( a, b, aSign ); | 2103 | return subFloat64Sigs( roundData, a, b, aSign ); |
2113 | } | 2104 | } |
2114 | else { | 2105 | else { |
2115 | return addFloat64Sigs( a, b, aSign ); | 2106 | return addFloat64Sigs( roundData, a, b, aSign ); |
2116 | } | 2107 | } |
2117 | 2108 | ||
2118 | } | 2109 | } |
@@ -2124,7 +2115,7 @@ Returns the result of multiplying the double-precision floating-point values | |||
2124 | for Binary Floating-point Arithmetic. | 2115 | for Binary Floating-point Arithmetic. |
2125 | ------------------------------------------------------------------------------- | 2116 | ------------------------------------------------------------------------------- |
2126 | */ | 2117 | */ |
2127 | float64 float64_mul( float64 a, float64 b ) | 2118 | float64 float64_mul( struct roundingData *roundData, float64 a, float64 b ) |
2128 | { | 2119 | { |
2129 | flag aSign, bSign, zSign; | 2120 | flag aSign, bSign, zSign; |
2130 | int16 aExp, bExp, zExp; | 2121 | int16 aExp, bExp, zExp; |
@@ -2142,7 +2133,7 @@ float64 float64_mul( float64 a, float64 b ) | |||
2142 | return propagateFloat64NaN( a, b ); | 2133 | return propagateFloat64NaN( a, b ); |
2143 | } | 2134 | } |
2144 | if ( ( bExp | bSig ) == 0 ) { | 2135 | if ( ( bExp | bSig ) == 0 ) { |
2145 | float_raise( float_flag_invalid ); | 2136 | roundData->exception |= float_flag_invalid; |
2146 | return float64_default_nan; | 2137 | return float64_default_nan; |
2147 | } | 2138 | } |
2148 | return packFloat64( zSign, 0x7FF, 0 ); | 2139 | return packFloat64( zSign, 0x7FF, 0 ); |
@@ -2150,7 +2141,7 @@ float64 float64_mul( float64 a, float64 b ) | |||
2150 | if ( bExp == 0x7FF ) { | 2141 | if ( bExp == 0x7FF ) { |
2151 | if ( bSig ) return propagateFloat64NaN( a, b ); | 2142 | if ( bSig ) return propagateFloat64NaN( a, b ); |
2152 | if ( ( aExp | aSig ) == 0 ) { | 2143 | if ( ( aExp | aSig ) == 0 ) { |
2153 | float_raise( float_flag_invalid ); | 2144 | roundData->exception |= float_flag_invalid; |
2154 | return float64_default_nan; | 2145 | return float64_default_nan; |
2155 | } | 2146 | } |
2156 | return packFloat64( zSign, 0x7FF, 0 ); | 2147 | return packFloat64( zSign, 0x7FF, 0 ); |
@@ -2172,7 +2163,7 @@ float64 float64_mul( float64 a, float64 b ) | |||
2172 | zSig0 <<= 1; | 2163 | zSig0 <<= 1; |
2173 | --zExp; | 2164 | --zExp; |
2174 | } | 2165 | } |
2175 | return roundAndPackFloat64( zSign, zExp, zSig0 ); | 2166 | return roundAndPackFloat64( roundData, zSign, zExp, zSig0 ); |
2176 | 2167 | ||
2177 | } | 2168 | } |
2178 | 2169 | ||
@@ -2183,7 +2174,7 @@ by the corresponding value `b'. The operation is performed according to | |||
2183 | the IEC/IEEE Standard for Binary Floating-point Arithmetic. | 2174 | the IEC/IEEE Standard for Binary Floating-point Arithmetic. |
2184 | ------------------------------------------------------------------------------- | 2175 | ------------------------------------------------------------------------------- |
2185 | */ | 2176 | */ |
2186 | float64 float64_div( float64 a, float64 b ) | 2177 | float64 float64_div( struct roundingData *roundData, float64 a, float64 b ) |
2187 | { | 2178 | { |
2188 | flag aSign, bSign, zSign; | 2179 | flag aSign, bSign, zSign; |
2189 | int16 aExp, bExp, zExp; | 2180 | int16 aExp, bExp, zExp; |
@@ -2202,7 +2193,7 @@ float64 float64_div( float64 a, float64 b ) | |||
2202 | if ( aSig ) return propagateFloat64NaN( a, b ); | 2193 | if ( aSig ) return propagateFloat64NaN( a, b ); |
2203 | if ( bExp == 0x7FF ) { | 2194 | if ( bExp == 0x7FF ) { |
2204 | if ( bSig ) return propagateFloat64NaN( a, b ); | 2195 | if ( bSig ) return propagateFloat64NaN( a, b ); |
2205 | float_raise( float_flag_invalid ); | 2196 | roundData->exception |= float_flag_invalid; |
2206 | return float64_default_nan; | 2197 | return float64_default_nan; |
2207 | } | 2198 | } |
2208 | return packFloat64( zSign, 0x7FF, 0 ); | 2199 | return packFloat64( zSign, 0x7FF, 0 ); |
@@ -2214,10 +2205,10 @@ float64 float64_div( float64 a, float64 b ) | |||
2214 | if ( bExp == 0 ) { | 2205 | if ( bExp == 0 ) { |
2215 | if ( bSig == 0 ) { | 2206 | if ( bSig == 0 ) { |
2216 | if ( ( aExp | aSig ) == 0 ) { | 2207 | if ( ( aExp | aSig ) == 0 ) { |
2217 | float_raise( float_flag_invalid ); | 2208 | roundData->exception |= float_flag_invalid; |
2218 | return float64_default_nan; | 2209 | return float64_default_nan; |
2219 | } | 2210 | } |
2220 | float_raise( float_flag_divbyzero ); | 2211 | roundData->exception |= float_flag_divbyzero; |
2221 | return packFloat64( zSign, 0x7FF, 0 ); | 2212 | return packFloat64( zSign, 0x7FF, 0 ); |
2222 | } | 2213 | } |
2223 | normalizeFloat64Subnormal( bSig, &bExp, &bSig ); | 2214 | normalizeFloat64Subnormal( bSig, &bExp, &bSig ); |
@@ -2243,7 +2234,7 @@ float64 float64_div( float64 a, float64 b ) | |||
2243 | } | 2234 | } |
2244 | zSig |= ( rem1 != 0 ); | 2235 | zSig |= ( rem1 != 0 ); |
2245 | } | 2236 | } |
2246 | return roundAndPackFloat64( zSign, zExp, zSig ); | 2237 | return roundAndPackFloat64( roundData, zSign, zExp, zSig ); |
2247 | 2238 | ||
2248 | } | 2239 | } |
2249 | 2240 | ||
@@ -2254,7 +2245,7 @@ with respect to the corresponding value `b'. The operation is performed | |||
2254 | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. | 2245 | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. |
2255 | ------------------------------------------------------------------------------- | 2246 | ------------------------------------------------------------------------------- |
2256 | */ | 2247 | */ |
2257 | float64 float64_rem( float64 a, float64 b ) | 2248 | float64 float64_rem( struct roundingData *roundData, float64 a, float64 b ) |
2258 | { | 2249 | { |
2259 | flag aSign, bSign, zSign; | 2250 | flag aSign, bSign, zSign; |
2260 | int16 aExp, bExp, expDiff; | 2251 | int16 aExp, bExp, expDiff; |
@@ -2272,7 +2263,7 @@ float64 float64_rem( float64 a, float64 b ) | |||
2272 | if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { | 2263 | if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { |
2273 | return propagateFloat64NaN( a, b ); | 2264 | return propagateFloat64NaN( a, b ); |
2274 | } | 2265 | } |
2275 | float_raise( float_flag_invalid ); | 2266 | roundData->exception |= float_flag_invalid; |
2276 | return float64_default_nan; | 2267 | return float64_default_nan; |
2277 | } | 2268 | } |
2278 | if ( bExp == 0x7FF ) { | 2269 | if ( bExp == 0x7FF ) { |
@@ -2281,7 +2272,7 @@ float64 float64_rem( float64 a, float64 b ) | |||
2281 | } | 2272 | } |
2282 | if ( bExp == 0 ) { | 2273 | if ( bExp == 0 ) { |
2283 | if ( bSig == 0 ) { | 2274 | if ( bSig == 0 ) { |
2284 | float_raise( float_flag_invalid ); | 2275 | roundData->exception |= float_flag_invalid; |
2285 | return float64_default_nan; | 2276 | return float64_default_nan; |
2286 | } | 2277 | } |
2287 | normalizeFloat64Subnormal( bSig, &bExp, &bSig ); | 2278 | normalizeFloat64Subnormal( bSig, &bExp, &bSig ); |
@@ -2329,7 +2320,7 @@ float64 float64_rem( float64 a, float64 b ) | |||
2329 | } | 2320 | } |
2330 | zSign = ( (sbits64) aSig < 0 ); | 2321 | zSign = ( (sbits64) aSig < 0 ); |
2331 | if ( zSign ) aSig = - aSig; | 2322 | if ( zSign ) aSig = - aSig; |
2332 | return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig ); | 2323 | return normalizeRoundAndPackFloat64( roundData, aSign ^ zSign, bExp, aSig ); |
2333 | 2324 | ||
2334 | } | 2325 | } |
2335 | 2326 | ||
@@ -2340,7 +2331,7 @@ The operation is performed according to the IEC/IEEE Standard for Binary | |||
2340 | Floating-point Arithmetic. | 2331 | Floating-point Arithmetic. |
2341 | ------------------------------------------------------------------------------- | 2332 | ------------------------------------------------------------------------------- |
2342 | */ | 2333 | */ |
2343 | float64 float64_sqrt( float64 a ) | 2334 | float64 float64_sqrt( struct roundingData *roundData, float64 a ) |
2344 | { | 2335 | { |
2345 | flag aSign; | 2336 | flag aSign; |
2346 | int16 aExp, zExp; | 2337 | int16 aExp, zExp; |
@@ -2354,12 +2345,12 @@ float64 float64_sqrt( float64 a ) | |||
2354 | if ( aExp == 0x7FF ) { | 2345 | if ( aExp == 0x7FF ) { |
2355 | if ( aSig ) return propagateFloat64NaN( a, a ); | 2346 | if ( aSig ) return propagateFloat64NaN( a, a ); |
2356 | if ( ! aSign ) return a; | 2347 | if ( ! aSign ) return a; |
2357 | float_raise( float_flag_invalid ); | 2348 | roundData->exception |= float_flag_invalid; |
2358 | return float64_default_nan; | 2349 | return float64_default_nan; |
2359 | } | 2350 | } |
2360 | if ( aSign ) { | 2351 | if ( aSign ) { |
2361 | if ( ( aExp | aSig ) == 0 ) return a; | 2352 | if ( ( aExp | aSig ) == 0 ) return a; |
2362 | float_raise( float_flag_invalid ); | 2353 | roundData->exception |= float_flag_invalid; |
2363 | return float64_default_nan; | 2354 | return float64_default_nan; |
2364 | } | 2355 | } |
2365 | if ( aExp == 0 ) { | 2356 | if ( aExp == 0 ) { |
@@ -2390,7 +2381,7 @@ float64 float64_sqrt( float64 a ) | |||
2390 | } | 2381 | } |
2391 | } | 2382 | } |
2392 | shift64RightJamming( zSig, 1, &zSig ); | 2383 | shift64RightJamming( zSig, 1, &zSig ); |
2393 | return roundAndPackFloat64( 0, zExp, zSig ); | 2384 | return roundAndPackFloat64( roundData, 0, zExp, zSig ); |
2394 | 2385 | ||
2395 | } | 2386 | } |
2396 | 2387 | ||
@@ -2554,7 +2545,7 @@ largest positive integer is returned. Otherwise, if the conversion | |||
2554 | overflows, the largest integer with the same sign as `a' is returned. | 2545 | overflows, the largest integer with the same sign as `a' is returned. |
2555 | ------------------------------------------------------------------------------- | 2546 | ------------------------------------------------------------------------------- |
2556 | */ | 2547 | */ |
2557 | int32 floatx80_to_int32( floatx80 a ) | 2548 | int32 floatx80_to_int32( struct roundingData *roundData, floatx80 a ) |
2558 | { | 2549 | { |
2559 | flag aSign; | 2550 | flag aSign; |
2560 | int32 aExp, shiftCount; | 2551 | int32 aExp, shiftCount; |
@@ -2567,7 +2558,7 @@ int32 floatx80_to_int32( floatx80 a ) | |||
2567 | shiftCount = 0x4037 - aExp; | 2558 | shiftCount = 0x4037 - aExp; |
2568 | if ( shiftCount <= 0 ) shiftCount = 1; | 2559 | if ( shiftCount <= 0 ) shiftCount = 1; |
2569 | shift64RightJamming( aSig, shiftCount, &aSig ); | 2560 | shift64RightJamming( aSig, shiftCount, &aSig ); |
2570 | return roundAndPackInt32( aSign, aSig ); | 2561 | return roundAndPackInt32( roundData, aSign, aSig ); |
2571 | 2562 | ||
2572 | } | 2563 | } |
2573 | 2564 | ||
@@ -2598,7 +2589,7 @@ int32 floatx80_to_int32_round_to_zero( floatx80 a ) | |||
2598 | goto invalid; | 2589 | goto invalid; |
2599 | } | 2590 | } |
2600 | else if ( 63 < shiftCount ) { | 2591 | else if ( 63 < shiftCount ) { |
2601 | if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; | 2592 | if ( aExp || aSig ) float_raise( float_flag_inexact ); |
2602 | return 0; | 2593 | return 0; |
2603 | } | 2594 | } |
2604 | savedASig = aSig; | 2595 | savedASig = aSig; |
@@ -2607,11 +2598,11 @@ int32 floatx80_to_int32_round_to_zero( floatx80 a ) | |||
2607 | if ( aSign ) z = - z; | 2598 | if ( aSign ) z = - z; |
2608 | if ( ( z < 0 ) ^ aSign ) { | 2599 | if ( ( z < 0 ) ^ aSign ) { |
2609 | invalid: | 2600 | invalid: |
2610 | float_exception_flags |= float_flag_invalid; | 2601 | float_raise( float_flag_invalid ); |
2611 | return aSign ? 0x80000000 : 0x7FFFFFFF; | 2602 | return aSign ? 0x80000000 : 0x7FFFFFFF; |
2612 | } | 2603 | } |
2613 | if ( ( aSig<<shiftCount ) != savedASig ) { | 2604 | if ( ( aSig<<shiftCount ) != savedASig ) { |
2614 | float_exception_flags |= float_flag_inexact; | 2605 | float_raise( float_flag_inexact ); |
2615 | } | 2606 | } |
2616 | return z; | 2607 | return z; |
2617 | 2608 | ||
@@ -2625,7 +2616,7 @@ conversion is performed according to the IEC/IEEE Standard for Binary | |||
2625 | Floating-point Arithmetic. | 2616 | Floating-point Arithmetic. |
2626 | ------------------------------------------------------------------------------- | 2617 | ------------------------------------------------------------------------------- |
2627 | */ | 2618 | */ |
2628 | float32 floatx80_to_float32( floatx80 a ) | 2619 | float32 floatx80_to_float32( struct roundingData *roundData, floatx80 a ) |
2629 | { | 2620 | { |
2630 | flag aSign; | 2621 | flag aSign; |
2631 | int32 aExp; | 2622 | int32 aExp; |
@@ -2642,7 +2633,7 @@ float32 floatx80_to_float32( floatx80 a ) | |||
2642 | } | 2633 | } |
2643 | shift64RightJamming( aSig, 33, &aSig ); | 2634 | shift64RightJamming( aSig, 33, &aSig ); |
2644 | if ( aExp || aSig ) aExp -= 0x3F81; | 2635 | if ( aExp || aSig ) aExp -= 0x3F81; |
2645 | return roundAndPackFloat32( aSign, aExp, aSig ); | 2636 | return roundAndPackFloat32( roundData, aSign, aExp, aSig ); |
2646 | 2637 | ||
2647 | } | 2638 | } |
2648 | 2639 | ||
@@ -2654,7 +2645,7 @@ conversion is performed according to the IEC/IEEE Standard for Binary | |||
2654 | Floating-point Arithmetic. | 2645 | Floating-point Arithmetic. |
2655 | ------------------------------------------------------------------------------- | 2646 | ------------------------------------------------------------------------------- |
2656 | */ | 2647 | */ |
2657 | float64 floatx80_to_float64( floatx80 a ) | 2648 | float64 floatx80_to_float64( struct roundingData *roundData, floatx80 a ) |
2658 | { | 2649 | { |
2659 | flag aSign; | 2650 | flag aSign; |
2660 | int32 aExp; | 2651 | int32 aExp; |
@@ -2671,7 +2662,7 @@ float64 floatx80_to_float64( floatx80 a ) | |||
2671 | } | 2662 | } |
2672 | shift64RightJamming( aSig, 1, &zSig ); | 2663 | shift64RightJamming( aSig, 1, &zSig ); |
2673 | if ( aExp || aSig ) aExp -= 0x3C01; | 2664 | if ( aExp || aSig ) aExp -= 0x3C01; |
2674 | return roundAndPackFloat64( aSign, aExp, zSig ); | 2665 | return roundAndPackFloat64( roundData, aSign, aExp, zSig ); |
2675 | 2666 | ||
2676 | } | 2667 | } |
2677 | 2668 | ||
@@ -2683,7 +2674,7 @@ value. The operation is performed according to the IEC/IEEE Standard for | |||
2683 | Binary Floating-point Arithmetic. | 2674 | Binary Floating-point Arithmetic. |
2684 | ------------------------------------------------------------------------------- | 2675 | ------------------------------------------------------------------------------- |
2685 | */ | 2676 | */ |
2686 | floatx80 floatx80_round_to_int( floatx80 a ) | 2677 | floatx80 floatx80_round_to_int( struct roundingData *roundData, floatx80 a ) |
2687 | { | 2678 | { |
2688 | flag aSign; | 2679 | flag aSign; |
2689 | int32 aExp; | 2680 | int32 aExp; |
@@ -2703,9 +2694,9 @@ floatx80 floatx80_round_to_int( floatx80 a ) | |||
2703 | && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { | 2694 | && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { |
2704 | return a; | 2695 | return a; |
2705 | } | 2696 | } |
2706 | float_exception_flags |= float_flag_inexact; | 2697 | roundData->exception |= float_flag_inexact; |
2707 | aSign = extractFloatx80Sign( a ); | 2698 | aSign = extractFloatx80Sign( a ); |
2708 | switch ( float_rounding_mode ) { | 2699 | switch ( roundData->mode ) { |
2709 | case float_round_nearest_even: | 2700 | case float_round_nearest_even: |
2710 | if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 ) | 2701 | if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 ) |
2711 | ) { | 2702 | ) { |
@@ -2729,7 +2720,7 @@ floatx80 floatx80_round_to_int( floatx80 a ) | |||
2729 | lastBitMask <<= 0x403E - aExp; | 2720 | lastBitMask <<= 0x403E - aExp; |
2730 | roundBitsMask = lastBitMask - 1; | 2721 | roundBitsMask = lastBitMask - 1; |
2731 | z = a; | 2722 | z = a; |
2732 | roundingMode = float_rounding_mode; | 2723 | roundingMode = roundData->mode; |
2733 | if ( roundingMode == float_round_nearest_even ) { | 2724 | if ( roundingMode == float_round_nearest_even ) { |
2734 | z.low += lastBitMask>>1; | 2725 | z.low += lastBitMask>>1; |
2735 | if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; | 2726 | if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; |
@@ -2744,7 +2735,7 @@ floatx80 floatx80_round_to_int( floatx80 a ) | |||
2744 | ++z.high; | 2735 | ++z.high; |
2745 | z.low = LIT64( 0x8000000000000000 ); | 2736 | z.low = LIT64( 0x8000000000000000 ); |
2746 | } | 2737 | } |
2747 | if ( z.low != a.low ) float_exception_flags |= float_flag_inexact; | 2738 | if ( z.low != a.low ) roundData->exception |= float_flag_inexact; |
2748 | return z; | 2739 | return z; |
2749 | 2740 | ||
2750 | } | 2741 | } |
@@ -2758,7 +2749,7 @@ The addition is performed according to the IEC/IEEE Standard for Binary | |||
2758 | Floating-point Arithmetic. | 2749 | Floating-point Arithmetic. |
2759 | ------------------------------------------------------------------------------- | 2750 | ------------------------------------------------------------------------------- |
2760 | */ | 2751 | */ |
2761 | static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) | 2752 | static floatx80 addFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign ) |
2762 | { | 2753 | { |
2763 | int32 aExp, bExp, zExp; | 2754 | int32 aExp, bExp, zExp; |
2764 | bits64 aSig, bSig, zSig0, zSig1; | 2755 | bits64 aSig, bSig, zSig0, zSig1; |
@@ -2814,7 +2805,7 @@ static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) | |||
2814 | roundAndPack: | 2805 | roundAndPack: |
2815 | return | 2806 | return |
2816 | roundAndPackFloatx80( | 2807 | roundAndPackFloatx80( |
2817 | floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); | 2808 | roundData, zSign, zExp, zSig0, zSig1 ); |
2818 | 2809 | ||
2819 | } | 2810 | } |
2820 | 2811 | ||
@@ -2827,7 +2818,7 @@ result is a NaN. The subtraction is performed according to the IEC/IEEE | |||
2827 | Standard for Binary Floating-point Arithmetic. | 2818 | Standard for Binary Floating-point Arithmetic. |
2828 | ------------------------------------------------------------------------------- | 2819 | ------------------------------------------------------------------------------- |
2829 | */ | 2820 | */ |
2830 | static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) | 2821 | static floatx80 subFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign ) |
2831 | { | 2822 | { |
2832 | int32 aExp, bExp, zExp; | 2823 | int32 aExp, bExp, zExp; |
2833 | bits64 aSig, bSig, zSig0, zSig1; | 2824 | bits64 aSig, bSig, zSig0, zSig1; |
@@ -2845,7 +2836,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) | |||
2845 | if ( (bits64) ( ( aSig | bSig )<<1 ) ) { | 2836 | if ( (bits64) ( ( aSig | bSig )<<1 ) ) { |
2846 | return propagateFloatx80NaN( a, b ); | 2837 | return propagateFloatx80NaN( a, b ); |
2847 | } | 2838 | } |
2848 | float_raise( float_flag_invalid ); | 2839 | roundData->exception |= float_flag_invalid; |
2849 | z.low = floatx80_default_nan_low; | 2840 | z.low = floatx80_default_nan_low; |
2850 | z.high = floatx80_default_nan_high; | 2841 | z.high = floatx80_default_nan_high; |
2851 | return z; | 2842 | return z; |
@@ -2857,7 +2848,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) | |||
2857 | zSig1 = 0; | 2848 | zSig1 = 0; |
2858 | if ( bSig < aSig ) goto aBigger; | 2849 | if ( bSig < aSig ) goto aBigger; |
2859 | if ( aSig < bSig ) goto bBigger; | 2850 | if ( aSig < bSig ) goto bBigger; |
2860 | return packFloatx80( float_rounding_mode == float_round_down, 0, 0 ); | 2851 | return packFloatx80( roundData->mode == float_round_down, 0, 0 ); |
2861 | bExpBigger: | 2852 | bExpBigger: |
2862 | if ( bExp == 0x7FFF ) { | 2853 | if ( bExp == 0x7FFF ) { |
2863 | if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); | 2854 | if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); |
@@ -2883,7 +2874,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) | |||
2883 | normalizeRoundAndPack: | 2874 | normalizeRoundAndPack: |
2884 | return | 2875 | return |
2885 | normalizeRoundAndPackFloatx80( | 2876 | normalizeRoundAndPackFloatx80( |
2886 | floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); | 2877 | roundData, zSign, zExp, zSig0, zSig1 ); |
2887 | 2878 | ||
2888 | } | 2879 | } |
2889 | 2880 | ||
@@ -2894,17 +2885,17 @@ values `a' and `b'. The operation is performed according to the IEC/IEEE | |||
2894 | Standard for Binary Floating-point Arithmetic. | 2885 | Standard for Binary Floating-point Arithmetic. |
2895 | ------------------------------------------------------------------------------- | 2886 | ------------------------------------------------------------------------------- |
2896 | */ | 2887 | */ |
2897 | floatx80 floatx80_add( floatx80 a, floatx80 b ) | 2888 | floatx80 floatx80_add( struct roundingData *roundData, floatx80 a, floatx80 b ) |
2898 | { | 2889 | { |
2899 | flag aSign, bSign; | 2890 | flag aSign, bSign; |
2900 | 2891 | ||
2901 | aSign = extractFloatx80Sign( a ); | 2892 | aSign = extractFloatx80Sign( a ); |
2902 | bSign = extractFloatx80Sign( b ); | 2893 | bSign = extractFloatx80Sign( b ); |
2903 | if ( aSign == bSign ) { | 2894 | if ( aSign == bSign ) { |
2904 | return addFloatx80Sigs( a, b, aSign ); | 2895 | return addFloatx80Sigs( roundData, a, b, aSign ); |
2905 | } | 2896 | } |
2906 | else { | 2897 | else { |
2907 | return subFloatx80Sigs( a, b, aSign ); | 2898 | return subFloatx80Sigs( roundData, a, b, aSign ); |
2908 | } | 2899 | } |
2909 | 2900 | ||
2910 | } | 2901 | } |
@@ -2916,17 +2907,17 @@ point values `a' and `b'. The operation is performed according to the | |||
2916 | IEC/IEEE Standard for Binary Floating-point Arithmetic. | 2907 | IEC/IEEE Standard for Binary Floating-point Arithmetic. |
2917 | ------------------------------------------------------------------------------- | 2908 | ------------------------------------------------------------------------------- |
2918 | */ | 2909 | */ |
2919 | floatx80 floatx80_sub( floatx80 a, floatx80 b ) | 2910 | floatx80 floatx80_sub( struct roundingData *roundData, floatx80 a, floatx80 b ) |
2920 | { | 2911 | { |
2921 | flag aSign, bSign; | 2912 | flag aSign, bSign; |
2922 | 2913 | ||
2923 | aSign = extractFloatx80Sign( a ); | 2914 | aSign = extractFloatx80Sign( a ); |
2924 | bSign = extractFloatx80Sign( b ); | 2915 | bSign = extractFloatx80Sign( b ); |
2925 | if ( aSign == bSign ) { | 2916 | if ( aSign == bSign ) { |
2926 | return subFloatx80Sigs( a, b, aSign ); | 2917 | return subFloatx80Sigs( roundData, a, b, aSign ); |
2927 | } | 2918 | } |
2928 | else { | 2919 | else { |
2929 | return addFloatx80Sigs( a, b, aSign ); | 2920 | return addFloatx80Sigs( roundData, a, b, aSign ); |
2930 | } | 2921 | } |
2931 | 2922 | ||
2932 | } | 2923 | } |
@@ -2938,7 +2929,7 @@ point values `a' and `b'. The operation is performed according to the | |||
2938 | IEC/IEEE Standard for Binary Floating-point Arithmetic. | 2929 | IEC/IEEE Standard for Binary Floating-point Arithmetic. |
2939 | ------------------------------------------------------------------------------- | 2930 | ------------------------------------------------------------------------------- |
2940 | */ | 2931 | */ |
2941 | floatx80 floatx80_mul( floatx80 a, floatx80 b ) | 2932 | floatx80 floatx80_mul( struct roundingData *roundData, floatx80 a, floatx80 b ) |
2942 | { | 2933 | { |
2943 | flag aSign, bSign, zSign; | 2934 | flag aSign, bSign, zSign; |
2944 | int32 aExp, bExp, zExp; | 2935 | int32 aExp, bExp, zExp; |
@@ -2964,7 +2955,7 @@ floatx80 floatx80_mul( floatx80 a, floatx80 b ) | |||
2964 | if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); | 2955 | if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); |
2965 | if ( ( aExp | aSig ) == 0 ) { | 2956 | if ( ( aExp | aSig ) == 0 ) { |
2966 | invalid: | 2957 | invalid: |
2967 | float_raise( float_flag_invalid ); | 2958 | roundData->exception |= float_flag_invalid; |
2968 | z.low = floatx80_default_nan_low; | 2959 | z.low = floatx80_default_nan_low; |
2969 | z.high = floatx80_default_nan_high; | 2960 | z.high = floatx80_default_nan_high; |
2970 | return z; | 2961 | return z; |
@@ -2987,7 +2978,7 @@ floatx80 floatx80_mul( floatx80 a, floatx80 b ) | |||
2987 | } | 2978 | } |
2988 | return | 2979 | return |
2989 | roundAndPackFloatx80( | 2980 | roundAndPackFloatx80( |
2990 | floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); | 2981 | roundData, zSign, zExp, zSig0, zSig1 ); |
2991 | 2982 | ||
2992 | } | 2983 | } |
2993 | 2984 | ||
@@ -2998,7 +2989,7 @@ value `a' by the corresponding value `b'. The operation is performed | |||
2998 | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. | 2989 | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. |
2999 | ------------------------------------------------------------------------------- | 2990 | ------------------------------------------------------------------------------- |
3000 | */ | 2991 | */ |
3001 | floatx80 floatx80_div( floatx80 a, floatx80 b ) | 2992 | floatx80 floatx80_div( struct roundingData *roundData, floatx80 a, floatx80 b ) |
3002 | { | 2993 | { |
3003 | flag aSign, bSign, zSign; | 2994 | flag aSign, bSign, zSign; |
3004 | int32 aExp, bExp, zExp; | 2995 | int32 aExp, bExp, zExp; |
@@ -3029,12 +3020,12 @@ floatx80 floatx80_div( floatx80 a, floatx80 b ) | |||
3029 | if ( bSig == 0 ) { | 3020 | if ( bSig == 0 ) { |
3030 | if ( ( aExp | aSig ) == 0 ) { | 3021 | if ( ( aExp | aSig ) == 0 ) { |
3031 | invalid: | 3022 | invalid: |
3032 | float_raise( float_flag_invalid ); | 3023 | roundData->exception |= float_flag_invalid; |
3033 | z.low = floatx80_default_nan_low; | 3024 | z.low = floatx80_default_nan_low; |
3034 | z.high = floatx80_default_nan_high; | 3025 | z.high = floatx80_default_nan_high; |
3035 | return z; | 3026 | return z; |
3036 | } | 3027 | } |
3037 | float_raise( float_flag_divbyzero ); | 3028 | roundData->exception |= float_flag_divbyzero; |
3038 | return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); | 3029 | return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); |
3039 | } | 3030 | } |
3040 | normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); | 3031 | normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); |
@@ -3068,7 +3059,7 @@ floatx80 floatx80_div( floatx80 a, floatx80 b ) | |||
3068 | } | 3059 | } |
3069 | return | 3060 | return |
3070 | roundAndPackFloatx80( | 3061 | roundAndPackFloatx80( |
3071 | floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); | 3062 | roundData, zSign, zExp, zSig0, zSig1 ); |
3072 | 3063 | ||
3073 | } | 3064 | } |
3074 | 3065 | ||
@@ -3079,7 +3070,7 @@ Returns the remainder of the extended double-precision floating-point value | |||
3079 | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. | 3070 | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. |
3080 | ------------------------------------------------------------------------------- | 3071 | ------------------------------------------------------------------------------- |
3081 | */ | 3072 | */ |
3082 | floatx80 floatx80_rem( floatx80 a, floatx80 b ) | 3073 | floatx80 floatx80_rem( struct roundingData *roundData, floatx80 a, floatx80 b ) |
3083 | { | 3074 | { |
3084 | flag aSign, bSign, zSign; | 3075 | flag aSign, bSign, zSign; |
3085 | int32 aExp, bExp, expDiff; | 3076 | int32 aExp, bExp, expDiff; |
@@ -3107,7 +3098,7 @@ floatx80 floatx80_rem( floatx80 a, floatx80 b ) | |||
3107 | if ( bExp == 0 ) { | 3098 | if ( bExp == 0 ) { |
3108 | if ( bSig == 0 ) { | 3099 | if ( bSig == 0 ) { |
3109 | invalid: | 3100 | invalid: |
3110 | float_raise( float_flag_invalid ); | 3101 | roundData->exception |= float_flag_invalid; |
3111 | z.low = floatx80_default_nan_low; | 3102 | z.low = floatx80_default_nan_low; |
3112 | z.high = floatx80_default_nan_high; | 3103 | z.high = floatx80_default_nan_high; |
3113 | return z; | 3104 | return z; |
@@ -3164,9 +3155,10 @@ floatx80 floatx80_rem( floatx80 a, floatx80 b ) | |||
3164 | aSig1 = alternateASig1; | 3155 | aSig1 = alternateASig1; |
3165 | zSign = ! zSign; | 3156 | zSign = ! zSign; |
3166 | } | 3157 | } |
3158 | |||
3167 | return | 3159 | return |
3168 | normalizeRoundAndPackFloatx80( | 3160 | normalizeRoundAndPackFloatx80( |
3169 | 80, zSign, bExp + expDiff, aSig0, aSig1 ); | 3161 | roundData, zSign, bExp + expDiff, aSig0, aSig1 ); |
3170 | 3162 | ||
3171 | } | 3163 | } |
3172 | 3164 | ||
@@ -3177,7 +3169,7 @@ value `a'. The operation is performed according to the IEC/IEEE Standard | |||
3177 | for Binary Floating-point Arithmetic. | 3169 | for Binary Floating-point Arithmetic. |
3178 | ------------------------------------------------------------------------------- | 3170 | ------------------------------------------------------------------------------- |
3179 | */ | 3171 | */ |
3180 | floatx80 floatx80_sqrt( floatx80 a ) | 3172 | floatx80 floatx80_sqrt( struct roundingData *roundData, floatx80 a ) |
3181 | { | 3173 | { |
3182 | flag aSign; | 3174 | flag aSign; |
3183 | int32 aExp, zExp; | 3175 | int32 aExp, zExp; |
@@ -3197,7 +3189,7 @@ floatx80 floatx80_sqrt( floatx80 a ) | |||
3197 | if ( aSign ) { | 3189 | if ( aSign ) { |
3198 | if ( ( aExp | aSig0 ) == 0 ) return a; | 3190 | if ( ( aExp | aSig0 ) == 0 ) return a; |
3199 | invalid: | 3191 | invalid: |
3200 | float_raise( float_flag_invalid ); | 3192 | roundData->exception |= float_flag_invalid; |
3201 | z.low = floatx80_default_nan_low; | 3193 | z.low = floatx80_default_nan_low; |
3202 | z.high = floatx80_default_nan_high; | 3194 | z.high = floatx80_default_nan_high; |
3203 | return z; | 3195 | return z; |
@@ -3242,7 +3234,7 @@ floatx80 floatx80_sqrt( floatx80 a ) | |||
3242 | } | 3234 | } |
3243 | return | 3235 | return |
3244 | roundAndPackFloatx80( | 3236 | roundAndPackFloatx80( |
3245 | floatx80_rounding_precision, 0, zExp, zSig0, zSig1 ); | 3237 | roundData, 0, zExp, zSig0, zSig1 ); |
3246 | 3238 | ||
3247 | } | 3239 | } |
3248 | 3240 | ||
@@ -3264,7 +3256,7 @@ flag floatx80_eq( floatx80 a, floatx80 b ) | |||
3264 | ) { | 3256 | ) { |
3265 | if ( floatx80_is_signaling_nan( a ) | 3257 | if ( floatx80_is_signaling_nan( a ) |
3266 | || floatx80_is_signaling_nan( b ) ) { | 3258 | || floatx80_is_signaling_nan( b ) ) { |
3267 | float_raise( float_flag_invalid ); | 3259 | roundData->exception |= float_flag_invalid; |
3268 | } | 3260 | } |
3269 | return 0; | 3261 | return 0; |
3270 | } | 3262 | } |
@@ -3294,7 +3286,7 @@ flag floatx80_le( floatx80 a, floatx80 b ) | |||
3294 | || ( ( extractFloatx80Exp( b ) == 0x7FFF ) | 3286 | || ( ( extractFloatx80Exp( b ) == 0x7FFF ) |
3295 | && (bits64) ( extractFloatx80Frac( b )<<1 ) ) | 3287 | && (bits64) ( extractFloatx80Frac( b )<<1 ) ) |
3296 | ) { | 3288 | ) { |
3297 | float_raise( float_flag_invalid ); | 3289 | roundData->exception |= float_flag_invalid; |
3298 | return 0; | 3290 | return 0; |
3299 | } | 3291 | } |
3300 | aSign = extractFloatx80Sign( a ); | 3292 | aSign = extractFloatx80Sign( a ); |
@@ -3328,7 +3320,7 @@ flag floatx80_lt( floatx80 a, floatx80 b ) | |||
3328 | || ( ( extractFloatx80Exp( b ) == 0x7FFF ) | 3320 | || ( ( extractFloatx80Exp( b ) == 0x7FFF ) |
3329 | && (bits64) ( extractFloatx80Frac( b )<<1 ) ) | 3321 | && (bits64) ( extractFloatx80Frac( b )<<1 ) ) |
3330 | ) { | 3322 | ) { |
3331 | float_raise( float_flag_invalid ); | 3323 | roundData->exception |= float_flag_invalid; |
3332 | return 0; | 3324 | return 0; |
3333 | } | 3325 | } |
3334 | aSign = extractFloatx80Sign( a ); | 3326 | aSign = extractFloatx80Sign( a ); |
@@ -3361,7 +3353,7 @@ flag floatx80_eq_signaling( floatx80 a, floatx80 b ) | |||
3361 | || ( ( extractFloatx80Exp( b ) == 0x7FFF ) | 3353 | || ( ( extractFloatx80Exp( b ) == 0x7FFF ) |
3362 | && (bits64) ( extractFloatx80Frac( b )<<1 ) ) | 3354 | && (bits64) ( extractFloatx80Frac( b )<<1 ) ) |
3363 | ) { | 3355 | ) { |
3364 | float_raise( float_flag_invalid ); | 3356 | roundData->exception |= float_flag_invalid; |
3365 | return 0; | 3357 | return 0; |
3366 | } | 3358 | } |
3367 | return | 3359 | return |
@@ -3392,7 +3384,7 @@ flag floatx80_le_quiet( floatx80 a, floatx80 b ) | |||
3392 | ) { | 3384 | ) { |
3393 | if ( floatx80_is_signaling_nan( a ) | 3385 | if ( floatx80_is_signaling_nan( a ) |
3394 | || floatx80_is_signaling_nan( b ) ) { | 3386 | || floatx80_is_signaling_nan( b ) ) { |
3395 | float_raise( float_flag_invalid ); | 3387 | roundData->exception |= float_flag_invalid; |
3396 | } | 3388 | } |
3397 | return 0; | 3389 | return 0; |
3398 | } | 3390 | } |
@@ -3429,7 +3421,7 @@ flag floatx80_lt_quiet( floatx80 a, floatx80 b ) | |||
3429 | ) { | 3421 | ) { |
3430 | if ( floatx80_is_signaling_nan( a ) | 3422 | if ( floatx80_is_signaling_nan( a ) |
3431 | || floatx80_is_signaling_nan( b ) ) { | 3423 | || floatx80_is_signaling_nan( b ) ) { |
3432 | float_raise( float_flag_invalid ); | 3424 | roundData->exception |= float_flag_invalid; |
3433 | } | 3425 | } |
3434 | return 0; | 3426 | return 0; |
3435 | } | 3427 | } |