aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm/nwfpe/softfloat.c
diff options
context:
space:
mode:
authorDavid Woodhouse <dwmw2@shinybook.infradead.org>2005-08-09 11:51:35 -0400
committerDavid Woodhouse <dwmw2@shinybook.infradead.org>2005-08-09 11:51:35 -0400
commitc973b112c76c9d8fd042991128f218a738cc8d0a (patch)
treee813b0da5d0a0e19e06de6462d145a29ad683026 /arch/arm/nwfpe/softfloat.c
parentc5fbc3966f48279dbebfde10248c977014aa9988 (diff)
parent00dd1e433967872f3997a45d5adf35056fdf2f56 (diff)
Merge with /shiny/git/linux-2.6/.git
Diffstat (limited to 'arch/arm/nwfpe/softfloat.c')
-rw-r--r--arch/arm/nwfpe/softfloat.c334
1 files changed, 163 insertions, 171 deletions
diff --git a/arch/arm/nwfpe/softfloat.c b/arch/arm/nwfpe/softfloat.c
index e038dd3be9b3..8b75a6e7cb3a 100644
--- a/arch/arm/nwfpe/softfloat.c
+++ b/arch/arm/nwfpe/softfloat.c
@@ -36,16 +36,6 @@ this code that are retained.
36 36
37/* 37/*
38------------------------------------------------------------------------------- 38-------------------------------------------------------------------------------
39Floating-point rounding mode, extended double-precision rounding precision,
40and exception flags.
41-------------------------------------------------------------------------------
42*/
43int8 float_rounding_mode = float_round_nearest_even;
44int8 floatx80_rounding_precision = 80;
45int8 float_exception_flags;
46
47/*
48-------------------------------------------------------------------------------
49Primitive arithmetic functions, including multi-word arithmetic, and 39Primitive arithmetic functions, including multi-word arithmetic, and
50division and square root approximations. (Can be specialized to target if 40division and square root approximations. (Can be specialized to target if
51desired.) 41desired.)
@@ -77,14 +67,14 @@ input is too large, however, the invalid exception is raised and the largest
77positive or negative integer is returned. 67positive or negative integer is returned.
78------------------------------------------------------------------------------- 68-------------------------------------------------------------------------------
79*/ 69*/
80static int32 roundAndPackInt32( flag zSign, bits64 absZ ) 70static int32 roundAndPackInt32( struct roundingData *roundData, flag zSign, bits64 absZ )
81{ 71{
82 int8 roundingMode; 72 int8 roundingMode;
83 flag roundNearestEven; 73 flag roundNearestEven;
84 int8 roundIncrement, roundBits; 74 int8 roundIncrement, roundBits;
85 int32 z; 75 int32 z;
86 76
87 roundingMode = float_rounding_mode; 77 roundingMode = roundData->mode;
88 roundNearestEven = ( roundingMode == float_round_nearest_even ); 78 roundNearestEven = ( roundingMode == float_round_nearest_even );
89 roundIncrement = 0x40; 79 roundIncrement = 0x40;
90 if ( ! roundNearestEven ) { 80 if ( ! roundNearestEven ) {
@@ -107,10 +97,10 @@ static int32 roundAndPackInt32( flag zSign, bits64 absZ )
107 z = absZ; 97 z = absZ;
108 if ( zSign ) z = - z; 98 if ( zSign ) z = - z;
109 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { 99 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
110 float_exception_flags |= float_flag_invalid; 100 roundData->exception |= float_flag_invalid;
111 return zSign ? 0x80000000 : 0x7FFFFFFF; 101 return zSign ? 0x80000000 : 0x7FFFFFFF;
112 } 102 }
113 if ( roundBits ) float_exception_flags |= float_flag_inexact; 103 if ( roundBits ) roundData->exception |= float_flag_inexact;
114 return z; 104 return z;
115 105
116} 106}
@@ -224,14 +214,14 @@ The handling of underflow and overflow follows the IEC/IEEE Standard for
224Binary Floating-point Arithmetic. 214Binary Floating-point Arithmetic.
225------------------------------------------------------------------------------- 215-------------------------------------------------------------------------------
226*/ 216*/
227static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) 217static float32 roundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig )
228{ 218{
229 int8 roundingMode; 219 int8 roundingMode;
230 flag roundNearestEven; 220 flag roundNearestEven;
231 int8 roundIncrement, roundBits; 221 int8 roundIncrement, roundBits;
232 flag isTiny; 222 flag isTiny;
233 223
234 roundingMode = float_rounding_mode; 224 roundingMode = roundData->mode;
235 roundNearestEven = ( roundingMode == float_round_nearest_even ); 225 roundNearestEven = ( roundingMode == float_round_nearest_even );
236 roundIncrement = 0x40; 226 roundIncrement = 0x40;
237 if ( ! roundNearestEven ) { 227 if ( ! roundNearestEven ) {
@@ -254,7 +244,7 @@ static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
254 || ( ( zExp == 0xFD ) 244 || ( ( zExp == 0xFD )
255 && ( (sbits32) ( zSig + roundIncrement ) < 0 ) ) 245 && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
256 ) { 246 ) {
257 float_raise( float_flag_overflow | float_flag_inexact ); 247 roundData->exception |= float_flag_overflow | float_flag_inexact;
258 return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 ); 248 return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
259 } 249 }
260 if ( zExp < 0 ) { 250 if ( zExp < 0 ) {
@@ -265,10 +255,10 @@ static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
265 shift32RightJamming( zSig, - zExp, &zSig ); 255 shift32RightJamming( zSig, - zExp, &zSig );
266 zExp = 0; 256 zExp = 0;
267 roundBits = zSig & 0x7F; 257 roundBits = zSig & 0x7F;
268 if ( isTiny && roundBits ) float_raise( float_flag_underflow ); 258 if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
269 } 259 }
270 } 260 }
271 if ( roundBits ) float_exception_flags |= float_flag_inexact; 261 if ( roundBits ) roundData->exception |= float_flag_inexact;
272 zSig = ( zSig + roundIncrement )>>7; 262 zSig = ( zSig + roundIncrement )>>7;
273 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); 263 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
274 if ( zSig == 0 ) zExp = 0; 264 if ( zSig == 0 ) zExp = 0;
@@ -287,12 +277,12 @@ point exponent.
287------------------------------------------------------------------------------- 277-------------------------------------------------------------------------------
288*/ 278*/
289static float32 279static float32
290 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) 280 normalizeRoundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig )
291{ 281{
292 int8 shiftCount; 282 int8 shiftCount;
293 283
294 shiftCount = countLeadingZeros32( zSig ) - 1; 284 shiftCount = countLeadingZeros32( zSig ) - 1;
295 return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount ); 285 return roundAndPackFloat32( roundData, zSign, zExp - shiftCount, zSig<<shiftCount );
296 286
297} 287}
298 288
@@ -395,14 +385,14 @@ The handling of underflow and overflow follows the IEC/IEEE Standard for
395Binary Floating-point Arithmetic. 385Binary Floating-point Arithmetic.
396------------------------------------------------------------------------------- 386-------------------------------------------------------------------------------
397*/ 387*/
398static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) 388static float64 roundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig )
399{ 389{
400 int8 roundingMode; 390 int8 roundingMode;
401 flag roundNearestEven; 391 flag roundNearestEven;
402 int16 roundIncrement, roundBits; 392 int16 roundIncrement, roundBits;
403 flag isTiny; 393 flag isTiny;
404 394
405 roundingMode = float_rounding_mode; 395 roundingMode = roundData->mode;
406 roundNearestEven = ( roundingMode == float_round_nearest_even ); 396 roundNearestEven = ( roundingMode == float_round_nearest_even );
407 roundIncrement = 0x200; 397 roundIncrement = 0x200;
408 if ( ! roundNearestEven ) { 398 if ( ! roundNearestEven ) {
@@ -427,7 +417,7 @@ static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
427 ) { 417 ) {
428 //register int lr = __builtin_return_address(0); 418 //register int lr = __builtin_return_address(0);
429 //printk("roundAndPackFloat64 called from 0x%08x\n",lr); 419 //printk("roundAndPackFloat64 called from 0x%08x\n",lr);
430 float_raise( float_flag_overflow | float_flag_inexact ); 420 roundData->exception |= float_flag_overflow | float_flag_inexact;
431 return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 ); 421 return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
432 } 422 }
433 if ( zExp < 0 ) { 423 if ( zExp < 0 ) {
@@ -438,10 +428,10 @@ static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
438 shift64RightJamming( zSig, - zExp, &zSig ); 428 shift64RightJamming( zSig, - zExp, &zSig );
439 zExp = 0; 429 zExp = 0;
440 roundBits = zSig & 0x3FF; 430 roundBits = zSig & 0x3FF;
441 if ( isTiny && roundBits ) float_raise( float_flag_underflow ); 431 if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
442 } 432 }
443 } 433 }
444 if ( roundBits ) float_exception_flags |= float_flag_inexact; 434 if ( roundBits ) roundData->exception |= float_flag_inexact;
445 zSig = ( zSig + roundIncrement )>>10; 435 zSig = ( zSig + roundIncrement )>>10;
446 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); 436 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
447 if ( zSig == 0 ) zExp = 0; 437 if ( zSig == 0 ) zExp = 0;
@@ -460,12 +450,12 @@ point exponent.
460------------------------------------------------------------------------------- 450-------------------------------------------------------------------------------
461*/ 451*/
462static float64 452static float64
463 normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) 453 normalizeRoundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig )
464{ 454{
465 int8 shiftCount; 455 int8 shiftCount;
466 456
467 shiftCount = countLeadingZeros64( zSig ) - 1; 457 shiftCount = countLeadingZeros64( zSig ) - 1;
468 return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount ); 458 return roundAndPackFloat64( roundData, zSign, zExp - shiftCount, zSig<<shiftCount );
469 459
470} 460}
471 461
@@ -572,14 +562,15 @@ Floating-point Arithmetic.
572*/ 562*/
573static floatx80 563static floatx80
574 roundAndPackFloatx80( 564 roundAndPackFloatx80(
575 int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 565 struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
576 ) 566 )
577{ 567{
578 int8 roundingMode; 568 int8 roundingMode, roundingPrecision;
579 flag roundNearestEven, increment, isTiny; 569 flag roundNearestEven, increment, isTiny;
580 int64 roundIncrement, roundMask, roundBits; 570 int64 roundIncrement, roundMask, roundBits;
581 571
582 roundingMode = float_rounding_mode; 572 roundingMode = roundData->mode;
573 roundingPrecision = roundData->precision;
583 roundNearestEven = ( roundingMode == float_round_nearest_even ); 574 roundNearestEven = ( roundingMode == float_round_nearest_even );
584 if ( roundingPrecision == 80 ) goto precision80; 575 if ( roundingPrecision == 80 ) goto precision80;
585 if ( roundingPrecision == 64 ) { 576 if ( roundingPrecision == 64 ) {
@@ -623,8 +614,8 @@ static floatx80
623 shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); 614 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
624 zExp = 0; 615 zExp = 0;
625 roundBits = zSig0 & roundMask; 616 roundBits = zSig0 & roundMask;
626 if ( isTiny && roundBits ) float_raise( float_flag_underflow ); 617 if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
627 if ( roundBits ) float_exception_flags |= float_flag_inexact; 618 if ( roundBits ) roundData->exception |= float_flag_inexact;
628 zSig0 += roundIncrement; 619 zSig0 += roundIncrement;
629 if ( (sbits64) zSig0 < 0 ) zExp = 1; 620 if ( (sbits64) zSig0 < 0 ) zExp = 1;
630 roundIncrement = roundMask + 1; 621 roundIncrement = roundMask + 1;
@@ -635,7 +626,7 @@ static floatx80
635 return packFloatx80( zSign, zExp, zSig0 ); 626 return packFloatx80( zSign, zExp, zSig0 );
636 } 627 }
637 } 628 }
638 if ( roundBits ) float_exception_flags |= float_flag_inexact; 629 if ( roundBits ) roundData->exception |= float_flag_inexact;
639 zSig0 += roundIncrement; 630 zSig0 += roundIncrement;
640 if ( zSig0 < roundIncrement ) { 631 if ( zSig0 < roundIncrement ) {
641 ++zExp; 632 ++zExp;
@@ -672,7 +663,7 @@ static floatx80
672 ) { 663 ) {
673 roundMask = 0; 664 roundMask = 0;
674 overflow: 665 overflow:
675 float_raise( float_flag_overflow | float_flag_inexact ); 666 roundData->exception |= float_flag_overflow | float_flag_inexact;
676 if ( ( roundingMode == float_round_to_zero ) 667 if ( ( roundingMode == float_round_to_zero )
677 || ( zSign && ( roundingMode == float_round_up ) ) 668 || ( zSign && ( roundingMode == float_round_up ) )
678 || ( ! zSign && ( roundingMode == float_round_down ) ) 669 || ( ! zSign && ( roundingMode == float_round_down ) )
@@ -689,8 +680,8 @@ static floatx80
689 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) ); 680 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
690 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); 681 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
691 zExp = 0; 682 zExp = 0;
692 if ( isTiny && zSig1 ) float_raise( float_flag_underflow ); 683 if ( isTiny && zSig1 ) roundData->exception |= float_flag_underflow;
693 if ( zSig1 ) float_exception_flags |= float_flag_inexact; 684 if ( zSig1 ) roundData->exception |= float_flag_inexact;
694 if ( roundNearestEven ) { 685 if ( roundNearestEven ) {
695 increment = ( (sbits64) zSig1 < 0 ); 686 increment = ( (sbits64) zSig1 < 0 );
696 } 687 }
@@ -710,7 +701,7 @@ static floatx80
710 return packFloatx80( zSign, zExp, zSig0 ); 701 return packFloatx80( zSign, zExp, zSig0 );
711 } 702 }
712 } 703 }
713 if ( zSig1 ) float_exception_flags |= float_flag_inexact; 704 if ( zSig1 ) roundData->exception |= float_flag_inexact;
714 if ( increment ) { 705 if ( increment ) {
715 ++zSig0; 706 ++zSig0;
716 if ( zSig0 == 0 ) { 707 if ( zSig0 == 0 ) {
@@ -740,7 +731,7 @@ normalized.
740*/ 731*/
741static floatx80 732static floatx80
742 normalizeRoundAndPackFloatx80( 733 normalizeRoundAndPackFloatx80(
743 int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 734 struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
744 ) 735 )
745{ 736{
746 int8 shiftCount; 737 int8 shiftCount;
@@ -754,7 +745,7 @@ static floatx80
754 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 745 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
755 zExp -= shiftCount; 746 zExp -= shiftCount;
756 return 747 return
757 roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 ); 748 roundAndPackFloatx80( roundData, zSign, zExp, zSig0, zSig1 );
758 749
759} 750}
760 751
@@ -767,14 +758,14 @@ the single-precision floating-point format. The conversion is performed
767according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. 758according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
768------------------------------------------------------------------------------- 759-------------------------------------------------------------------------------
769*/ 760*/
770float32 int32_to_float32( int32 a ) 761float32 int32_to_float32(struct roundingData *roundData, int32 a)
771{ 762{
772 flag zSign; 763 flag zSign;
773 764
774 if ( a == 0 ) return 0; 765 if ( a == 0 ) return 0;
775 if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); 766 if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
776 zSign = ( a < 0 ); 767 zSign = ( a < 0 );
777 return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a ); 768 return normalizeRoundAndPackFloat32( roundData, zSign, 0x9C, zSign ? - a : a );
778 769
779} 770}
780 771
@@ -840,7 +831,7 @@ positive integer is returned. Otherwise, if the conversion overflows, the
840largest integer with the same sign as `a' is returned. 831largest integer with the same sign as `a' is returned.
841------------------------------------------------------------------------------- 832-------------------------------------------------------------------------------
842*/ 833*/
843int32 float32_to_int32( float32 a ) 834int32 float32_to_int32( struct roundingData *roundData, float32 a )
844{ 835{
845 flag aSign; 836 flag aSign;
846 int16 aExp, shiftCount; 837 int16 aExp, shiftCount;
@@ -856,7 +847,7 @@ int32 float32_to_int32( float32 a )
856 zSig = aSig; 847 zSig = aSig;
857 zSig <<= 32; 848 zSig <<= 32;
858 if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig ); 849 if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig );
859 return roundAndPackInt32( aSign, zSig ); 850 return roundAndPackInt32( roundData, aSign, zSig );
860 851
861} 852}
862 853
@@ -889,13 +880,13 @@ int32 float32_to_int32_round_to_zero( float32 a )
889 return 0x80000000; 880 return 0x80000000;
890 } 881 }
891 else if ( aExp <= 0x7E ) { 882 else if ( aExp <= 0x7E ) {
892 if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; 883 if ( aExp | aSig ) float_raise( float_flag_inexact );
893 return 0; 884 return 0;
894 } 885 }
895 aSig = ( aSig | 0x00800000 )<<8; 886 aSig = ( aSig | 0x00800000 )<<8;
896 z = aSig>>( - shiftCount ); 887 z = aSig>>( - shiftCount );
897 if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { 888 if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
898 float_exception_flags |= float_flag_inexact; 889 float_raise( float_flag_inexact );
899 } 890 }
900 return aSign ? - z : z; 891 return aSign ? - z : z;
901 892
@@ -973,7 +964,7 @@ operation is performed according to the IEC/IEEE Standard for Binary
973Floating-point Arithmetic. 964Floating-point Arithmetic.
974------------------------------------------------------------------------------- 965-------------------------------------------------------------------------------
975*/ 966*/
976float32 float32_round_to_int( float32 a ) 967float32 float32_round_to_int( struct roundingData *roundData, float32 a )
977{ 968{
978 flag aSign; 969 flag aSign;
979 int16 aExp; 970 int16 aExp;
@@ -988,11 +979,12 @@ float32 float32_round_to_int( float32 a )
988 } 979 }
989 return a; 980 return a;
990 } 981 }
982 roundingMode = roundData->mode;
991 if ( aExp <= 0x7E ) { 983 if ( aExp <= 0x7E ) {
992 if ( (bits32) ( a<<1 ) == 0 ) return a; 984 if ( (bits32) ( a<<1 ) == 0 ) return a;
993 float_exception_flags |= float_flag_inexact; 985 roundData->exception |= float_flag_inexact;
994 aSign = extractFloat32Sign( a ); 986 aSign = extractFloat32Sign( a );
995 switch ( float_rounding_mode ) { 987 switch ( roundingMode ) {
996 case float_round_nearest_even: 988 case float_round_nearest_even:
997 if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { 989 if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
998 return packFloat32( aSign, 0x7F, 0 ); 990 return packFloat32( aSign, 0x7F, 0 );
@@ -1009,7 +1001,6 @@ float32 float32_round_to_int( float32 a )
1009 lastBitMask <<= 0x96 - aExp; 1001 lastBitMask <<= 0x96 - aExp;
1010 roundBitsMask = lastBitMask - 1; 1002 roundBitsMask = lastBitMask - 1;
1011 z = a; 1003 z = a;
1012 roundingMode = float_rounding_mode;
1013 if ( roundingMode == float_round_nearest_even ) { 1004 if ( roundingMode == float_round_nearest_even ) {
1014 z += lastBitMask>>1; 1005 z += lastBitMask>>1;
1015 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; 1006 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
@@ -1020,7 +1011,7 @@ float32 float32_round_to_int( float32 a )
1020 } 1011 }
1021 } 1012 }
1022 z &= ~ roundBitsMask; 1013 z &= ~ roundBitsMask;
1023 if ( z != a ) float_exception_flags |= float_flag_inexact; 1014 if ( z != a ) roundData->exception |= float_flag_inexact;
1024 return z; 1015 return z;
1025 1016
1026} 1017}
@@ -1034,7 +1025,7 @@ addition is performed according to the IEC/IEEE Standard for Binary
1034Floating-point Arithmetic. 1025Floating-point Arithmetic.
1035------------------------------------------------------------------------------- 1026-------------------------------------------------------------------------------
1036*/ 1027*/
1037static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) 1028static float32 addFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign )
1038{ 1029{
1039 int16 aExp, bExp, zExp; 1030 int16 aExp, bExp, zExp;
1040 bits32 aSig, bSig, zSig; 1031 bits32 aSig, bSig, zSig;
@@ -1093,7 +1084,7 @@ static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
1093 ++zExp; 1084 ++zExp;
1094 } 1085 }
1095 roundAndPack: 1086 roundAndPack:
1096 return roundAndPackFloat32( zSign, zExp, zSig ); 1087 return roundAndPackFloat32( roundData, zSign, zExp, zSig );
1097 1088
1098} 1089}
1099 1090
@@ -1106,7 +1097,7 @@ result is a NaN. The subtraction is performed according to the IEC/IEEE
1106Standard for Binary Floating-point Arithmetic. 1097Standard for Binary Floating-point Arithmetic.
1107------------------------------------------------------------------------------- 1098-------------------------------------------------------------------------------
1108*/ 1099*/
1109static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) 1100static float32 subFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign )
1110{ 1101{
1111 int16 aExp, bExp, zExp; 1102 int16 aExp, bExp, zExp;
1112 bits32 aSig, bSig, zSig; 1103 bits32 aSig, bSig, zSig;
@@ -1123,7 +1114,7 @@ static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
1123 if ( expDiff < 0 ) goto bExpBigger; 1114 if ( expDiff < 0 ) goto bExpBigger;
1124 if ( aExp == 0xFF ) { 1115 if ( aExp == 0xFF ) {
1125 if ( aSig | bSig ) return propagateFloat32NaN( a, b ); 1116 if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1126 float_raise( float_flag_invalid ); 1117 roundData->exception |= float_flag_invalid;
1127 return float32_default_nan; 1118 return float32_default_nan;
1128 } 1119 }
1129 if ( aExp == 0 ) { 1120 if ( aExp == 0 ) {
@@ -1132,7 +1123,7 @@ static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
1132 } 1123 }
1133 if ( bSig < aSig ) goto aBigger; 1124 if ( bSig < aSig ) goto aBigger;
1134 if ( aSig < bSig ) goto bBigger; 1125 if ( aSig < bSig ) goto bBigger;
1135 return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); 1126 return packFloat32( roundData->mode == float_round_down, 0, 0 );
1136 bExpBigger: 1127 bExpBigger:
1137 if ( bExp == 0xFF ) { 1128 if ( bExp == 0xFF ) {
1138 if ( bSig ) return propagateFloat32NaN( a, b ); 1129 if ( bSig ) return propagateFloat32NaN( a, b );
@@ -1169,7 +1160,7 @@ static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
1169 zExp = aExp; 1160 zExp = aExp;
1170 normalizeRoundAndPack: 1161 normalizeRoundAndPack:
1171 --zExp; 1162 --zExp;
1172 return normalizeRoundAndPackFloat32( zSign, zExp, zSig ); 1163 return normalizeRoundAndPackFloat32( roundData, zSign, zExp, zSig );
1173 1164
1174} 1165}
1175 1166
@@ -1180,17 +1171,17 @@ and `b'. The operation is performed according to the IEC/IEEE Standard for
1180Binary Floating-point Arithmetic. 1171Binary Floating-point Arithmetic.
1181------------------------------------------------------------------------------- 1172-------------------------------------------------------------------------------
1182*/ 1173*/
1183float32 float32_add( float32 a, float32 b ) 1174float32 float32_add( struct roundingData *roundData, float32 a, float32 b )
1184{ 1175{
1185 flag aSign, bSign; 1176 flag aSign, bSign;
1186 1177
1187 aSign = extractFloat32Sign( a ); 1178 aSign = extractFloat32Sign( a );
1188 bSign = extractFloat32Sign( b ); 1179 bSign = extractFloat32Sign( b );
1189 if ( aSign == bSign ) { 1180 if ( aSign == bSign ) {
1190 return addFloat32Sigs( a, b, aSign ); 1181 return addFloat32Sigs( roundData, a, b, aSign );
1191 } 1182 }
1192 else { 1183 else {
1193 return subFloat32Sigs( a, b, aSign ); 1184 return subFloat32Sigs( roundData, a, b, aSign );
1194 } 1185 }
1195 1186
1196} 1187}
@@ -1202,17 +1193,17 @@ Returns the result of subtracting the single-precision floating-point values
1202for Binary Floating-point Arithmetic. 1193for Binary Floating-point Arithmetic.
1203------------------------------------------------------------------------------- 1194-------------------------------------------------------------------------------
1204*/ 1195*/
1205float32 float32_sub( float32 a, float32 b ) 1196float32 float32_sub( struct roundingData *roundData, float32 a, float32 b )
1206{ 1197{
1207 flag aSign, bSign; 1198 flag aSign, bSign;
1208 1199
1209 aSign = extractFloat32Sign( a ); 1200 aSign = extractFloat32Sign( a );
1210 bSign = extractFloat32Sign( b ); 1201 bSign = extractFloat32Sign( b );
1211 if ( aSign == bSign ) { 1202 if ( aSign == bSign ) {
1212 return subFloat32Sigs( a, b, aSign ); 1203 return subFloat32Sigs( roundData, a, b, aSign );
1213 } 1204 }
1214 else { 1205 else {
1215 return addFloat32Sigs( a, b, aSign ); 1206 return addFloat32Sigs( roundData, a, b, aSign );
1216 } 1207 }
1217 1208
1218} 1209}
@@ -1224,7 +1215,7 @@ Returns the result of multiplying the single-precision floating-point values
1224for Binary Floating-point Arithmetic. 1215for Binary Floating-point Arithmetic.
1225------------------------------------------------------------------------------- 1216-------------------------------------------------------------------------------
1226*/ 1217*/
1227float32 float32_mul( float32 a, float32 b ) 1218float32 float32_mul( struct roundingData *roundData, float32 a, float32 b )
1228{ 1219{
1229 flag aSign, bSign, zSign; 1220 flag aSign, bSign, zSign;
1230 int16 aExp, bExp, zExp; 1221 int16 aExp, bExp, zExp;
@@ -1244,7 +1235,7 @@ float32 float32_mul( float32 a, float32 b )
1244 return propagateFloat32NaN( a, b ); 1235 return propagateFloat32NaN( a, b );
1245 } 1236 }
1246 if ( ( bExp | bSig ) == 0 ) { 1237 if ( ( bExp | bSig ) == 0 ) {
1247 float_raise( float_flag_invalid ); 1238 roundData->exception |= float_flag_invalid;
1248 return float32_default_nan; 1239 return float32_default_nan;
1249 } 1240 }
1250 return packFloat32( zSign, 0xFF, 0 ); 1241 return packFloat32( zSign, 0xFF, 0 );
@@ -1252,7 +1243,7 @@ float32 float32_mul( float32 a, float32 b )
1252 if ( bExp == 0xFF ) { 1243 if ( bExp == 0xFF ) {
1253 if ( bSig ) return propagateFloat32NaN( a, b ); 1244 if ( bSig ) return propagateFloat32NaN( a, b );
1254 if ( ( aExp | aSig ) == 0 ) { 1245 if ( ( aExp | aSig ) == 0 ) {
1255 float_raise( float_flag_invalid ); 1246 roundData->exception |= float_flag_invalid;
1256 return float32_default_nan; 1247 return float32_default_nan;
1257 } 1248 }
1258 return packFloat32( zSign, 0xFF, 0 ); 1249 return packFloat32( zSign, 0xFF, 0 );
@@ -1274,7 +1265,7 @@ float32 float32_mul( float32 a, float32 b )
1274 zSig <<= 1; 1265 zSig <<= 1;
1275 --zExp; 1266 --zExp;
1276 } 1267 }
1277 return roundAndPackFloat32( zSign, zExp, zSig ); 1268 return roundAndPackFloat32( roundData, zSign, zExp, zSig );
1278 1269
1279} 1270}
1280 1271
@@ -1285,7 +1276,7 @@ by the corresponding value `b'. The operation is performed according to the
1285IEC/IEEE Standard for Binary Floating-point Arithmetic. 1276IEC/IEEE Standard for Binary Floating-point Arithmetic.
1286------------------------------------------------------------------------------- 1277-------------------------------------------------------------------------------
1287*/ 1278*/
1288float32 float32_div( float32 a, float32 b ) 1279float32 float32_div( struct roundingData *roundData, float32 a, float32 b )
1289{ 1280{
1290 flag aSign, bSign, zSign; 1281 flag aSign, bSign, zSign;
1291 int16 aExp, bExp, zExp; 1282 int16 aExp, bExp, zExp;
@@ -1302,7 +1293,7 @@ float32 float32_div( float32 a, float32 b )
1302 if ( aSig ) return propagateFloat32NaN( a, b ); 1293 if ( aSig ) return propagateFloat32NaN( a, b );
1303 if ( bExp == 0xFF ) { 1294 if ( bExp == 0xFF ) {
1304 if ( bSig ) return propagateFloat32NaN( a, b ); 1295 if ( bSig ) return propagateFloat32NaN( a, b );
1305 float_raise( float_flag_invalid ); 1296 roundData->exception |= float_flag_invalid;
1306 return float32_default_nan; 1297 return float32_default_nan;
1307 } 1298 }
1308 return packFloat32( zSign, 0xFF, 0 ); 1299 return packFloat32( zSign, 0xFF, 0 );
@@ -1314,10 +1305,10 @@ float32 float32_div( float32 a, float32 b )
1314 if ( bExp == 0 ) { 1305 if ( bExp == 0 ) {
1315 if ( bSig == 0 ) { 1306 if ( bSig == 0 ) {
1316 if ( ( aExp | aSig ) == 0 ) { 1307 if ( ( aExp | aSig ) == 0 ) {
1317 float_raise( float_flag_invalid ); 1308 roundData->exception |= float_flag_invalid;
1318 return float32_default_nan; 1309 return float32_default_nan;
1319 } 1310 }
1320 float_raise( float_flag_divbyzero ); 1311 roundData->exception |= float_flag_divbyzero;
1321 return packFloat32( zSign, 0xFF, 0 ); 1312 return packFloat32( zSign, 0xFF, 0 );
1322 } 1313 }
1323 normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 1314 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
@@ -1341,7 +1332,7 @@ float32 float32_div( float32 a, float32 b )
1341 if ( ( zSig & 0x3F ) == 0 ) { 1332 if ( ( zSig & 0x3F ) == 0 ) {
1342 zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 ); 1333 zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 );
1343 } 1334 }
1344 return roundAndPackFloat32( zSign, zExp, zSig ); 1335 return roundAndPackFloat32( roundData, zSign, zExp, zSig );
1345 1336
1346} 1337}
1347 1338
@@ -1352,7 +1343,7 @@ with respect to the corresponding value `b'. The operation is performed
1352according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. 1343according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1353------------------------------------------------------------------------------- 1344-------------------------------------------------------------------------------
1354*/ 1345*/
1355float32 float32_rem( float32 a, float32 b ) 1346float32 float32_rem( struct roundingData *roundData, float32 a, float32 b )
1356{ 1347{
1357 flag aSign, bSign, zSign; 1348 flag aSign, bSign, zSign;
1358 int16 aExp, bExp, expDiff; 1349 int16 aExp, bExp, expDiff;
@@ -1372,7 +1363,7 @@ float32 float32_rem( float32 a, float32 b )
1372 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { 1363 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1373 return propagateFloat32NaN( a, b ); 1364 return propagateFloat32NaN( a, b );
1374 } 1365 }
1375 float_raise( float_flag_invalid ); 1366 roundData->exception |= float_flag_invalid;
1376 return float32_default_nan; 1367 return float32_default_nan;
1377 } 1368 }
1378 if ( bExp == 0xFF ) { 1369 if ( bExp == 0xFF ) {
@@ -1381,7 +1372,7 @@ float32 float32_rem( float32 a, float32 b )
1381 } 1372 }
1382 if ( bExp == 0 ) { 1373 if ( bExp == 0 ) {
1383 if ( bSig == 0 ) { 1374 if ( bSig == 0 ) {
1384 float_raise( float_flag_invalid ); 1375 roundData->exception |= float_flag_invalid;
1385 return float32_default_nan; 1376 return float32_default_nan;
1386 } 1377 }
1387 normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 1378 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
@@ -1444,7 +1435,7 @@ float32 float32_rem( float32 a, float32 b )
1444 } 1435 }
1445 zSign = ( (sbits32) aSig < 0 ); 1436 zSign = ( (sbits32) aSig < 0 );
1446 if ( zSign ) aSig = - aSig; 1437 if ( zSign ) aSig = - aSig;
1447 return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig ); 1438 return normalizeRoundAndPackFloat32( roundData, aSign ^ zSign, bExp, aSig );
1448 1439
1449} 1440}
1450 1441
@@ -1455,7 +1446,7 @@ The operation is performed according to the IEC/IEEE Standard for Binary
1455Floating-point Arithmetic. 1446Floating-point Arithmetic.
1456------------------------------------------------------------------------------- 1447-------------------------------------------------------------------------------
1457*/ 1448*/
1458float32 float32_sqrt( float32 a ) 1449float32 float32_sqrt( struct roundingData *roundData, float32 a )
1459{ 1450{
1460 flag aSign; 1451 flag aSign;
1461 int16 aExp, zExp; 1452 int16 aExp, zExp;
@@ -1468,12 +1459,12 @@ float32 float32_sqrt( float32 a )
1468 if ( aExp == 0xFF ) { 1459 if ( aExp == 0xFF ) {
1469 if ( aSig ) return propagateFloat32NaN( a, 0 ); 1460 if ( aSig ) return propagateFloat32NaN( a, 0 );
1470 if ( ! aSign ) return a; 1461 if ( ! aSign ) return a;
1471 float_raise( float_flag_invalid ); 1462 roundData->exception |= float_flag_invalid;
1472 return float32_default_nan; 1463 return float32_default_nan;
1473 } 1464 }
1474 if ( aSign ) { 1465 if ( aSign ) {
1475 if ( ( aExp | aSig ) == 0 ) return a; 1466 if ( ( aExp | aSig ) == 0 ) return a;
1476 float_raise( float_flag_invalid ); 1467 roundData->exception |= float_flag_invalid;
1477 return float32_default_nan; 1468 return float32_default_nan;
1478 } 1469 }
1479 if ( aExp == 0 ) { 1470 if ( aExp == 0 ) {
@@ -1499,7 +1490,7 @@ float32 float32_sqrt( float32 a )
1499 } 1490 }
1500 } 1491 }
1501 shift32RightJamming( zSig, 1, &zSig ); 1492 shift32RightJamming( zSig, 1, &zSig );
1502 return roundAndPackFloat32( 0, zExp, zSig ); 1493 return roundAndPackFloat32( roundData, 0, zExp, zSig );
1503 1494
1504} 1495}
1505 1496
@@ -1661,7 +1652,7 @@ positive integer is returned. Otherwise, if the conversion overflows, the
1661largest integer with the same sign as `a' is returned. 1652largest integer with the same sign as `a' is returned.
1662------------------------------------------------------------------------------- 1653-------------------------------------------------------------------------------
1663*/ 1654*/
1664int32 float64_to_int32( float64 a ) 1655int32 float64_to_int32( struct roundingData *roundData, float64 a )
1665{ 1656{
1666 flag aSign; 1657 flag aSign;
1667 int16 aExp, shiftCount; 1658 int16 aExp, shiftCount;
@@ -1674,7 +1665,7 @@ int32 float64_to_int32( float64 a )
1674 if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); 1665 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
1675 shiftCount = 0x42C - aExp; 1666 shiftCount = 0x42C - aExp;
1676 if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); 1667 if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
1677 return roundAndPackInt32( aSign, aSig ); 1668 return roundAndPackInt32( roundData, aSign, aSig );
1678 1669
1679} 1670}
1680 1671
@@ -1705,7 +1696,7 @@ int32 float64_to_int32_round_to_zero( float64 a )
1705 goto invalid; 1696 goto invalid;
1706 } 1697 }
1707 else if ( 52 < shiftCount ) { 1698 else if ( 52 < shiftCount ) {
1708 if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; 1699 if ( aExp || aSig ) float_raise( float_flag_inexact );
1709 return 0; 1700 return 0;
1710 } 1701 }
1711 aSig |= LIT64( 0x0010000000000000 ); 1702 aSig |= LIT64( 0x0010000000000000 );
@@ -1715,11 +1706,11 @@ int32 float64_to_int32_round_to_zero( float64 a )
1715 if ( aSign ) z = - z; 1706 if ( aSign ) z = - z;
1716 if ( ( z < 0 ) ^ aSign ) { 1707 if ( ( z < 0 ) ^ aSign ) {
1717 invalid: 1708 invalid:
1718 float_exception_flags |= float_flag_invalid; 1709 float_raise( float_flag_invalid );
1719 return aSign ? 0x80000000 : 0x7FFFFFFF; 1710 return aSign ? 0x80000000 : 0x7FFFFFFF;
1720 } 1711 }
1721 if ( ( aSig<<shiftCount ) != savedASig ) { 1712 if ( ( aSig<<shiftCount ) != savedASig ) {
1722 float_exception_flags |= float_flag_inexact; 1713 float_raise( float_flag_inexact );
1723 } 1714 }
1724 return z; 1715 return z;
1725 1716
@@ -1736,7 +1727,7 @@ positive integer is returned. Otherwise, if the conversion overflows, the
1736largest positive integer is returned. 1727largest positive integer is returned.
1737------------------------------------------------------------------------------- 1728-------------------------------------------------------------------------------
1738*/ 1729*/
1739int32 float64_to_uint32( float64 a ) 1730int32 float64_to_uint32( struct roundingData *roundData, float64 a )
1740{ 1731{
1741 flag aSign; 1732 flag aSign;
1742 int16 aExp, shiftCount; 1733 int16 aExp, shiftCount;
@@ -1749,7 +1740,7 @@ int32 float64_to_uint32( float64 a )
1749 if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); 1740 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
1750 shiftCount = 0x42C - aExp; 1741 shiftCount = 0x42C - aExp;
1751 if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); 1742 if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
1752 return roundAndPackInt32( aSign, aSig ); 1743 return roundAndPackInt32( roundData, aSign, aSig );
1753} 1744}
1754 1745
1755/* 1746/*
@@ -1778,7 +1769,7 @@ int32 float64_to_uint32_round_to_zero( float64 a )
1778 goto invalid; 1769 goto invalid;
1779 } 1770 }
1780 else if ( 52 < shiftCount ) { 1771 else if ( 52 < shiftCount ) {
1781 if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; 1772 if ( aExp || aSig ) float_raise( float_flag_inexact );
1782 return 0; 1773 return 0;
1783 } 1774 }
1784 aSig |= LIT64( 0x0010000000000000 ); 1775 aSig |= LIT64( 0x0010000000000000 );
@@ -1788,11 +1779,11 @@ int32 float64_to_uint32_round_to_zero( float64 a )
1788 if ( aSign ) z = - z; 1779 if ( aSign ) z = - z;
1789 if ( ( z < 0 ) ^ aSign ) { 1780 if ( ( z < 0 ) ^ aSign ) {
1790 invalid: 1781 invalid:
1791 float_exception_flags |= float_flag_invalid; 1782 float_raise( float_flag_invalid );
1792 return aSign ? 0x80000000 : 0x7FFFFFFF; 1783 return aSign ? 0x80000000 : 0x7FFFFFFF;
1793 } 1784 }
1794 if ( ( aSig<<shiftCount ) != savedASig ) { 1785 if ( ( aSig<<shiftCount ) != savedASig ) {
1795 float_exception_flags |= float_flag_inexact; 1786 float_raise( float_flag_inexact );
1796 } 1787 }
1797 return z; 1788 return z;
1798} 1789}
@@ -1805,7 +1796,7 @@ performed according to the IEC/IEEE Standard for Binary Floating-point
1805Arithmetic. 1796Arithmetic.
1806------------------------------------------------------------------------------- 1797-------------------------------------------------------------------------------
1807*/ 1798*/
1808float32 float64_to_float32( float64 a ) 1799float32 float64_to_float32( struct roundingData *roundData, float64 a )
1809{ 1800{
1810 flag aSign; 1801 flag aSign;
1811 int16 aExp; 1802 int16 aExp;
@@ -1825,7 +1816,7 @@ float32 float64_to_float32( float64 a )
1825 zSig |= 0x40000000; 1816 zSig |= 0x40000000;
1826 aExp -= 0x381; 1817 aExp -= 0x381;
1827 } 1818 }
1828 return roundAndPackFloat32( aSign, aExp, zSig ); 1819 return roundAndPackFloat32( roundData, aSign, aExp, zSig );
1829 1820
1830} 1821}
1831 1822
@@ -1872,7 +1863,7 @@ operation is performed according to the IEC/IEEE Standard for Binary
1872Floating-point Arithmetic. 1863Floating-point Arithmetic.
1873------------------------------------------------------------------------------- 1864-------------------------------------------------------------------------------
1874*/ 1865*/
1875float64 float64_round_to_int( float64 a ) 1866float64 float64_round_to_int( struct roundingData *roundData, float64 a )
1876{ 1867{
1877 flag aSign; 1868 flag aSign;
1878 int16 aExp; 1869 int16 aExp;
@@ -1889,9 +1880,9 @@ float64 float64_round_to_int( float64 a )
1889 } 1880 }
1890 if ( aExp <= 0x3FE ) { 1881 if ( aExp <= 0x3FE ) {
1891 if ( (bits64) ( a<<1 ) == 0 ) return a; 1882 if ( (bits64) ( a<<1 ) == 0 ) return a;
1892 float_exception_flags |= float_flag_inexact; 1883 roundData->exception |= float_flag_inexact;
1893 aSign = extractFloat64Sign( a ); 1884 aSign = extractFloat64Sign( a );
1894 switch ( float_rounding_mode ) { 1885 switch ( roundData->mode ) {
1895 case float_round_nearest_even: 1886 case float_round_nearest_even:
1896 if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) { 1887 if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
1897 return packFloat64( aSign, 0x3FF, 0 ); 1888 return packFloat64( aSign, 0x3FF, 0 );
@@ -1909,7 +1900,7 @@ float64 float64_round_to_int( float64 a )
1909 lastBitMask <<= 0x433 - aExp; 1900 lastBitMask <<= 0x433 - aExp;
1910 roundBitsMask = lastBitMask - 1; 1901 roundBitsMask = lastBitMask - 1;
1911 z = a; 1902 z = a;
1912 roundingMode = float_rounding_mode; 1903 roundingMode = roundData->mode;
1913 if ( roundingMode == float_round_nearest_even ) { 1904 if ( roundingMode == float_round_nearest_even ) {
1914 z += lastBitMask>>1; 1905 z += lastBitMask>>1;
1915 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; 1906 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
@@ -1920,7 +1911,7 @@ float64 float64_round_to_int( float64 a )
1920 } 1911 }
1921 } 1912 }
1922 z &= ~ roundBitsMask; 1913 z &= ~ roundBitsMask;
1923 if ( z != a ) float_exception_flags |= float_flag_inexact; 1914 if ( z != a ) roundData->exception |= float_flag_inexact;
1924 return z; 1915 return z;
1925 1916
1926} 1917}
@@ -1934,7 +1925,7 @@ addition is performed according to the IEC/IEEE Standard for Binary
1934Floating-point Arithmetic. 1925Floating-point Arithmetic.
1935------------------------------------------------------------------------------- 1926-------------------------------------------------------------------------------
1936*/ 1927*/
1937static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) 1928static float64 addFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign )
1938{ 1929{
1939 int16 aExp, bExp, zExp; 1930 int16 aExp, bExp, zExp;
1940 bits64 aSig, bSig, zSig; 1931 bits64 aSig, bSig, zSig;
@@ -1993,7 +1984,7 @@ static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
1993 ++zExp; 1984 ++zExp;
1994 } 1985 }
1995 roundAndPack: 1986 roundAndPack:
1996 return roundAndPackFloat64( zSign, zExp, zSig ); 1987 return roundAndPackFloat64( roundData, zSign, zExp, zSig );
1997 1988
1998} 1989}
1999 1990
@@ -2006,7 +1997,7 @@ result is a NaN. The subtraction is performed according to the IEC/IEEE
2006Standard for Binary Floating-point Arithmetic. 1997Standard for Binary Floating-point Arithmetic.
2007------------------------------------------------------------------------------- 1998-------------------------------------------------------------------------------
2008*/ 1999*/
2009static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) 2000static float64 subFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign )
2010{ 2001{
2011 int16 aExp, bExp, zExp; 2002 int16 aExp, bExp, zExp;
2012 bits64 aSig, bSig, zSig; 2003 bits64 aSig, bSig, zSig;
@@ -2023,7 +2014,7 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
2023 if ( expDiff < 0 ) goto bExpBigger; 2014 if ( expDiff < 0 ) goto bExpBigger;
2024 if ( aExp == 0x7FF ) { 2015 if ( aExp == 0x7FF ) {
2025 if ( aSig | bSig ) return propagateFloat64NaN( a, b ); 2016 if ( aSig | bSig ) return propagateFloat64NaN( a, b );
2026 float_raise( float_flag_invalid ); 2017 roundData->exception |= float_flag_invalid;
2027 return float64_default_nan; 2018 return float64_default_nan;
2028 } 2019 }
2029 if ( aExp == 0 ) { 2020 if ( aExp == 0 ) {
@@ -2032,7 +2023,7 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
2032 } 2023 }
2033 if ( bSig < aSig ) goto aBigger; 2024 if ( bSig < aSig ) goto aBigger;
2034 if ( aSig < bSig ) goto bBigger; 2025 if ( aSig < bSig ) goto bBigger;
2035 return packFloat64( float_rounding_mode == float_round_down, 0, 0 ); 2026 return packFloat64( roundData->mode == float_round_down, 0, 0 );
2036 bExpBigger: 2027 bExpBigger:
2037 if ( bExp == 0x7FF ) { 2028 if ( bExp == 0x7FF ) {
2038 if ( bSig ) return propagateFloat64NaN( a, b ); 2029 if ( bSig ) return propagateFloat64NaN( a, b );
@@ -2069,7 +2060,7 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
2069 zExp = aExp; 2060 zExp = aExp;
2070 normalizeRoundAndPack: 2061 normalizeRoundAndPack:
2071 --zExp; 2062 --zExp;
2072 return normalizeRoundAndPackFloat64( zSign, zExp, zSig ); 2063 return normalizeRoundAndPackFloat64( roundData, zSign, zExp, zSig );
2073 2064
2074} 2065}
2075 2066
@@ -2080,17 +2071,17 @@ and `b'. The operation is performed according to the IEC/IEEE Standard for
2080Binary Floating-point Arithmetic. 2071Binary Floating-point Arithmetic.
2081------------------------------------------------------------------------------- 2072-------------------------------------------------------------------------------
2082*/ 2073*/
2083float64 float64_add( float64 a, float64 b ) 2074float64 float64_add( struct roundingData *roundData, float64 a, float64 b )
2084{ 2075{
2085 flag aSign, bSign; 2076 flag aSign, bSign;
2086 2077
2087 aSign = extractFloat64Sign( a ); 2078 aSign = extractFloat64Sign( a );
2088 bSign = extractFloat64Sign( b ); 2079 bSign = extractFloat64Sign( b );
2089 if ( aSign == bSign ) { 2080 if ( aSign == bSign ) {
2090 return addFloat64Sigs( a, b, aSign ); 2081 return addFloat64Sigs( roundData, a, b, aSign );
2091 } 2082 }
2092 else { 2083 else {
2093 return subFloat64Sigs( a, b, aSign ); 2084 return subFloat64Sigs( roundData, a, b, aSign );
2094 } 2085 }
2095 2086
2096} 2087}
@@ -2102,17 +2093,17 @@ Returns the result of subtracting the double-precision floating-point values
2102for Binary Floating-point Arithmetic. 2093for Binary Floating-point Arithmetic.
2103------------------------------------------------------------------------------- 2094-------------------------------------------------------------------------------
2104*/ 2095*/
2105float64 float64_sub( float64 a, float64 b ) 2096float64 float64_sub( struct roundingData *roundData, float64 a, float64 b )
2106{ 2097{
2107 flag aSign, bSign; 2098 flag aSign, bSign;
2108 2099
2109 aSign = extractFloat64Sign( a ); 2100 aSign = extractFloat64Sign( a );
2110 bSign = extractFloat64Sign( b ); 2101 bSign = extractFloat64Sign( b );
2111 if ( aSign == bSign ) { 2102 if ( aSign == bSign ) {
2112 return subFloat64Sigs( a, b, aSign ); 2103 return subFloat64Sigs( roundData, a, b, aSign );
2113 } 2104 }
2114 else { 2105 else {
2115 return addFloat64Sigs( a, b, aSign ); 2106 return addFloat64Sigs( roundData, a, b, aSign );
2116 } 2107 }
2117 2108
2118} 2109}
@@ -2124,7 +2115,7 @@ Returns the result of multiplying the double-precision floating-point values
2124for Binary Floating-point Arithmetic. 2115for Binary Floating-point Arithmetic.
2125------------------------------------------------------------------------------- 2116-------------------------------------------------------------------------------
2126*/ 2117*/
2127float64 float64_mul( float64 a, float64 b ) 2118float64 float64_mul( struct roundingData *roundData, float64 a, float64 b )
2128{ 2119{
2129 flag aSign, bSign, zSign; 2120 flag aSign, bSign, zSign;
2130 int16 aExp, bExp, zExp; 2121 int16 aExp, bExp, zExp;
@@ -2142,7 +2133,7 @@ float64 float64_mul( float64 a, float64 b )
2142 return propagateFloat64NaN( a, b ); 2133 return propagateFloat64NaN( a, b );
2143 } 2134 }
2144 if ( ( bExp | bSig ) == 0 ) { 2135 if ( ( bExp | bSig ) == 0 ) {
2145 float_raise( float_flag_invalid ); 2136 roundData->exception |= float_flag_invalid;
2146 return float64_default_nan; 2137 return float64_default_nan;
2147 } 2138 }
2148 return packFloat64( zSign, 0x7FF, 0 ); 2139 return packFloat64( zSign, 0x7FF, 0 );
@@ -2150,7 +2141,7 @@ float64 float64_mul( float64 a, float64 b )
2150 if ( bExp == 0x7FF ) { 2141 if ( bExp == 0x7FF ) {
2151 if ( bSig ) return propagateFloat64NaN( a, b ); 2142 if ( bSig ) return propagateFloat64NaN( a, b );
2152 if ( ( aExp | aSig ) == 0 ) { 2143 if ( ( aExp | aSig ) == 0 ) {
2153 float_raise( float_flag_invalid ); 2144 roundData->exception |= float_flag_invalid;
2154 return float64_default_nan; 2145 return float64_default_nan;
2155 } 2146 }
2156 return packFloat64( zSign, 0x7FF, 0 ); 2147 return packFloat64( zSign, 0x7FF, 0 );
@@ -2172,7 +2163,7 @@ float64 float64_mul( float64 a, float64 b )
2172 zSig0 <<= 1; 2163 zSig0 <<= 1;
2173 --zExp; 2164 --zExp;
2174 } 2165 }
2175 return roundAndPackFloat64( zSign, zExp, zSig0 ); 2166 return roundAndPackFloat64( roundData, zSign, zExp, zSig0 );
2176 2167
2177} 2168}
2178 2169
@@ -2183,7 +2174,7 @@ by the corresponding value `b'. The operation is performed according to
2183the IEC/IEEE Standard for Binary Floating-point Arithmetic. 2174the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2184------------------------------------------------------------------------------- 2175-------------------------------------------------------------------------------
2185*/ 2176*/
2186float64 float64_div( float64 a, float64 b ) 2177float64 float64_div( struct roundingData *roundData, float64 a, float64 b )
2187{ 2178{
2188 flag aSign, bSign, zSign; 2179 flag aSign, bSign, zSign;
2189 int16 aExp, bExp, zExp; 2180 int16 aExp, bExp, zExp;
@@ -2202,7 +2193,7 @@ float64 float64_div( float64 a, float64 b )
2202 if ( aSig ) return propagateFloat64NaN( a, b ); 2193 if ( aSig ) return propagateFloat64NaN( a, b );
2203 if ( bExp == 0x7FF ) { 2194 if ( bExp == 0x7FF ) {
2204 if ( bSig ) return propagateFloat64NaN( a, b ); 2195 if ( bSig ) return propagateFloat64NaN( a, b );
2205 float_raise( float_flag_invalid ); 2196 roundData->exception |= float_flag_invalid;
2206 return float64_default_nan; 2197 return float64_default_nan;
2207 } 2198 }
2208 return packFloat64( zSign, 0x7FF, 0 ); 2199 return packFloat64( zSign, 0x7FF, 0 );
@@ -2214,10 +2205,10 @@ float64 float64_div( float64 a, float64 b )
2214 if ( bExp == 0 ) { 2205 if ( bExp == 0 ) {
2215 if ( bSig == 0 ) { 2206 if ( bSig == 0 ) {
2216 if ( ( aExp | aSig ) == 0 ) { 2207 if ( ( aExp | aSig ) == 0 ) {
2217 float_raise( float_flag_invalid ); 2208 roundData->exception |= float_flag_invalid;
2218 return float64_default_nan; 2209 return float64_default_nan;
2219 } 2210 }
2220 float_raise( float_flag_divbyzero ); 2211 roundData->exception |= float_flag_divbyzero;
2221 return packFloat64( zSign, 0x7FF, 0 ); 2212 return packFloat64( zSign, 0x7FF, 0 );
2222 } 2213 }
2223 normalizeFloat64Subnormal( bSig, &bExp, &bSig ); 2214 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
@@ -2243,7 +2234,7 @@ float64 float64_div( float64 a, float64 b )
2243 } 2234 }
2244 zSig |= ( rem1 != 0 ); 2235 zSig |= ( rem1 != 0 );
2245 } 2236 }
2246 return roundAndPackFloat64( zSign, zExp, zSig ); 2237 return roundAndPackFloat64( roundData, zSign, zExp, zSig );
2247 2238
2248} 2239}
2249 2240
@@ -2254,7 +2245,7 @@ with respect to the corresponding value `b'. The operation is performed
2254according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. 2245according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2255------------------------------------------------------------------------------- 2246-------------------------------------------------------------------------------
2256*/ 2247*/
2257float64 float64_rem( float64 a, float64 b ) 2248float64 float64_rem( struct roundingData *roundData, float64 a, float64 b )
2258{ 2249{
2259 flag aSign, bSign, zSign; 2250 flag aSign, bSign, zSign;
2260 int16 aExp, bExp, expDiff; 2251 int16 aExp, bExp, expDiff;
@@ -2272,7 +2263,7 @@ float64 float64_rem( float64 a, float64 b )
2272 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { 2263 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2273 return propagateFloat64NaN( a, b ); 2264 return propagateFloat64NaN( a, b );
2274 } 2265 }
2275 float_raise( float_flag_invalid ); 2266 roundData->exception |= float_flag_invalid;
2276 return float64_default_nan; 2267 return float64_default_nan;
2277 } 2268 }
2278 if ( bExp == 0x7FF ) { 2269 if ( bExp == 0x7FF ) {
@@ -2281,7 +2272,7 @@ float64 float64_rem( float64 a, float64 b )
2281 } 2272 }
2282 if ( bExp == 0 ) { 2273 if ( bExp == 0 ) {
2283 if ( bSig == 0 ) { 2274 if ( bSig == 0 ) {
2284 float_raise( float_flag_invalid ); 2275 roundData->exception |= float_flag_invalid;
2285 return float64_default_nan; 2276 return float64_default_nan;
2286 } 2277 }
2287 normalizeFloat64Subnormal( bSig, &bExp, &bSig ); 2278 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
@@ -2329,7 +2320,7 @@ float64 float64_rem( float64 a, float64 b )
2329 } 2320 }
2330 zSign = ( (sbits64) aSig < 0 ); 2321 zSign = ( (sbits64) aSig < 0 );
2331 if ( zSign ) aSig = - aSig; 2322 if ( zSign ) aSig = - aSig;
2332 return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig ); 2323 return normalizeRoundAndPackFloat64( roundData, aSign ^ zSign, bExp, aSig );
2333 2324
2334} 2325}
2335 2326
@@ -2340,7 +2331,7 @@ The operation is performed according to the IEC/IEEE Standard for Binary
2340Floating-point Arithmetic. 2331Floating-point Arithmetic.
2341------------------------------------------------------------------------------- 2332-------------------------------------------------------------------------------
2342*/ 2333*/
2343float64 float64_sqrt( float64 a ) 2334float64 float64_sqrt( struct roundingData *roundData, float64 a )
2344{ 2335{
2345 flag aSign; 2336 flag aSign;
2346 int16 aExp, zExp; 2337 int16 aExp, zExp;
@@ -2354,12 +2345,12 @@ float64 float64_sqrt( float64 a )
2354 if ( aExp == 0x7FF ) { 2345 if ( aExp == 0x7FF ) {
2355 if ( aSig ) return propagateFloat64NaN( a, a ); 2346 if ( aSig ) return propagateFloat64NaN( a, a );
2356 if ( ! aSign ) return a; 2347 if ( ! aSign ) return a;
2357 float_raise( float_flag_invalid ); 2348 roundData->exception |= float_flag_invalid;
2358 return float64_default_nan; 2349 return float64_default_nan;
2359 } 2350 }
2360 if ( aSign ) { 2351 if ( aSign ) {
2361 if ( ( aExp | aSig ) == 0 ) return a; 2352 if ( ( aExp | aSig ) == 0 ) return a;
2362 float_raise( float_flag_invalid ); 2353 roundData->exception |= float_flag_invalid;
2363 return float64_default_nan; 2354 return float64_default_nan;
2364 } 2355 }
2365 if ( aExp == 0 ) { 2356 if ( aExp == 0 ) {
@@ -2390,7 +2381,7 @@ float64 float64_sqrt( float64 a )
2390 } 2381 }
2391 } 2382 }
2392 shift64RightJamming( zSig, 1, &zSig ); 2383 shift64RightJamming( zSig, 1, &zSig );
2393 return roundAndPackFloat64( 0, zExp, zSig ); 2384 return roundAndPackFloat64( roundData, 0, zExp, zSig );
2394 2385
2395} 2386}
2396 2387
@@ -2554,7 +2545,7 @@ largest positive integer is returned. Otherwise, if the conversion
2554overflows, the largest integer with the same sign as `a' is returned. 2545overflows, the largest integer with the same sign as `a' is returned.
2555------------------------------------------------------------------------------- 2546-------------------------------------------------------------------------------
2556*/ 2547*/
2557int32 floatx80_to_int32( floatx80 a ) 2548int32 floatx80_to_int32( struct roundingData *roundData, floatx80 a )
2558{ 2549{
2559 flag aSign; 2550 flag aSign;
2560 int32 aExp, shiftCount; 2551 int32 aExp, shiftCount;
@@ -2567,7 +2558,7 @@ int32 floatx80_to_int32( floatx80 a )
2567 shiftCount = 0x4037 - aExp; 2558 shiftCount = 0x4037 - aExp;
2568 if ( shiftCount <= 0 ) shiftCount = 1; 2559 if ( shiftCount <= 0 ) shiftCount = 1;
2569 shift64RightJamming( aSig, shiftCount, &aSig ); 2560 shift64RightJamming( aSig, shiftCount, &aSig );
2570 return roundAndPackInt32( aSign, aSig ); 2561 return roundAndPackInt32( roundData, aSign, aSig );
2571 2562
2572} 2563}
2573 2564
@@ -2598,7 +2589,7 @@ int32 floatx80_to_int32_round_to_zero( floatx80 a )
2598 goto invalid; 2589 goto invalid;
2599 } 2590 }
2600 else if ( 63 < shiftCount ) { 2591 else if ( 63 < shiftCount ) {
2601 if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; 2592 if ( aExp || aSig ) float_raise( float_flag_inexact );
2602 return 0; 2593 return 0;
2603 } 2594 }
2604 savedASig = aSig; 2595 savedASig = aSig;
@@ -2607,11 +2598,11 @@ int32 floatx80_to_int32_round_to_zero( floatx80 a )
2607 if ( aSign ) z = - z; 2598 if ( aSign ) z = - z;
2608 if ( ( z < 0 ) ^ aSign ) { 2599 if ( ( z < 0 ) ^ aSign ) {
2609 invalid: 2600 invalid:
2610 float_exception_flags |= float_flag_invalid; 2601 float_raise( float_flag_invalid );
2611 return aSign ? 0x80000000 : 0x7FFFFFFF; 2602 return aSign ? 0x80000000 : 0x7FFFFFFF;
2612 } 2603 }
2613 if ( ( aSig<<shiftCount ) != savedASig ) { 2604 if ( ( aSig<<shiftCount ) != savedASig ) {
2614 float_exception_flags |= float_flag_inexact; 2605 float_raise( float_flag_inexact );
2615 } 2606 }
2616 return z; 2607 return z;
2617 2608
@@ -2625,7 +2616,7 @@ conversion is performed according to the IEC/IEEE Standard for Binary
2625Floating-point Arithmetic. 2616Floating-point Arithmetic.
2626------------------------------------------------------------------------------- 2617-------------------------------------------------------------------------------
2627*/ 2618*/
2628float32 floatx80_to_float32( floatx80 a ) 2619float32 floatx80_to_float32( struct roundingData *roundData, floatx80 a )
2629{ 2620{
2630 flag aSign; 2621 flag aSign;
2631 int32 aExp; 2622 int32 aExp;
@@ -2642,7 +2633,7 @@ float32 floatx80_to_float32( floatx80 a )
2642 } 2633 }
2643 shift64RightJamming( aSig, 33, &aSig ); 2634 shift64RightJamming( aSig, 33, &aSig );
2644 if ( aExp || aSig ) aExp -= 0x3F81; 2635 if ( aExp || aSig ) aExp -= 0x3F81;
2645 return roundAndPackFloat32( aSign, aExp, aSig ); 2636 return roundAndPackFloat32( roundData, aSign, aExp, aSig );
2646 2637
2647} 2638}
2648 2639
@@ -2654,7 +2645,7 @@ conversion is performed according to the IEC/IEEE Standard for Binary
2654Floating-point Arithmetic. 2645Floating-point Arithmetic.
2655------------------------------------------------------------------------------- 2646-------------------------------------------------------------------------------
2656*/ 2647*/
2657float64 floatx80_to_float64( floatx80 a ) 2648float64 floatx80_to_float64( struct roundingData *roundData, floatx80 a )
2658{ 2649{
2659 flag aSign; 2650 flag aSign;
2660 int32 aExp; 2651 int32 aExp;
@@ -2671,7 +2662,7 @@ float64 floatx80_to_float64( floatx80 a )
2671 } 2662 }
2672 shift64RightJamming( aSig, 1, &zSig ); 2663 shift64RightJamming( aSig, 1, &zSig );
2673 if ( aExp || aSig ) aExp -= 0x3C01; 2664 if ( aExp || aSig ) aExp -= 0x3C01;
2674 return roundAndPackFloat64( aSign, aExp, zSig ); 2665 return roundAndPackFloat64( roundData, aSign, aExp, zSig );
2675 2666
2676} 2667}
2677 2668
@@ -2683,7 +2674,7 @@ value. The operation is performed according to the IEC/IEEE Standard for
2683Binary Floating-point Arithmetic. 2674Binary Floating-point Arithmetic.
2684------------------------------------------------------------------------------- 2675-------------------------------------------------------------------------------
2685*/ 2676*/
2686floatx80 floatx80_round_to_int( floatx80 a ) 2677floatx80 floatx80_round_to_int( struct roundingData *roundData, floatx80 a )
2687{ 2678{
2688 flag aSign; 2679 flag aSign;
2689 int32 aExp; 2680 int32 aExp;
@@ -2703,9 +2694,9 @@ floatx80 floatx80_round_to_int( floatx80 a )
2703 && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { 2694 && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
2704 return a; 2695 return a;
2705 } 2696 }
2706 float_exception_flags |= float_flag_inexact; 2697 roundData->exception |= float_flag_inexact;
2707 aSign = extractFloatx80Sign( a ); 2698 aSign = extractFloatx80Sign( a );
2708 switch ( float_rounding_mode ) { 2699 switch ( roundData->mode ) {
2709 case float_round_nearest_even: 2700 case float_round_nearest_even:
2710 if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 ) 2701 if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
2711 ) { 2702 ) {
@@ -2729,7 +2720,7 @@ floatx80 floatx80_round_to_int( floatx80 a )
2729 lastBitMask <<= 0x403E - aExp; 2720 lastBitMask <<= 0x403E - aExp;
2730 roundBitsMask = lastBitMask - 1; 2721 roundBitsMask = lastBitMask - 1;
2731 z = a; 2722 z = a;
2732 roundingMode = float_rounding_mode; 2723 roundingMode = roundData->mode;
2733 if ( roundingMode == float_round_nearest_even ) { 2724 if ( roundingMode == float_round_nearest_even ) {
2734 z.low += lastBitMask>>1; 2725 z.low += lastBitMask>>1;
2735 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; 2726 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
@@ -2744,7 +2735,7 @@ floatx80 floatx80_round_to_int( floatx80 a )
2744 ++z.high; 2735 ++z.high;
2745 z.low = LIT64( 0x8000000000000000 ); 2736 z.low = LIT64( 0x8000000000000000 );
2746 } 2737 }
2747 if ( z.low != a.low ) float_exception_flags |= float_flag_inexact; 2738 if ( z.low != a.low ) roundData->exception |= float_flag_inexact;
2748 return z; 2739 return z;
2749 2740
2750} 2741}
@@ -2758,7 +2749,7 @@ The addition is performed according to the IEC/IEEE Standard for Binary
2758Floating-point Arithmetic. 2749Floating-point Arithmetic.
2759------------------------------------------------------------------------------- 2750-------------------------------------------------------------------------------
2760*/ 2751*/
2761static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) 2752static floatx80 addFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign )
2762{ 2753{
2763 int32 aExp, bExp, zExp; 2754 int32 aExp, bExp, zExp;
2764 bits64 aSig, bSig, zSig0, zSig1; 2755 bits64 aSig, bSig, zSig0, zSig1;
@@ -2814,7 +2805,7 @@ static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
2814 roundAndPack: 2805 roundAndPack:
2815 return 2806 return
2816 roundAndPackFloatx80( 2807 roundAndPackFloatx80(
2817 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 2808 roundData, zSign, zExp, zSig0, zSig1 );
2818 2809
2819} 2810}
2820 2811
@@ -2827,7 +2818,7 @@ result is a NaN. The subtraction is performed according to the IEC/IEEE
2827Standard for Binary Floating-point Arithmetic. 2818Standard for Binary Floating-point Arithmetic.
2828------------------------------------------------------------------------------- 2819-------------------------------------------------------------------------------
2829*/ 2820*/
2830static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) 2821static floatx80 subFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign )
2831{ 2822{
2832 int32 aExp, bExp, zExp; 2823 int32 aExp, bExp, zExp;
2833 bits64 aSig, bSig, zSig0, zSig1; 2824 bits64 aSig, bSig, zSig0, zSig1;
@@ -2845,7 +2836,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
2845 if ( (bits64) ( ( aSig | bSig )<<1 ) ) { 2836 if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
2846 return propagateFloatx80NaN( a, b ); 2837 return propagateFloatx80NaN( a, b );
2847 } 2838 }
2848 float_raise( float_flag_invalid ); 2839 roundData->exception |= float_flag_invalid;
2849 z.low = floatx80_default_nan_low; 2840 z.low = floatx80_default_nan_low;
2850 z.high = floatx80_default_nan_high; 2841 z.high = floatx80_default_nan_high;
2851 return z; 2842 return z;
@@ -2857,7 +2848,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
2857 zSig1 = 0; 2848 zSig1 = 0;
2858 if ( bSig < aSig ) goto aBigger; 2849 if ( bSig < aSig ) goto aBigger;
2859 if ( aSig < bSig ) goto bBigger; 2850 if ( aSig < bSig ) goto bBigger;
2860 return packFloatx80( float_rounding_mode == float_round_down, 0, 0 ); 2851 return packFloatx80( roundData->mode == float_round_down, 0, 0 );
2861 bExpBigger: 2852 bExpBigger:
2862 if ( bExp == 0x7FFF ) { 2853 if ( bExp == 0x7FFF ) {
2863 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 2854 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
@@ -2883,7 +2874,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
2883 normalizeRoundAndPack: 2874 normalizeRoundAndPack:
2884 return 2875 return
2885 normalizeRoundAndPackFloatx80( 2876 normalizeRoundAndPackFloatx80(
2886 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 2877 roundData, zSign, zExp, zSig0, zSig1 );
2887 2878
2888} 2879}
2889 2880
@@ -2894,17 +2885,17 @@ values `a' and `b'. The operation is performed according to the IEC/IEEE
2894Standard for Binary Floating-point Arithmetic. 2885Standard for Binary Floating-point Arithmetic.
2895------------------------------------------------------------------------------- 2886-------------------------------------------------------------------------------
2896*/ 2887*/
2897floatx80 floatx80_add( floatx80 a, floatx80 b ) 2888floatx80 floatx80_add( struct roundingData *roundData, floatx80 a, floatx80 b )
2898{ 2889{
2899 flag aSign, bSign; 2890 flag aSign, bSign;
2900 2891
2901 aSign = extractFloatx80Sign( a ); 2892 aSign = extractFloatx80Sign( a );
2902 bSign = extractFloatx80Sign( b ); 2893 bSign = extractFloatx80Sign( b );
2903 if ( aSign == bSign ) { 2894 if ( aSign == bSign ) {
2904 return addFloatx80Sigs( a, b, aSign ); 2895 return addFloatx80Sigs( roundData, a, b, aSign );
2905 } 2896 }
2906 else { 2897 else {
2907 return subFloatx80Sigs( a, b, aSign ); 2898 return subFloatx80Sigs( roundData, a, b, aSign );
2908 } 2899 }
2909 2900
2910} 2901}
@@ -2916,17 +2907,17 @@ point values `a' and `b'. The operation is performed according to the
2916IEC/IEEE Standard for Binary Floating-point Arithmetic. 2907IEC/IEEE Standard for Binary Floating-point Arithmetic.
2917------------------------------------------------------------------------------- 2908-------------------------------------------------------------------------------
2918*/ 2909*/
2919floatx80 floatx80_sub( floatx80 a, floatx80 b ) 2910floatx80 floatx80_sub( struct roundingData *roundData, floatx80 a, floatx80 b )
2920{ 2911{
2921 flag aSign, bSign; 2912 flag aSign, bSign;
2922 2913
2923 aSign = extractFloatx80Sign( a ); 2914 aSign = extractFloatx80Sign( a );
2924 bSign = extractFloatx80Sign( b ); 2915 bSign = extractFloatx80Sign( b );
2925 if ( aSign == bSign ) { 2916 if ( aSign == bSign ) {
2926 return subFloatx80Sigs( a, b, aSign ); 2917 return subFloatx80Sigs( roundData, a, b, aSign );
2927 } 2918 }
2928 else { 2919 else {
2929 return addFloatx80Sigs( a, b, aSign ); 2920 return addFloatx80Sigs( roundData, a, b, aSign );
2930 } 2921 }
2931 2922
2932} 2923}
@@ -2938,7 +2929,7 @@ point values `a' and `b'. The operation is performed according to the
2938IEC/IEEE Standard for Binary Floating-point Arithmetic. 2929IEC/IEEE Standard for Binary Floating-point Arithmetic.
2939------------------------------------------------------------------------------- 2930-------------------------------------------------------------------------------
2940*/ 2931*/
2941floatx80 floatx80_mul( floatx80 a, floatx80 b ) 2932floatx80 floatx80_mul( struct roundingData *roundData, floatx80 a, floatx80 b )
2942{ 2933{
2943 flag aSign, bSign, zSign; 2934 flag aSign, bSign, zSign;
2944 int32 aExp, bExp, zExp; 2935 int32 aExp, bExp, zExp;
@@ -2964,7 +2955,7 @@ floatx80 floatx80_mul( floatx80 a, floatx80 b )
2964 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 2955 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2965 if ( ( aExp | aSig ) == 0 ) { 2956 if ( ( aExp | aSig ) == 0 ) {
2966 invalid: 2957 invalid:
2967 float_raise( float_flag_invalid ); 2958 roundData->exception |= float_flag_invalid;
2968 z.low = floatx80_default_nan_low; 2959 z.low = floatx80_default_nan_low;
2969 z.high = floatx80_default_nan_high; 2960 z.high = floatx80_default_nan_high;
2970 return z; 2961 return z;
@@ -2987,7 +2978,7 @@ floatx80 floatx80_mul( floatx80 a, floatx80 b )
2987 } 2978 }
2988 return 2979 return
2989 roundAndPackFloatx80( 2980 roundAndPackFloatx80(
2990 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 2981 roundData, zSign, zExp, zSig0, zSig1 );
2991 2982
2992} 2983}
2993 2984
@@ -2998,7 +2989,7 @@ value `a' by the corresponding value `b'. The operation is performed
2998according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. 2989according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2999------------------------------------------------------------------------------- 2990-------------------------------------------------------------------------------
3000*/ 2991*/
3001floatx80 floatx80_div( floatx80 a, floatx80 b ) 2992floatx80 floatx80_div( struct roundingData *roundData, floatx80 a, floatx80 b )
3002{ 2993{
3003 flag aSign, bSign, zSign; 2994 flag aSign, bSign, zSign;
3004 int32 aExp, bExp, zExp; 2995 int32 aExp, bExp, zExp;
@@ -3029,12 +3020,12 @@ floatx80 floatx80_div( floatx80 a, floatx80 b )
3029 if ( bSig == 0 ) { 3020 if ( bSig == 0 ) {
3030 if ( ( aExp | aSig ) == 0 ) { 3021 if ( ( aExp | aSig ) == 0 ) {
3031 invalid: 3022 invalid:
3032 float_raise( float_flag_invalid ); 3023 roundData->exception |= float_flag_invalid;
3033 z.low = floatx80_default_nan_low; 3024 z.low = floatx80_default_nan_low;
3034 z.high = floatx80_default_nan_high; 3025 z.high = floatx80_default_nan_high;
3035 return z; 3026 return z;
3036 } 3027 }
3037 float_raise( float_flag_divbyzero ); 3028 roundData->exception |= float_flag_divbyzero;
3038 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3029 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3039 } 3030 }
3040 normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); 3031 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
@@ -3068,7 +3059,7 @@ floatx80 floatx80_div( floatx80 a, floatx80 b )
3068 } 3059 }
3069 return 3060 return
3070 roundAndPackFloatx80( 3061 roundAndPackFloatx80(
3071 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 3062 roundData, zSign, zExp, zSig0, zSig1 );
3072 3063
3073} 3064}
3074 3065
@@ -3079,7 +3070,7 @@ Returns the remainder of the extended double-precision floating-point value
3079according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. 3070according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
3080------------------------------------------------------------------------------- 3071-------------------------------------------------------------------------------
3081*/ 3072*/
3082floatx80 floatx80_rem( floatx80 a, floatx80 b ) 3073floatx80 floatx80_rem( struct roundingData *roundData, floatx80 a, floatx80 b )
3083{ 3074{
3084 flag aSign, bSign, zSign; 3075 flag aSign, bSign, zSign;
3085 int32 aExp, bExp, expDiff; 3076 int32 aExp, bExp, expDiff;
@@ -3107,7 +3098,7 @@ floatx80 floatx80_rem( floatx80 a, floatx80 b )
3107 if ( bExp == 0 ) { 3098 if ( bExp == 0 ) {
3108 if ( bSig == 0 ) { 3099 if ( bSig == 0 ) {
3109 invalid: 3100 invalid:
3110 float_raise( float_flag_invalid ); 3101 roundData->exception |= float_flag_invalid;
3111 z.low = floatx80_default_nan_low; 3102 z.low = floatx80_default_nan_low;
3112 z.high = floatx80_default_nan_high; 3103 z.high = floatx80_default_nan_high;
3113 return z; 3104 return z;
@@ -3164,9 +3155,10 @@ floatx80 floatx80_rem( floatx80 a, floatx80 b )
3164 aSig1 = alternateASig1; 3155 aSig1 = alternateASig1;
3165 zSign = ! zSign; 3156 zSign = ! zSign;
3166 } 3157 }
3158
3167 return 3159 return
3168 normalizeRoundAndPackFloatx80( 3160 normalizeRoundAndPackFloatx80(
3169 80, zSign, bExp + expDiff, aSig0, aSig1 ); 3161 roundData, zSign, bExp + expDiff, aSig0, aSig1 );
3170 3162
3171} 3163}
3172 3164
@@ -3177,7 +3169,7 @@ value `a'. The operation is performed according to the IEC/IEEE Standard
3177for Binary Floating-point Arithmetic. 3169for Binary Floating-point Arithmetic.
3178------------------------------------------------------------------------------- 3170-------------------------------------------------------------------------------
3179*/ 3171*/
3180floatx80 floatx80_sqrt( floatx80 a ) 3172floatx80 floatx80_sqrt( struct roundingData *roundData, floatx80 a )
3181{ 3173{
3182 flag aSign; 3174 flag aSign;
3183 int32 aExp, zExp; 3175 int32 aExp, zExp;
@@ -3197,7 +3189,7 @@ floatx80 floatx80_sqrt( floatx80 a )
3197 if ( aSign ) { 3189 if ( aSign ) {
3198 if ( ( aExp | aSig0 ) == 0 ) return a; 3190 if ( ( aExp | aSig0 ) == 0 ) return a;
3199 invalid: 3191 invalid:
3200 float_raise( float_flag_invalid ); 3192 roundData->exception |= float_flag_invalid;
3201 z.low = floatx80_default_nan_low; 3193 z.low = floatx80_default_nan_low;
3202 z.high = floatx80_default_nan_high; 3194 z.high = floatx80_default_nan_high;
3203 return z; 3195 return z;
@@ -3242,7 +3234,7 @@ floatx80 floatx80_sqrt( floatx80 a )
3242 } 3234 }
3243 return 3235 return
3244 roundAndPackFloatx80( 3236 roundAndPackFloatx80(
3245 floatx80_rounding_precision, 0, zExp, zSig0, zSig1 ); 3237 roundData, 0, zExp, zSig0, zSig1 );
3246 3238
3247} 3239}
3248 3240
@@ -3264,7 +3256,7 @@ flag floatx80_eq( floatx80 a, floatx80 b )
3264 ) { 3256 ) {
3265 if ( floatx80_is_signaling_nan( a ) 3257 if ( floatx80_is_signaling_nan( a )
3266 || floatx80_is_signaling_nan( b ) ) { 3258 || floatx80_is_signaling_nan( b ) ) {
3267 float_raise( float_flag_invalid ); 3259 roundData->exception |= float_flag_invalid;
3268 } 3260 }
3269 return 0; 3261 return 0;
3270 } 3262 }
@@ -3294,7 +3286,7 @@ flag floatx80_le( floatx80 a, floatx80 b )
3294 || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 3286 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3295 && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 3287 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3296 ) { 3288 ) {
3297 float_raise( float_flag_invalid ); 3289 roundData->exception |= float_flag_invalid;
3298 return 0; 3290 return 0;
3299 } 3291 }
3300 aSign = extractFloatx80Sign( a ); 3292 aSign = extractFloatx80Sign( a );
@@ -3328,7 +3320,7 @@ flag floatx80_lt( floatx80 a, floatx80 b )
3328 || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 3320 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3329 && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 3321 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3330 ) { 3322 ) {
3331 float_raise( float_flag_invalid ); 3323 roundData->exception |= float_flag_invalid;
3332 return 0; 3324 return 0;
3333 } 3325 }
3334 aSign = extractFloatx80Sign( a ); 3326 aSign = extractFloatx80Sign( a );
@@ -3361,7 +3353,7 @@ flag floatx80_eq_signaling( floatx80 a, floatx80 b )
3361 || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 3353 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3362 && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 3354 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3363 ) { 3355 ) {
3364 float_raise( float_flag_invalid ); 3356 roundData->exception |= float_flag_invalid;
3365 return 0; 3357 return 0;
3366 } 3358 }
3367 return 3359 return
@@ -3392,7 +3384,7 @@ flag floatx80_le_quiet( floatx80 a, floatx80 b )
3392 ) { 3384 ) {
3393 if ( floatx80_is_signaling_nan( a ) 3385 if ( floatx80_is_signaling_nan( a )
3394 || floatx80_is_signaling_nan( b ) ) { 3386 || floatx80_is_signaling_nan( b ) ) {
3395 float_raise( float_flag_invalid ); 3387 roundData->exception |= float_flag_invalid;
3396 } 3388 }
3397 return 0; 3389 return 0;
3398 } 3390 }
@@ -3429,7 +3421,7 @@ flag floatx80_lt_quiet( floatx80 a, floatx80 b )
3429 ) { 3421 ) {
3430 if ( floatx80_is_signaling_nan( a ) 3422 if ( floatx80_is_signaling_nan( a )
3431 || floatx80_is_signaling_nan( b ) ) { 3423 || floatx80_is_signaling_nan( b ) ) {
3432 float_raise( float_flag_invalid ); 3424 roundData->exception |= float_flag_invalid;
3433 } 3425 }
3434 return 0; 3426 return 0;
3435 } 3427 }