diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/sh/kernel/cpu/sh4/softfloat.c | 73 |
1 files changed, 41 insertions, 32 deletions
diff --git a/arch/sh/kernel/cpu/sh4/softfloat.c b/arch/sh/kernel/cpu/sh4/softfloat.c index 2b747f3b02bd..42edf2e54e85 100644 --- a/arch/sh/kernel/cpu/sh4/softfloat.c +++ b/arch/sh/kernel/cpu/sh4/softfloat.c | |||
@@ -37,6 +37,7 @@ | |||
37 | */ | 37 | */ |
38 | #include <linux/kernel.h> | 38 | #include <linux/kernel.h> |
39 | #include <cpu/fpu.h> | 39 | #include <cpu/fpu.h> |
40 | #include <asm/div64.h> | ||
40 | 41 | ||
41 | #define LIT64( a ) a##LL | 42 | #define LIT64( a ) a##LL |
42 | 43 | ||
@@ -67,16 +68,16 @@ typedef unsigned long long float64; | |||
67 | extern void float_raise(unsigned int flags); /* in fpu.c */ | 68 | extern void float_raise(unsigned int flags); /* in fpu.c */ |
68 | extern int float_rounding_mode(void); /* in fpu.c */ | 69 | extern int float_rounding_mode(void); /* in fpu.c */ |
69 | 70 | ||
70 | inline bits64 extractFloat64Frac(float64 a); | 71 | bits64 extractFloat64Frac(float64 a); |
71 | inline flag extractFloat64Sign(float64 a); | 72 | flag extractFloat64Sign(float64 a); |
72 | inline int16 extractFloat64Exp(float64 a); | 73 | int16 extractFloat64Exp(float64 a); |
73 | inline int16 extractFloat32Exp(float32 a); | 74 | int16 extractFloat32Exp(float32 a); |
74 | inline flag extractFloat32Sign(float32 a); | 75 | flag extractFloat32Sign(float32 a); |
75 | inline bits32 extractFloat32Frac(float32 a); | 76 | bits32 extractFloat32Frac(float32 a); |
76 | inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig); | 77 | float64 packFloat64(flag zSign, int16 zExp, bits64 zSig); |
77 | inline void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr); | 78 | void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr); |
78 | inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig); | 79 | float32 packFloat32(flag zSign, int16 zExp, bits32 zSig); |
79 | inline void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr); | 80 | void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr); |
80 | float64 float64_sub(float64 a, float64 b); | 81 | float64 float64_sub(float64 a, float64 b); |
81 | float32 float32_sub(float32 a, float32 b); | 82 | float32 float32_sub(float32 a, float32 b); |
82 | float32 float32_add(float32 a, float32 b); | 83 | float32 float32_add(float32 a, float32 b); |
@@ -86,11 +87,11 @@ float32 float32_div(float32 a, float32 b); | |||
86 | float32 float32_mul(float32 a, float32 b); | 87 | float32 float32_mul(float32 a, float32 b); |
87 | float64 float64_mul(float64 a, float64 b); | 88 | float64 float64_mul(float64 a, float64 b); |
88 | float32 float64_to_float32(float64 a); | 89 | float32 float64_to_float32(float64 a); |
89 | inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, | 90 | void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, |
90 | bits64 * z1Ptr); | 91 | bits64 * z1Ptr); |
91 | inline void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, | 92 | void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, |
92 | bits64 * z1Ptr); | 93 | bits64 * z1Ptr); |
93 | inline void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr); | 94 | void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr); |
94 | 95 | ||
95 | static int8 countLeadingZeros32(bits32 a); | 96 | static int8 countLeadingZeros32(bits32 a); |
96 | static int8 countLeadingZeros64(bits64 a); | 97 | static int8 countLeadingZeros64(bits64 a); |
@@ -110,42 +111,42 @@ static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b); | |||
110 | static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr, | 111 | static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr, |
111 | bits32 * zSigPtr); | 112 | bits32 * zSigPtr); |
112 | 113 | ||
113 | inline bits64 extractFloat64Frac(float64 a) | 114 | bits64 extractFloat64Frac(float64 a) |
114 | { | 115 | { |
115 | return a & LIT64(0x000FFFFFFFFFFFFF); | 116 | return a & LIT64(0x000FFFFFFFFFFFFF); |
116 | } | 117 | } |
117 | 118 | ||
118 | inline flag extractFloat64Sign(float64 a) | 119 | flag extractFloat64Sign(float64 a) |
119 | { | 120 | { |
120 | return a >> 63; | 121 | return a >> 63; |
121 | } | 122 | } |
122 | 123 | ||
123 | inline int16 extractFloat64Exp(float64 a) | 124 | int16 extractFloat64Exp(float64 a) |
124 | { | 125 | { |
125 | return (a >> 52) & 0x7FF; | 126 | return (a >> 52) & 0x7FF; |
126 | } | 127 | } |
127 | 128 | ||
128 | inline int16 extractFloat32Exp(float32 a) | 129 | int16 extractFloat32Exp(float32 a) |
129 | { | 130 | { |
130 | return (a >> 23) & 0xFF; | 131 | return (a >> 23) & 0xFF; |
131 | } | 132 | } |
132 | 133 | ||
133 | inline flag extractFloat32Sign(float32 a) | 134 | flag extractFloat32Sign(float32 a) |
134 | { | 135 | { |
135 | return a >> 31; | 136 | return a >> 31; |
136 | } | 137 | } |
137 | 138 | ||
138 | inline bits32 extractFloat32Frac(float32 a) | 139 | bits32 extractFloat32Frac(float32 a) |
139 | { | 140 | { |
140 | return a & 0x007FFFFF; | 141 | return a & 0x007FFFFF; |
141 | } | 142 | } |
142 | 143 | ||
143 | inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig) | 144 | float64 packFloat64(flag zSign, int16 zExp, bits64 zSig) |
144 | { | 145 | { |
145 | return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig; | 146 | return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig; |
146 | } | 147 | } |
147 | 148 | ||
148 | inline void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr) | 149 | void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr) |
149 | { | 150 | { |
150 | bits64 z; | 151 | bits64 z; |
151 | 152 | ||
@@ -338,12 +339,12 @@ static float64 addFloat64Sigs(float64 a, float64 b, flag zSign) | |||
338 | 339 | ||
339 | } | 340 | } |
340 | 341 | ||
341 | inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig) | 342 | float32 packFloat32(flag zSign, int16 zExp, bits32 zSig) |
342 | { | 343 | { |
343 | return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig; | 344 | return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig; |
344 | } | 345 | } |
345 | 346 | ||
346 | inline void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr) | 347 | void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr) |
347 | { | 348 | { |
348 | bits32 z; | 349 | bits32 z; |
349 | if (count == 0) { | 350 | if (count == 0) { |
@@ -634,7 +635,7 @@ normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, bits64 * zSigPtr) | |||
634 | *zExpPtr = 1 - shiftCount; | 635 | *zExpPtr = 1 - shiftCount; |
635 | } | 636 | } |
636 | 637 | ||
637 | inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, | 638 | void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, |
638 | bits64 * z1Ptr) | 639 | bits64 * z1Ptr) |
639 | { | 640 | { |
640 | bits64 z1; | 641 | bits64 z1; |
@@ -644,7 +645,7 @@ inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, | |||
644 | *z0Ptr = a0 + b0 + (z1 < a1); | 645 | *z0Ptr = a0 + b0 + (z1 < a1); |
645 | } | 646 | } |
646 | 647 | ||
647 | inline void | 648 | void |
648 | sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, | 649 | sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, |
649 | bits64 * z1Ptr) | 650 | bits64 * z1Ptr) |
650 | { | 651 | { |
@@ -656,11 +657,14 @@ static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b) | |||
656 | { | 657 | { |
657 | bits64 b0, b1; | 658 | bits64 b0, b1; |
658 | bits64 rem0, rem1, term0, term1; | 659 | bits64 rem0, rem1, term0, term1; |
659 | bits64 z; | 660 | bits64 z, tmp; |
660 | if (b <= a0) | 661 | if (b <= a0) |
661 | return LIT64(0xFFFFFFFFFFFFFFFF); | 662 | return LIT64(0xFFFFFFFFFFFFFFFF); |
662 | b0 = b >> 32; | 663 | b0 = b >> 32; |
663 | z = (b0 << 32 <= a0) ? LIT64(0xFFFFFFFF00000000) : (a0 / b0) << 32; | 664 | tmp = a0; |
665 | do_div(tmp, b0); | ||
666 | |||
667 | z = (b0 << 32 <= a0) ? LIT64(0xFFFFFFFF00000000) : tmp << 32; | ||
664 | mul64To128(b, z, &term0, &term1); | 668 | mul64To128(b, z, &term0, &term1); |
665 | sub128(a0, a1, term0, term1, &rem0, &rem1); | 669 | sub128(a0, a1, term0, term1, &rem0, &rem1); |
666 | while (((sbits64) rem0) < 0) { | 670 | while (((sbits64) rem0) < 0) { |
@@ -669,11 +673,13 @@ static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b) | |||
669 | add128(rem0, rem1, b0, b1, &rem0, &rem1); | 673 | add128(rem0, rem1, b0, b1, &rem0, &rem1); |
670 | } | 674 | } |
671 | rem0 = (rem0 << 32) | (rem1 >> 32); | 675 | rem0 = (rem0 << 32) | (rem1 >> 32); |
672 | z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : rem0 / b0; | 676 | tmp = rem0; |
677 | do_div(tmp, b0); | ||
678 | z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : tmp; | ||
673 | return z; | 679 | return z; |
674 | } | 680 | } |
675 | 681 | ||
676 | inline void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr) | 682 | void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr) |
677 | { | 683 | { |
678 | bits32 aHigh, aLow, bHigh, bLow; | 684 | bits32 aHigh, aLow, bHigh, bLow; |
679 | bits64 z0, zMiddleA, zMiddleB, z1; | 685 | bits64 z0, zMiddleA, zMiddleB, z1; |
@@ -769,7 +775,8 @@ float32 float32_div(float32 a, float32 b) | |||
769 | { | 775 | { |
770 | flag aSign, bSign, zSign; | 776 | flag aSign, bSign, zSign; |
771 | int16 aExp, bExp, zExp; | 777 | int16 aExp, bExp, zExp; |
772 | bits32 aSig, bSig, zSig; | 778 | bits32 aSig, bSig; |
779 | uint64_t zSig; | ||
773 | 780 | ||
774 | aSig = extractFloat32Frac(a); | 781 | aSig = extractFloat32Frac(a); |
775 | aExp = extractFloat32Exp(a); | 782 | aExp = extractFloat32Exp(a); |
@@ -804,11 +811,13 @@ float32 float32_div(float32 a, float32 b) | |||
804 | aSig >>= 1; | 811 | aSig >>= 1; |
805 | ++zExp; | 812 | ++zExp; |
806 | } | 813 | } |
807 | zSig = (((bits64) aSig) << 32) / bSig; | 814 | zSig = (((bits64) aSig) << 32); |
815 | do_div(zSig, bSig); | ||
816 | |||
808 | if ((zSig & 0x3F) == 0) { | 817 | if ((zSig & 0x3F) == 0) { |
809 | zSig |= (((bits64) bSig) * zSig != ((bits64) aSig) << 32); | 818 | zSig |= (((bits64) bSig) * zSig != ((bits64) aSig) << 32); |
810 | } | 819 | } |
811 | return roundAndPackFloat32(zSign, zExp, zSig); | 820 | return roundAndPackFloat32(zSign, zExp, (bits32)zSig); |
812 | 821 | ||
813 | } | 822 | } |
814 | 823 | ||