diff options
Diffstat (limited to 'arch/sh/kernel/cpu/sh4/softfloat.c')
-rw-r--r-- | arch/sh/kernel/cpu/sh4/softfloat.c | 892 |
1 files changed, 892 insertions, 0 deletions
diff --git a/arch/sh/kernel/cpu/sh4/softfloat.c b/arch/sh/kernel/cpu/sh4/softfloat.c new file mode 100644 index 00000000000..7b2d337ee41 --- /dev/null +++ b/arch/sh/kernel/cpu/sh4/softfloat.c | |||
@@ -0,0 +1,892 @@ | |||
1 | /* | ||
2 | * Floating point emulation support for subnormalised numbers on SH4 | ||
3 | * architecture This file is derived from the SoftFloat IEC/IEEE | ||
4 | * Floating-point Arithmetic Package, Release 2 the original license of | ||
5 | * which is reproduced below. | ||
6 | * | ||
7 | * ======================================================================== | ||
8 | * | ||
9 | * This C source file is part of the SoftFloat IEC/IEEE Floating-point | ||
10 | * Arithmetic Package, Release 2. | ||
11 | * | ||
12 | * Written by John R. Hauser. This work was made possible in part by the | ||
13 | * International Computer Science Institute, located at Suite 600, 1947 Center | ||
14 | * Street, Berkeley, California 94704. Funding was partially provided by the | ||
15 | * National Science Foundation under grant MIP-9311980. The original version | ||
16 | * of this code was written as part of a project to build a fixed-point vector | ||
17 | * processor in collaboration with the University of California at Berkeley, | ||
18 | * overseen by Profs. Nelson Morgan and John Wawrzynek. More information | ||
19 | * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ | ||
20 | * arithmetic/softfloat.html'. | ||
21 | * | ||
22 | * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort | ||
23 | * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT | ||
24 | * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO | ||
25 | * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY | ||
26 | * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. | ||
27 | * | ||
28 | * Derivative works are acceptable, even for commercial purposes, so long as | ||
29 | * (1) they include prominent notice that the work is derivative, and (2) they | ||
30 | * include prominent notice akin to these three paragraphs for those parts of | ||
31 | * this code that are retained. | ||
32 | * | ||
33 | * ======================================================================== | ||
34 | * | ||
35 | * SH4 modifications by Ismail Dhaoui <ismail.dhaoui@st.com> | ||
36 | * and Kamel Khelifi <kamel.khelifi@st.com> | ||
37 | */ | ||
38 | #include <linux/kernel.h> | ||
39 | #include <asm/cpu/fpu.h> | ||
40 | |||
41 | #define LIT64( a ) a##LL | ||
42 | |||
43 | typedef char flag; | ||
44 | typedef unsigned char uint8; | ||
45 | typedef signed char int8; | ||
46 | typedef int uint16; | ||
47 | typedef int int16; | ||
48 | typedef unsigned int uint32; | ||
49 | typedef signed int int32; | ||
50 | |||
51 | typedef unsigned long long int bits64; | ||
52 | typedef signed long long int sbits64; | ||
53 | |||
54 | typedef unsigned char bits8; | ||
55 | typedef signed char sbits8; | ||
56 | typedef unsigned short int bits16; | ||
57 | typedef signed short int sbits16; | ||
58 | typedef unsigned int bits32; | ||
59 | typedef signed int sbits32; | ||
60 | |||
61 | typedef unsigned long long int uint64; | ||
62 | typedef signed long long int int64; | ||
63 | |||
64 | typedef unsigned long int float32; | ||
65 | typedef unsigned long long float64; | ||
66 | |||
67 | extern void float_raise(unsigned int flags); /* in fpu.c */ | ||
68 | extern int float_rounding_mode(void); /* in fpu.c */ | ||
69 | |||
70 | inline bits64 extractFloat64Frac(float64 a); | ||
71 | inline flag extractFloat64Sign(float64 a); | ||
72 | inline int16 extractFloat64Exp(float64 a); | ||
73 | inline int16 extractFloat32Exp(float32 a); | ||
74 | inline flag extractFloat32Sign(float32 a); | ||
75 | inline bits32 extractFloat32Frac(float32 a); | ||
76 | inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig); | ||
77 | inline void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr); | ||
78 | inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig); | ||
79 | inline void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr); | ||
80 | float64 float64_sub(float64 a, float64 b); | ||
81 | float32 float32_sub(float32 a, float32 b); | ||
82 | float32 float32_add(float32 a, float32 b); | ||
83 | float64 float64_add(float64 a, float64 b); | ||
84 | float64 float64_div(float64 a, float64 b); | ||
85 | float32 float32_div(float32 a, float32 b); | ||
86 | float32 float32_mul(float32 a, float32 b); | ||
87 | float64 float64_mul(float64 a, float64 b); | ||
88 | inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, | ||
89 | bits64 * z1Ptr); | ||
90 | inline void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, | ||
91 | bits64 * z1Ptr); | ||
92 | inline void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr); | ||
93 | |||
94 | static int8 countLeadingZeros32(bits32 a); | ||
95 | static int8 countLeadingZeros64(bits64 a); | ||
96 | static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, | ||
97 | bits64 zSig); | ||
98 | static float64 subFloat64Sigs(float64 a, float64 b, flag zSign); | ||
99 | static float64 addFloat64Sigs(float64 a, float64 b, flag zSign); | ||
100 | static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig); | ||
101 | static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, | ||
102 | bits32 zSig); | ||
103 | static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig); | ||
104 | static float32 subFloat32Sigs(float32 a, float32 b, flag zSign); | ||
105 | static float32 addFloat32Sigs(float32 a, float32 b, flag zSign); | ||
106 | static void normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, | ||
107 | bits64 * zSigPtr); | ||
108 | static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b); | ||
109 | static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr, | ||
110 | bits32 * zSigPtr); | ||
111 | |||
112 | inline bits64 extractFloat64Frac(float64 a) | ||
113 | { | ||
114 | return a & LIT64(0x000FFFFFFFFFFFFF); | ||
115 | } | ||
116 | |||
117 | inline flag extractFloat64Sign(float64 a) | ||
118 | { | ||
119 | return a >> 63; | ||
120 | } | ||
121 | |||
122 | inline int16 extractFloat64Exp(float64 a) | ||
123 | { | ||
124 | return (a >> 52) & 0x7FF; | ||
125 | } | ||
126 | |||
127 | inline int16 extractFloat32Exp(float32 a) | ||
128 | { | ||
129 | return (a >> 23) & 0xFF; | ||
130 | } | ||
131 | |||
132 | inline flag extractFloat32Sign(float32 a) | ||
133 | { | ||
134 | return a >> 31; | ||
135 | } | ||
136 | |||
137 | inline bits32 extractFloat32Frac(float32 a) | ||
138 | { | ||
139 | return a & 0x007FFFFF; | ||
140 | } | ||
141 | |||
142 | inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig) | ||
143 | { | ||
144 | return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig; | ||
145 | } | ||
146 | |||
147 | inline void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr) | ||
148 | { | ||
149 | bits64 z; | ||
150 | |||
151 | if (count == 0) { | ||
152 | z = a; | ||
153 | } else if (count < 64) { | ||
154 | z = (a >> count) | ((a << ((-count) & 63)) != 0); | ||
155 | } else { | ||
156 | z = (a != 0); | ||
157 | } | ||
158 | *zPtr = z; | ||
159 | } | ||
160 | |||
161 | static int8 countLeadingZeros32(bits32 a) | ||
162 | { | ||
163 | static const int8 countLeadingZerosHigh[] = { | ||
164 | 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, | ||
165 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | ||
166 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | ||
167 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | ||
168 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | ||
169 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | ||
170 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | ||
171 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | ||
172 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
173 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
174 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
175 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
176 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
177 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
178 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
179 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 | ||
180 | }; | ||
181 | int8 shiftCount; | ||
182 | |||
183 | shiftCount = 0; | ||
184 | if (a < 0x10000) { | ||
185 | shiftCount += 16; | ||
186 | a <<= 16; | ||
187 | } | ||
188 | if (a < 0x1000000) { | ||
189 | shiftCount += 8; | ||
190 | a <<= 8; | ||
191 | } | ||
192 | shiftCount += countLeadingZerosHigh[a >> 24]; | ||
193 | return shiftCount; | ||
194 | |||
195 | } | ||
196 | |||
197 | static int8 countLeadingZeros64(bits64 a) | ||
198 | { | ||
199 | int8 shiftCount; | ||
200 | |||
201 | shiftCount = 0; | ||
202 | if (a < ((bits64) 1) << 32) { | ||
203 | shiftCount += 32; | ||
204 | } else { | ||
205 | a >>= 32; | ||
206 | } | ||
207 | shiftCount += countLeadingZeros32(a); | ||
208 | return shiftCount; | ||
209 | |||
210 | } | ||
211 | |||
212 | static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig) | ||
213 | { | ||
214 | int8 shiftCount; | ||
215 | |||
216 | shiftCount = countLeadingZeros64(zSig) - 1; | ||
217 | return roundAndPackFloat64(zSign, zExp - shiftCount, | ||
218 | zSig << shiftCount); | ||
219 | |||
220 | } | ||
221 | |||
222 | static float64 subFloat64Sigs(float64 a, float64 b, flag zSign) | ||
223 | { | ||
224 | int16 aExp, bExp, zExp; | ||
225 | bits64 aSig, bSig, zSig; | ||
226 | int16 expDiff; | ||
227 | |||
228 | aSig = extractFloat64Frac(a); | ||
229 | aExp = extractFloat64Exp(a); | ||
230 | bSig = extractFloat64Frac(b); | ||
231 | bExp = extractFloat64Exp(b); | ||
232 | expDiff = aExp - bExp; | ||
233 | aSig <<= 10; | ||
234 | bSig <<= 10; | ||
235 | if (0 < expDiff) | ||
236 | goto aExpBigger; | ||
237 | if (expDiff < 0) | ||
238 | goto bExpBigger; | ||
239 | if (aExp == 0) { | ||
240 | aExp = 1; | ||
241 | bExp = 1; | ||
242 | } | ||
243 | if (bSig < aSig) | ||
244 | goto aBigger; | ||
245 | if (aSig < bSig) | ||
246 | goto bBigger; | ||
247 | return packFloat64(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0); | ||
248 | bExpBigger: | ||
249 | if (bExp == 0x7FF) { | ||
250 | return packFloat64(zSign ^ 1, 0x7FF, 0); | ||
251 | } | ||
252 | if (aExp == 0) { | ||
253 | ++expDiff; | ||
254 | } else { | ||
255 | aSig |= LIT64(0x4000000000000000); | ||
256 | } | ||
257 | shift64RightJamming(aSig, -expDiff, &aSig); | ||
258 | bSig |= LIT64(0x4000000000000000); | ||
259 | bBigger: | ||
260 | zSig = bSig - aSig; | ||
261 | zExp = bExp; | ||
262 | zSign ^= 1; | ||
263 | goto normalizeRoundAndPack; | ||
264 | aExpBigger: | ||
265 | if (aExp == 0x7FF) { | ||
266 | return a; | ||
267 | } | ||
268 | if (bExp == 0) { | ||
269 | --expDiff; | ||
270 | } else { | ||
271 | bSig |= LIT64(0x4000000000000000); | ||
272 | } | ||
273 | shift64RightJamming(bSig, expDiff, &bSig); | ||
274 | aSig |= LIT64(0x4000000000000000); | ||
275 | aBigger: | ||
276 | zSig = aSig - bSig; | ||
277 | zExp = aExp; | ||
278 | normalizeRoundAndPack: | ||
279 | --zExp; | ||
280 | return normalizeRoundAndPackFloat64(zSign, zExp, zSig); | ||
281 | |||
282 | } | ||
283 | static float64 addFloat64Sigs(float64 a, float64 b, flag zSign) | ||
284 | { | ||
285 | int16 aExp, bExp, zExp; | ||
286 | bits64 aSig, bSig, zSig; | ||
287 | int16 expDiff; | ||
288 | |||
289 | aSig = extractFloat64Frac(a); | ||
290 | aExp = extractFloat64Exp(a); | ||
291 | bSig = extractFloat64Frac(b); | ||
292 | bExp = extractFloat64Exp(b); | ||
293 | expDiff = aExp - bExp; | ||
294 | aSig <<= 9; | ||
295 | bSig <<= 9; | ||
296 | if (0 < expDiff) { | ||
297 | if (aExp == 0x7FF) { | ||
298 | return a; | ||
299 | } | ||
300 | if (bExp == 0) { | ||
301 | --expDiff; | ||
302 | } else { | ||
303 | bSig |= LIT64(0x2000000000000000); | ||
304 | } | ||
305 | shift64RightJamming(bSig, expDiff, &bSig); | ||
306 | zExp = aExp; | ||
307 | } else if (expDiff < 0) { | ||
308 | if (bExp == 0x7FF) { | ||
309 | return packFloat64(zSign, 0x7FF, 0); | ||
310 | } | ||
311 | if (aExp == 0) { | ||
312 | ++expDiff; | ||
313 | } else { | ||
314 | aSig |= LIT64(0x2000000000000000); | ||
315 | } | ||
316 | shift64RightJamming(aSig, -expDiff, &aSig); | ||
317 | zExp = bExp; | ||
318 | } else { | ||
319 | if (aExp == 0x7FF) { | ||
320 | return a; | ||
321 | } | ||
322 | if (aExp == 0) | ||
323 | return packFloat64(zSign, 0, (aSig + bSig) >> 9); | ||
324 | zSig = LIT64(0x4000000000000000) + aSig + bSig; | ||
325 | zExp = aExp; | ||
326 | goto roundAndPack; | ||
327 | } | ||
328 | aSig |= LIT64(0x2000000000000000); | ||
329 | zSig = (aSig + bSig) << 1; | ||
330 | --zExp; | ||
331 | if ((sbits64) zSig < 0) { | ||
332 | zSig = aSig + bSig; | ||
333 | ++zExp; | ||
334 | } | ||
335 | roundAndPack: | ||
336 | return roundAndPackFloat64(zSign, zExp, zSig); | ||
337 | |||
338 | } | ||
339 | |||
340 | inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig) | ||
341 | { | ||
342 | return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig; | ||
343 | } | ||
344 | |||
345 | inline void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr) | ||
346 | { | ||
347 | bits32 z; | ||
348 | if (count == 0) { | ||
349 | z = a; | ||
350 | } else if (count < 32) { | ||
351 | z = (a >> count) | ((a << ((-count) & 31)) != 0); | ||
352 | } else { | ||
353 | z = (a != 0); | ||
354 | } | ||
355 | *zPtr = z; | ||
356 | } | ||
357 | |||
358 | static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig) | ||
359 | { | ||
360 | flag roundNearestEven; | ||
361 | int8 roundIncrement, roundBits; | ||
362 | flag isTiny; | ||
363 | |||
364 | /* SH4 has only 2 rounding modes - round to nearest and round to zero */ | ||
365 | roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST); | ||
366 | roundIncrement = 0x40; | ||
367 | if (!roundNearestEven) { | ||
368 | roundIncrement = 0; | ||
369 | } | ||
370 | roundBits = zSig & 0x7F; | ||
371 | if (0xFD <= (bits16) zExp) { | ||
372 | if ((0xFD < zExp) | ||
373 | || ((zExp == 0xFD) | ||
374 | && ((sbits32) (zSig + roundIncrement) < 0)) | ||
375 | ) { | ||
376 | float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT); | ||
377 | return packFloat32(zSign, 0xFF, | ||
378 | 0) - (roundIncrement == 0); | ||
379 | } | ||
380 | if (zExp < 0) { | ||
381 | isTiny = (zExp < -1) | ||
382 | || (zSig + roundIncrement < 0x80000000); | ||
383 | shift32RightJamming(zSig, -zExp, &zSig); | ||
384 | zExp = 0; | ||
385 | roundBits = zSig & 0x7F; | ||
386 | if (isTiny && roundBits) | ||
387 | float_raise(FPSCR_CAUSE_UNDERFLOW); | ||
388 | } | ||
389 | } | ||
390 | if (roundBits) | ||
391 | float_raise(FPSCR_CAUSE_INEXACT); | ||
392 | zSig = (zSig + roundIncrement) >> 7; | ||
393 | zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven); | ||
394 | if (zSig == 0) | ||
395 | zExp = 0; | ||
396 | return packFloat32(zSign, zExp, zSig); | ||
397 | |||
398 | } | ||
399 | |||
400 | static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig) | ||
401 | { | ||
402 | int8 shiftCount; | ||
403 | |||
404 | shiftCount = countLeadingZeros32(zSig) - 1; | ||
405 | return roundAndPackFloat32(zSign, zExp - shiftCount, | ||
406 | zSig << shiftCount); | ||
407 | } | ||
408 | |||
409 | static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig) | ||
410 | { | ||
411 | flag roundNearestEven; | ||
412 | int16 roundIncrement, roundBits; | ||
413 | flag isTiny; | ||
414 | |||
415 | /* SH4 has only 2 rounding modes - round to nearest and round to zero */ | ||
416 | roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST); | ||
417 | roundIncrement = 0x200; | ||
418 | if (!roundNearestEven) { | ||
419 | roundIncrement = 0; | ||
420 | } | ||
421 | roundBits = zSig & 0x3FF; | ||
422 | if (0x7FD <= (bits16) zExp) { | ||
423 | if ((0x7FD < zExp) | ||
424 | || ((zExp == 0x7FD) | ||
425 | && ((sbits64) (zSig + roundIncrement) < 0)) | ||
426 | ) { | ||
427 | float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT); | ||
428 | return packFloat64(zSign, 0x7FF, | ||
429 | 0) - (roundIncrement == 0); | ||
430 | } | ||
431 | if (zExp < 0) { | ||
432 | isTiny = (zExp < -1) | ||
433 | || (zSig + roundIncrement < | ||
434 | LIT64(0x8000000000000000)); | ||
435 | shift64RightJamming(zSig, -zExp, &zSig); | ||
436 | zExp = 0; | ||
437 | roundBits = zSig & 0x3FF; | ||
438 | if (isTiny && roundBits) | ||
439 | float_raise(FPSCR_CAUSE_UNDERFLOW); | ||
440 | } | ||
441 | } | ||
442 | if (roundBits) | ||
443 | float_raise(FPSCR_CAUSE_INEXACT); | ||
444 | zSig = (zSig + roundIncrement) >> 10; | ||
445 | zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven); | ||
446 | if (zSig == 0) | ||
447 | zExp = 0; | ||
448 | return packFloat64(zSign, zExp, zSig); | ||
449 | |||
450 | } | ||
451 | |||
452 | static float32 subFloat32Sigs(float32 a, float32 b, flag zSign) | ||
453 | { | ||
454 | int16 aExp, bExp, zExp; | ||
455 | bits32 aSig, bSig, zSig; | ||
456 | int16 expDiff; | ||
457 | |||
458 | aSig = extractFloat32Frac(a); | ||
459 | aExp = extractFloat32Exp(a); | ||
460 | bSig = extractFloat32Frac(b); | ||
461 | bExp = extractFloat32Exp(b); | ||
462 | expDiff = aExp - bExp; | ||
463 | aSig <<= 7; | ||
464 | bSig <<= 7; | ||
465 | if (0 < expDiff) | ||
466 | goto aExpBigger; | ||
467 | if (expDiff < 0) | ||
468 | goto bExpBigger; | ||
469 | if (aExp == 0) { | ||
470 | aExp = 1; | ||
471 | bExp = 1; | ||
472 | } | ||
473 | if (bSig < aSig) | ||
474 | goto aBigger; | ||
475 | if (aSig < bSig) | ||
476 | goto bBigger; | ||
477 | return packFloat32(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0); | ||
478 | bExpBigger: | ||
479 | if (bExp == 0xFF) { | ||
480 | return packFloat32(zSign ^ 1, 0xFF, 0); | ||
481 | } | ||
482 | if (aExp == 0) { | ||
483 | ++expDiff; | ||
484 | } else { | ||
485 | aSig |= 0x40000000; | ||
486 | } | ||
487 | shift32RightJamming(aSig, -expDiff, &aSig); | ||
488 | bSig |= 0x40000000; | ||
489 | bBigger: | ||
490 | zSig = bSig - aSig; | ||
491 | zExp = bExp; | ||
492 | zSign ^= 1; | ||
493 | goto normalizeRoundAndPack; | ||
494 | aExpBigger: | ||
495 | if (aExp == 0xFF) { | ||
496 | return a; | ||
497 | } | ||
498 | if (bExp == 0) { | ||
499 | --expDiff; | ||
500 | } else { | ||
501 | bSig |= 0x40000000; | ||
502 | } | ||
503 | shift32RightJamming(bSig, expDiff, &bSig); | ||
504 | aSig |= 0x40000000; | ||
505 | aBigger: | ||
506 | zSig = aSig - bSig; | ||
507 | zExp = aExp; | ||
508 | normalizeRoundAndPack: | ||
509 | --zExp; | ||
510 | return normalizeRoundAndPackFloat32(zSign, zExp, zSig); | ||
511 | |||
512 | } | ||
513 | |||
514 | static float32 addFloat32Sigs(float32 a, float32 b, flag zSign) | ||
515 | { | ||
516 | int16 aExp, bExp, zExp; | ||
517 | bits32 aSig, bSig, zSig; | ||
518 | int16 expDiff; | ||
519 | |||
520 | aSig = extractFloat32Frac(a); | ||
521 | aExp = extractFloat32Exp(a); | ||
522 | bSig = extractFloat32Frac(b); | ||
523 | bExp = extractFloat32Exp(b); | ||
524 | expDiff = aExp - bExp; | ||
525 | aSig <<= 6; | ||
526 | bSig <<= 6; | ||
527 | if (0 < expDiff) { | ||
528 | if (aExp == 0xFF) { | ||
529 | return a; | ||
530 | } | ||
531 | if (bExp == 0) { | ||
532 | --expDiff; | ||
533 | } else { | ||
534 | bSig |= 0x20000000; | ||
535 | } | ||
536 | shift32RightJamming(bSig, expDiff, &bSig); | ||
537 | zExp = aExp; | ||
538 | } else if (expDiff < 0) { | ||
539 | if (bExp == 0xFF) { | ||
540 | return packFloat32(zSign, 0xFF, 0); | ||
541 | } | ||
542 | if (aExp == 0) { | ||
543 | ++expDiff; | ||
544 | } else { | ||
545 | aSig |= 0x20000000; | ||
546 | } | ||
547 | shift32RightJamming(aSig, -expDiff, &aSig); | ||
548 | zExp = bExp; | ||
549 | } else { | ||
550 | if (aExp == 0xFF) { | ||
551 | return a; | ||
552 | } | ||
553 | if (aExp == 0) | ||
554 | return packFloat32(zSign, 0, (aSig + bSig) >> 6); | ||
555 | zSig = 0x40000000 + aSig + bSig; | ||
556 | zExp = aExp; | ||
557 | goto roundAndPack; | ||
558 | } | ||
559 | aSig |= 0x20000000; | ||
560 | zSig = (aSig + bSig) << 1; | ||
561 | --zExp; | ||
562 | if ((sbits32) zSig < 0) { | ||
563 | zSig = aSig + bSig; | ||
564 | ++zExp; | ||
565 | } | ||
566 | roundAndPack: | ||
567 | return roundAndPackFloat32(zSign, zExp, zSig); | ||
568 | |||
569 | } | ||
570 | |||
571 | float64 float64_sub(float64 a, float64 b) | ||
572 | { | ||
573 | flag aSign, bSign; | ||
574 | |||
575 | aSign = extractFloat64Sign(a); | ||
576 | bSign = extractFloat64Sign(b); | ||
577 | if (aSign == bSign) { | ||
578 | return subFloat64Sigs(a, b, aSign); | ||
579 | } else { | ||
580 | return addFloat64Sigs(a, b, aSign); | ||
581 | } | ||
582 | |||
583 | } | ||
584 | |||
585 | float32 float32_sub(float32 a, float32 b) | ||
586 | { | ||
587 | flag aSign, bSign; | ||
588 | |||
589 | aSign = extractFloat32Sign(a); | ||
590 | bSign = extractFloat32Sign(b); | ||
591 | if (aSign == bSign) { | ||
592 | return subFloat32Sigs(a, b, aSign); | ||
593 | } else { | ||
594 | return addFloat32Sigs(a, b, aSign); | ||
595 | } | ||
596 | |||
597 | } | ||
598 | |||
599 | float32 float32_add(float32 a, float32 b) | ||
600 | { | ||
601 | flag aSign, bSign; | ||
602 | |||
603 | aSign = extractFloat32Sign(a); | ||
604 | bSign = extractFloat32Sign(b); | ||
605 | if (aSign == bSign) { | ||
606 | return addFloat32Sigs(a, b, aSign); | ||
607 | } else { | ||
608 | return subFloat32Sigs(a, b, aSign); | ||
609 | } | ||
610 | |||
611 | } | ||
612 | |||
613 | float64 float64_add(float64 a, float64 b) | ||
614 | { | ||
615 | flag aSign, bSign; | ||
616 | |||
617 | aSign = extractFloat64Sign(a); | ||
618 | bSign = extractFloat64Sign(b); | ||
619 | if (aSign == bSign) { | ||
620 | return addFloat64Sigs(a, b, aSign); | ||
621 | } else { | ||
622 | return subFloat64Sigs(a, b, aSign); | ||
623 | } | ||
624 | } | ||
625 | |||
626 | static void | ||
627 | normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, bits64 * zSigPtr) | ||
628 | { | ||
629 | int8 shiftCount; | ||
630 | |||
631 | shiftCount = countLeadingZeros64(aSig) - 11; | ||
632 | *zSigPtr = aSig << shiftCount; | ||
633 | *zExpPtr = 1 - shiftCount; | ||
634 | } | ||
635 | |||
636 | inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, | ||
637 | bits64 * z1Ptr) | ||
638 | { | ||
639 | bits64 z1; | ||
640 | |||
641 | z1 = a1 + b1; | ||
642 | *z1Ptr = z1; | ||
643 | *z0Ptr = a0 + b0 + (z1 < a1); | ||
644 | } | ||
645 | |||
646 | inline void | ||
647 | sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, | ||
648 | bits64 * z1Ptr) | ||
649 | { | ||
650 | *z1Ptr = a1 - b1; | ||
651 | *z0Ptr = a0 - b0 - (a1 < b1); | ||
652 | } | ||
653 | |||
654 | static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b) | ||
655 | { | ||
656 | bits64 b0, b1; | ||
657 | bits64 rem0, rem1, term0, term1; | ||
658 | bits64 z; | ||
659 | if (b <= a0) | ||
660 | return LIT64(0xFFFFFFFFFFFFFFFF); | ||
661 | b0 = b >> 32; | ||
662 | z = (b0 << 32 <= a0) ? LIT64(0xFFFFFFFF00000000) : (a0 / b0) << 32; | ||
663 | mul64To128(b, z, &term0, &term1); | ||
664 | sub128(a0, a1, term0, term1, &rem0, &rem1); | ||
665 | while (((sbits64) rem0) < 0) { | ||
666 | z -= LIT64(0x100000000); | ||
667 | b1 = b << 32; | ||
668 | add128(rem0, rem1, b0, b1, &rem0, &rem1); | ||
669 | } | ||
670 | rem0 = (rem0 << 32) | (rem1 >> 32); | ||
671 | z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : rem0 / b0; | ||
672 | return z; | ||
673 | } | ||
674 | |||
675 | inline void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr) | ||
676 | { | ||
677 | bits32 aHigh, aLow, bHigh, bLow; | ||
678 | bits64 z0, zMiddleA, zMiddleB, z1; | ||
679 | |||
680 | aLow = a; | ||
681 | aHigh = a >> 32; | ||
682 | bLow = b; | ||
683 | bHigh = b >> 32; | ||
684 | z1 = ((bits64) aLow) * bLow; | ||
685 | zMiddleA = ((bits64) aLow) * bHigh; | ||
686 | zMiddleB = ((bits64) aHigh) * bLow; | ||
687 | z0 = ((bits64) aHigh) * bHigh; | ||
688 | zMiddleA += zMiddleB; | ||
689 | z0 += (((bits64) (zMiddleA < zMiddleB)) << 32) + (zMiddleA >> 32); | ||
690 | zMiddleA <<= 32; | ||
691 | z1 += zMiddleA; | ||
692 | z0 += (z1 < zMiddleA); | ||
693 | *z1Ptr = z1; | ||
694 | *z0Ptr = z0; | ||
695 | |||
696 | } | ||
697 | |||
698 | static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr, | ||
699 | bits32 * zSigPtr) | ||
700 | { | ||
701 | int8 shiftCount; | ||
702 | |||
703 | shiftCount = countLeadingZeros32(aSig) - 8; | ||
704 | *zSigPtr = aSig << shiftCount; | ||
705 | *zExpPtr = 1 - shiftCount; | ||
706 | |||
707 | } | ||
708 | |||
709 | float64 float64_div(float64 a, float64 b) | ||
710 | { | ||
711 | flag aSign, bSign, zSign; | ||
712 | int16 aExp, bExp, zExp; | ||
713 | bits64 aSig, bSig, zSig; | ||
714 | bits64 rem0, rem1; | ||
715 | bits64 term0, term1; | ||
716 | |||
717 | aSig = extractFloat64Frac(a); | ||
718 | aExp = extractFloat64Exp(a); | ||
719 | aSign = extractFloat64Sign(a); | ||
720 | bSig = extractFloat64Frac(b); | ||
721 | bExp = extractFloat64Exp(b); | ||
722 | bSign = extractFloat64Sign(b); | ||
723 | zSign = aSign ^ bSign; | ||
724 | if (aExp == 0x7FF) { | ||
725 | if (bExp == 0x7FF) { | ||
726 | } | ||
727 | return packFloat64(zSign, 0x7FF, 0); | ||
728 | } | ||
729 | if (bExp == 0x7FF) { | ||
730 | return packFloat64(zSign, 0, 0); | ||
731 | } | ||
732 | if (bExp == 0) { | ||
733 | if (bSig == 0) { | ||
734 | if ((aExp | aSig) == 0) { | ||
735 | float_raise(FPSCR_CAUSE_INVALID); | ||
736 | } | ||
737 | return packFloat64(zSign, 0x7FF, 0); | ||
738 | } | ||
739 | normalizeFloat64Subnormal(bSig, &bExp, &bSig); | ||
740 | } | ||
741 | if (aExp == 0) { | ||
742 | if (aSig == 0) | ||
743 | return packFloat64(zSign, 0, 0); | ||
744 | normalizeFloat64Subnormal(aSig, &aExp, &aSig); | ||
745 | } | ||
746 | zExp = aExp - bExp + 0x3FD; | ||
747 | aSig = (aSig | LIT64(0x0010000000000000)) << 10; | ||
748 | bSig = (bSig | LIT64(0x0010000000000000)) << 11; | ||
749 | if (bSig <= (aSig + aSig)) { | ||
750 | aSig >>= 1; | ||
751 | ++zExp; | ||
752 | } | ||
753 | zSig = estimateDiv128To64(aSig, 0, bSig); | ||
754 | if ((zSig & 0x1FF) <= 2) { | ||
755 | mul64To128(bSig, zSig, &term0, &term1); | ||
756 | sub128(aSig, 0, term0, term1, &rem0, &rem1); | ||
757 | while ((sbits64) rem0 < 0) { | ||
758 | --zSig; | ||
759 | add128(rem0, rem1, 0, bSig, &rem0, &rem1); | ||
760 | } | ||
761 | zSig |= (rem1 != 0); | ||
762 | } | ||
763 | return roundAndPackFloat64(zSign, zExp, zSig); | ||
764 | |||
765 | } | ||
766 | |||
767 | float32 float32_div(float32 a, float32 b) | ||
768 | { | ||
769 | flag aSign, bSign, zSign; | ||
770 | int16 aExp, bExp, zExp; | ||
771 | bits32 aSig, bSig, zSig; | ||
772 | |||
773 | aSig = extractFloat32Frac(a); | ||
774 | aExp = extractFloat32Exp(a); | ||
775 | aSign = extractFloat32Sign(a); | ||
776 | bSig = extractFloat32Frac(b); | ||
777 | bExp = extractFloat32Exp(b); | ||
778 | bSign = extractFloat32Sign(b); | ||
779 | zSign = aSign ^ bSign; | ||
780 | if (aExp == 0xFF) { | ||
781 | if (bExp == 0xFF) { | ||
782 | } | ||
783 | return packFloat32(zSign, 0xFF, 0); | ||
784 | } | ||
785 | if (bExp == 0xFF) { | ||
786 | return packFloat32(zSign, 0, 0); | ||
787 | } | ||
788 | if (bExp == 0) { | ||
789 | if (bSig == 0) { | ||
790 | return packFloat32(zSign, 0xFF, 0); | ||
791 | } | ||
792 | normalizeFloat32Subnormal(bSig, &bExp, &bSig); | ||
793 | } | ||
794 | if (aExp == 0) { | ||
795 | if (aSig == 0) | ||
796 | return packFloat32(zSign, 0, 0); | ||
797 | normalizeFloat32Subnormal(aSig, &aExp, &aSig); | ||
798 | } | ||
799 | zExp = aExp - bExp + 0x7D; | ||
800 | aSig = (aSig | 0x00800000) << 7; | ||
801 | bSig = (bSig | 0x00800000) << 8; | ||
802 | if (bSig <= (aSig + aSig)) { | ||
803 | aSig >>= 1; | ||
804 | ++zExp; | ||
805 | } | ||
806 | zSig = (((bits64) aSig) << 32) / bSig; | ||
807 | if ((zSig & 0x3F) == 0) { | ||
808 | zSig |= (((bits64) bSig) * zSig != ((bits64) aSig) << 32); | ||
809 | } | ||
810 | return roundAndPackFloat32(zSign, zExp, zSig); | ||
811 | |||
812 | } | ||
813 | |||
814 | float32 float32_mul(float32 a, float32 b) | ||
815 | { | ||
816 | char aSign, bSign, zSign; | ||
817 | int aExp, bExp, zExp; | ||
818 | unsigned int aSig, bSig; | ||
819 | unsigned long long zSig64; | ||
820 | unsigned int zSig; | ||
821 | |||
822 | aSig = extractFloat32Frac(a); | ||
823 | aExp = extractFloat32Exp(a); | ||
824 | aSign = extractFloat32Sign(a); | ||
825 | bSig = extractFloat32Frac(b); | ||
826 | bExp = extractFloat32Exp(b); | ||
827 | bSign = extractFloat32Sign(b); | ||
828 | zSign = aSign ^ bSign; | ||
829 | if (aExp == 0) { | ||
830 | if (aSig == 0) | ||
831 | return packFloat32(zSign, 0, 0); | ||
832 | normalizeFloat32Subnormal(aSig, &aExp, &aSig); | ||
833 | } | ||
834 | if (bExp == 0) { | ||
835 | if (bSig == 0) | ||
836 | return packFloat32(zSign, 0, 0); | ||
837 | normalizeFloat32Subnormal(bSig, &bExp, &bSig); | ||
838 | } | ||
839 | if ((bExp == 0xff && bSig == 0) || (aExp == 0xff && aSig == 0)) | ||
840 | return roundAndPackFloat32(zSign, 0xff, 0); | ||
841 | |||
842 | zExp = aExp + bExp - 0x7F; | ||
843 | aSig = (aSig | 0x00800000) << 7; | ||
844 | bSig = (bSig | 0x00800000) << 8; | ||
845 | shift64RightJamming(((unsigned long long)aSig) * bSig, 32, &zSig64); | ||
846 | zSig = zSig64; | ||
847 | if (0 <= (signed int)(zSig << 1)) { | ||
848 | zSig <<= 1; | ||
849 | --zExp; | ||
850 | } | ||
851 | return roundAndPackFloat32(zSign, zExp, zSig); | ||
852 | |||
853 | } | ||
854 | |||
855 | float64 float64_mul(float64 a, float64 b) | ||
856 | { | ||
857 | char aSign, bSign, zSign; | ||
858 | int aExp, bExp, zExp; | ||
859 | unsigned long long int aSig, bSig, zSig0, zSig1; | ||
860 | |||
861 | aSig = extractFloat64Frac(a); | ||
862 | aExp = extractFloat64Exp(a); | ||
863 | aSign = extractFloat64Sign(a); | ||
864 | bSig = extractFloat64Frac(b); | ||
865 | bExp = extractFloat64Exp(b); | ||
866 | bSign = extractFloat64Sign(b); | ||
867 | zSign = aSign ^ bSign; | ||
868 | |||
869 | if (aExp == 0) { | ||
870 | if (aSig == 0) | ||
871 | return packFloat64(zSign, 0, 0); | ||
872 | normalizeFloat64Subnormal(aSig, &aExp, &aSig); | ||
873 | } | ||
874 | if (bExp == 0) { | ||
875 | if (bSig == 0) | ||
876 | return packFloat64(zSign, 0, 0); | ||
877 | normalizeFloat64Subnormal(bSig, &bExp, &bSig); | ||
878 | } | ||
879 | if ((aExp == 0x7ff && aSig == 0) || (bExp == 0x7ff && bSig == 0)) | ||
880 | return roundAndPackFloat64(zSign, 0x7ff, 0); | ||
881 | |||
882 | zExp = aExp + bExp - 0x3FF; | ||
883 | aSig = (aSig | 0x0010000000000000LL) << 10; | ||
884 | bSig = (bSig | 0x0010000000000000LL) << 11; | ||
885 | mul64To128(aSig, bSig, &zSig0, &zSig1); | ||
886 | zSig0 |= (zSig1 != 0); | ||
887 | if (0 <= (signed long long int)(zSig0 << 1)) { | ||
888 | zSig0 <<= 1; | ||
889 | --zExp; | ||
890 | } | ||
891 | return roundAndPackFloat64(zSign, zExp, zSig0); | ||
892 | } | ||