aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sh/kernel/cpu/sh4/softfloat.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/sh/kernel/cpu/sh4/softfloat.c')
-rw-r--r--arch/sh/kernel/cpu/sh4/softfloat.c892
1 files changed, 892 insertions, 0 deletions
diff --git a/arch/sh/kernel/cpu/sh4/softfloat.c b/arch/sh/kernel/cpu/sh4/softfloat.c
new file mode 100644
index 00000000000..7b2d337ee41
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4/softfloat.c
@@ -0,0 +1,892 @@
1/*
2 * Floating point emulation support for subnormalised numbers on SH4
3 * architecture This file is derived from the SoftFloat IEC/IEEE
4 * Floating-point Arithmetic Package, Release 2 the original license of
5 * which is reproduced below.
6 *
7 * ========================================================================
8 *
9 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
10 * Arithmetic Package, Release 2.
11 *
12 * Written by John R. Hauser. This work was made possible in part by the
13 * International Computer Science Institute, located at Suite 600, 1947 Center
14 * Street, Berkeley, California 94704. Funding was partially provided by the
15 * National Science Foundation under grant MIP-9311980. The original version
16 * of this code was written as part of a project to build a fixed-point vector
17 * processor in collaboration with the University of California at Berkeley,
18 * overseen by Profs. Nelson Morgan and John Wawrzynek. More information
19 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
20 * arithmetic/softfloat.html'.
21 *
22 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
23 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
24 * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
25 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
26 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
27 *
28 * Derivative works are acceptable, even for commercial purposes, so long as
29 * (1) they include prominent notice that the work is derivative, and (2) they
30 * include prominent notice akin to these three paragraphs for those parts of
31 * this code that are retained.
32 *
33 * ========================================================================
34 *
35 * SH4 modifications by Ismail Dhaoui <ismail.dhaoui@st.com>
36 * and Kamel Khelifi <kamel.khelifi@st.com>
37 */
38#include <linux/kernel.h>
39#include <asm/cpu/fpu.h>
40
41#define LIT64( a ) a##LL
42
43typedef char flag;
44typedef unsigned char uint8;
45typedef signed char int8;
46typedef int uint16;
47typedef int int16;
48typedef unsigned int uint32;
49typedef signed int int32;
50
51typedef unsigned long long int bits64;
52typedef signed long long int sbits64;
53
54typedef unsigned char bits8;
55typedef signed char sbits8;
56typedef unsigned short int bits16;
57typedef signed short int sbits16;
58typedef unsigned int bits32;
59typedef signed int sbits32;
60
61typedef unsigned long long int uint64;
62typedef signed long long int int64;
63
64typedef unsigned long int float32;
65typedef unsigned long long float64;
66
67extern void float_raise(unsigned int flags); /* in fpu.c */
68extern int float_rounding_mode(void); /* in fpu.c */
69
70inline bits64 extractFloat64Frac(float64 a);
71inline flag extractFloat64Sign(float64 a);
72inline int16 extractFloat64Exp(float64 a);
73inline int16 extractFloat32Exp(float32 a);
74inline flag extractFloat32Sign(float32 a);
75inline bits32 extractFloat32Frac(float32 a);
76inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig);
77inline void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr);
78inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig);
79inline void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr);
80float64 float64_sub(float64 a, float64 b);
81float32 float32_sub(float32 a, float32 b);
82float32 float32_add(float32 a, float32 b);
83float64 float64_add(float64 a, float64 b);
84float64 float64_div(float64 a, float64 b);
85float32 float32_div(float32 a, float32 b);
86float32 float32_mul(float32 a, float32 b);
87float64 float64_mul(float64 a, float64 b);
88inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
89 bits64 * z1Ptr);
90inline void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
91 bits64 * z1Ptr);
92inline void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr);
93
94static int8 countLeadingZeros32(bits32 a);
95static int8 countLeadingZeros64(bits64 a);
96static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp,
97 bits64 zSig);
98static float64 subFloat64Sigs(float64 a, float64 b, flag zSign);
99static float64 addFloat64Sigs(float64 a, float64 b, flag zSign);
100static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig);
101static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp,
102 bits32 zSig);
103static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig);
104static float32 subFloat32Sigs(float32 a, float32 b, flag zSign);
105static float32 addFloat32Sigs(float32 a, float32 b, flag zSign);
106static void normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr,
107 bits64 * zSigPtr);
108static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b);
109static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
110 bits32 * zSigPtr);
111
112inline bits64 extractFloat64Frac(float64 a)
113{
114 return a & LIT64(0x000FFFFFFFFFFFFF);
115}
116
117inline flag extractFloat64Sign(float64 a)
118{
119 return a >> 63;
120}
121
122inline int16 extractFloat64Exp(float64 a)
123{
124 return (a >> 52) & 0x7FF;
125}
126
127inline int16 extractFloat32Exp(float32 a)
128{
129 return (a >> 23) & 0xFF;
130}
131
132inline flag extractFloat32Sign(float32 a)
133{
134 return a >> 31;
135}
136
137inline bits32 extractFloat32Frac(float32 a)
138{
139 return a & 0x007FFFFF;
140}
141
142inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
143{
144 return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig;
145}
146
147inline void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr)
148{
149 bits64 z;
150
151 if (count == 0) {
152 z = a;
153 } else if (count < 64) {
154 z = (a >> count) | ((a << ((-count) & 63)) != 0);
155 } else {
156 z = (a != 0);
157 }
158 *zPtr = z;
159}
160
161static int8 countLeadingZeros32(bits32 a)
162{
163 static const int8 countLeadingZerosHigh[] = {
164 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
165 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
166 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
167 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
168 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
169 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
170 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
171 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
172 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
173 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
174 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
177 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
178 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
180 };
181 int8 shiftCount;
182
183 shiftCount = 0;
184 if (a < 0x10000) {
185 shiftCount += 16;
186 a <<= 16;
187 }
188 if (a < 0x1000000) {
189 shiftCount += 8;
190 a <<= 8;
191 }
192 shiftCount += countLeadingZerosHigh[a >> 24];
193 return shiftCount;
194
195}
196
197static int8 countLeadingZeros64(bits64 a)
198{
199 int8 shiftCount;
200
201 shiftCount = 0;
202 if (a < ((bits64) 1) << 32) {
203 shiftCount += 32;
204 } else {
205 a >>= 32;
206 }
207 shiftCount += countLeadingZeros32(a);
208 return shiftCount;
209
210}
211
212static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
213{
214 int8 shiftCount;
215
216 shiftCount = countLeadingZeros64(zSig) - 1;
217 return roundAndPackFloat64(zSign, zExp - shiftCount,
218 zSig << shiftCount);
219
220}
221
222static float64 subFloat64Sigs(float64 a, float64 b, flag zSign)
223{
224 int16 aExp, bExp, zExp;
225 bits64 aSig, bSig, zSig;
226 int16 expDiff;
227
228 aSig = extractFloat64Frac(a);
229 aExp = extractFloat64Exp(a);
230 bSig = extractFloat64Frac(b);
231 bExp = extractFloat64Exp(b);
232 expDiff = aExp - bExp;
233 aSig <<= 10;
234 bSig <<= 10;
235 if (0 < expDiff)
236 goto aExpBigger;
237 if (expDiff < 0)
238 goto bExpBigger;
239 if (aExp == 0) {
240 aExp = 1;
241 bExp = 1;
242 }
243 if (bSig < aSig)
244 goto aBigger;
245 if (aSig < bSig)
246 goto bBigger;
247 return packFloat64(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
248 bExpBigger:
249 if (bExp == 0x7FF) {
250 return packFloat64(zSign ^ 1, 0x7FF, 0);
251 }
252 if (aExp == 0) {
253 ++expDiff;
254 } else {
255 aSig |= LIT64(0x4000000000000000);
256 }
257 shift64RightJamming(aSig, -expDiff, &aSig);
258 bSig |= LIT64(0x4000000000000000);
259 bBigger:
260 zSig = bSig - aSig;
261 zExp = bExp;
262 zSign ^= 1;
263 goto normalizeRoundAndPack;
264 aExpBigger:
265 if (aExp == 0x7FF) {
266 return a;
267 }
268 if (bExp == 0) {
269 --expDiff;
270 } else {
271 bSig |= LIT64(0x4000000000000000);
272 }
273 shift64RightJamming(bSig, expDiff, &bSig);
274 aSig |= LIT64(0x4000000000000000);
275 aBigger:
276 zSig = aSig - bSig;
277 zExp = aExp;
278 normalizeRoundAndPack:
279 --zExp;
280 return normalizeRoundAndPackFloat64(zSign, zExp, zSig);
281
282}
283static float64 addFloat64Sigs(float64 a, float64 b, flag zSign)
284{
285 int16 aExp, bExp, zExp;
286 bits64 aSig, bSig, zSig;
287 int16 expDiff;
288
289 aSig = extractFloat64Frac(a);
290 aExp = extractFloat64Exp(a);
291 bSig = extractFloat64Frac(b);
292 bExp = extractFloat64Exp(b);
293 expDiff = aExp - bExp;
294 aSig <<= 9;
295 bSig <<= 9;
296 if (0 < expDiff) {
297 if (aExp == 0x7FF) {
298 return a;
299 }
300 if (bExp == 0) {
301 --expDiff;
302 } else {
303 bSig |= LIT64(0x2000000000000000);
304 }
305 shift64RightJamming(bSig, expDiff, &bSig);
306 zExp = aExp;
307 } else if (expDiff < 0) {
308 if (bExp == 0x7FF) {
309 return packFloat64(zSign, 0x7FF, 0);
310 }
311 if (aExp == 0) {
312 ++expDiff;
313 } else {
314 aSig |= LIT64(0x2000000000000000);
315 }
316 shift64RightJamming(aSig, -expDiff, &aSig);
317 zExp = bExp;
318 } else {
319 if (aExp == 0x7FF) {
320 return a;
321 }
322 if (aExp == 0)
323 return packFloat64(zSign, 0, (aSig + bSig) >> 9);
324 zSig = LIT64(0x4000000000000000) + aSig + bSig;
325 zExp = aExp;
326 goto roundAndPack;
327 }
328 aSig |= LIT64(0x2000000000000000);
329 zSig = (aSig + bSig) << 1;
330 --zExp;
331 if ((sbits64) zSig < 0) {
332 zSig = aSig + bSig;
333 ++zExp;
334 }
335 roundAndPack:
336 return roundAndPackFloat64(zSign, zExp, zSig);
337
338}
339
340inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig)
341{
342 return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig;
343}
344
345inline void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr)
346{
347 bits32 z;
348 if (count == 0) {
349 z = a;
350 } else if (count < 32) {
351 z = (a >> count) | ((a << ((-count) & 31)) != 0);
352 } else {
353 z = (a != 0);
354 }
355 *zPtr = z;
356}
357
358static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
359{
360 flag roundNearestEven;
361 int8 roundIncrement, roundBits;
362 flag isTiny;
363
364 /* SH4 has only 2 rounding modes - round to nearest and round to zero */
365 roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
366 roundIncrement = 0x40;
367 if (!roundNearestEven) {
368 roundIncrement = 0;
369 }
370 roundBits = zSig & 0x7F;
371 if (0xFD <= (bits16) zExp) {
372 if ((0xFD < zExp)
373 || ((zExp == 0xFD)
374 && ((sbits32) (zSig + roundIncrement) < 0))
375 ) {
376 float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
377 return packFloat32(zSign, 0xFF,
378 0) - (roundIncrement == 0);
379 }
380 if (zExp < 0) {
381 isTiny = (zExp < -1)
382 || (zSig + roundIncrement < 0x80000000);
383 shift32RightJamming(zSig, -zExp, &zSig);
384 zExp = 0;
385 roundBits = zSig & 0x7F;
386 if (isTiny && roundBits)
387 float_raise(FPSCR_CAUSE_UNDERFLOW);
388 }
389 }
390 if (roundBits)
391 float_raise(FPSCR_CAUSE_INEXACT);
392 zSig = (zSig + roundIncrement) >> 7;
393 zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
394 if (zSig == 0)
395 zExp = 0;
396 return packFloat32(zSign, zExp, zSig);
397
398}
399
400static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
401{
402 int8 shiftCount;
403
404 shiftCount = countLeadingZeros32(zSig) - 1;
405 return roundAndPackFloat32(zSign, zExp - shiftCount,
406 zSig << shiftCount);
407}
408
409static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
410{
411 flag roundNearestEven;
412 int16 roundIncrement, roundBits;
413 flag isTiny;
414
415 /* SH4 has only 2 rounding modes - round to nearest and round to zero */
416 roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
417 roundIncrement = 0x200;
418 if (!roundNearestEven) {
419 roundIncrement = 0;
420 }
421 roundBits = zSig & 0x3FF;
422 if (0x7FD <= (bits16) zExp) {
423 if ((0x7FD < zExp)
424 || ((zExp == 0x7FD)
425 && ((sbits64) (zSig + roundIncrement) < 0))
426 ) {
427 float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
428 return packFloat64(zSign, 0x7FF,
429 0) - (roundIncrement == 0);
430 }
431 if (zExp < 0) {
432 isTiny = (zExp < -1)
433 || (zSig + roundIncrement <
434 LIT64(0x8000000000000000));
435 shift64RightJamming(zSig, -zExp, &zSig);
436 zExp = 0;
437 roundBits = zSig & 0x3FF;
438 if (isTiny && roundBits)
439 float_raise(FPSCR_CAUSE_UNDERFLOW);
440 }
441 }
442 if (roundBits)
443 float_raise(FPSCR_CAUSE_INEXACT);
444 zSig = (zSig + roundIncrement) >> 10;
445 zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
446 if (zSig == 0)
447 zExp = 0;
448 return packFloat64(zSign, zExp, zSig);
449
450}
451
452static float32 subFloat32Sigs(float32 a, float32 b, flag zSign)
453{
454 int16 aExp, bExp, zExp;
455 bits32 aSig, bSig, zSig;
456 int16 expDiff;
457
458 aSig = extractFloat32Frac(a);
459 aExp = extractFloat32Exp(a);
460 bSig = extractFloat32Frac(b);
461 bExp = extractFloat32Exp(b);
462 expDiff = aExp - bExp;
463 aSig <<= 7;
464 bSig <<= 7;
465 if (0 < expDiff)
466 goto aExpBigger;
467 if (expDiff < 0)
468 goto bExpBigger;
469 if (aExp == 0) {
470 aExp = 1;
471 bExp = 1;
472 }
473 if (bSig < aSig)
474 goto aBigger;
475 if (aSig < bSig)
476 goto bBigger;
477 return packFloat32(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
478 bExpBigger:
479 if (bExp == 0xFF) {
480 return packFloat32(zSign ^ 1, 0xFF, 0);
481 }
482 if (aExp == 0) {
483 ++expDiff;
484 } else {
485 aSig |= 0x40000000;
486 }
487 shift32RightJamming(aSig, -expDiff, &aSig);
488 bSig |= 0x40000000;
489 bBigger:
490 zSig = bSig - aSig;
491 zExp = bExp;
492 zSign ^= 1;
493 goto normalizeRoundAndPack;
494 aExpBigger:
495 if (aExp == 0xFF) {
496 return a;
497 }
498 if (bExp == 0) {
499 --expDiff;
500 } else {
501 bSig |= 0x40000000;
502 }
503 shift32RightJamming(bSig, expDiff, &bSig);
504 aSig |= 0x40000000;
505 aBigger:
506 zSig = aSig - bSig;
507 zExp = aExp;
508 normalizeRoundAndPack:
509 --zExp;
510 return normalizeRoundAndPackFloat32(zSign, zExp, zSig);
511
512}
513
514static float32 addFloat32Sigs(float32 a, float32 b, flag zSign)
515{
516 int16 aExp, bExp, zExp;
517 bits32 aSig, bSig, zSig;
518 int16 expDiff;
519
520 aSig = extractFloat32Frac(a);
521 aExp = extractFloat32Exp(a);
522 bSig = extractFloat32Frac(b);
523 bExp = extractFloat32Exp(b);
524 expDiff = aExp - bExp;
525 aSig <<= 6;
526 bSig <<= 6;
527 if (0 < expDiff) {
528 if (aExp == 0xFF) {
529 return a;
530 }
531 if (bExp == 0) {
532 --expDiff;
533 } else {
534 bSig |= 0x20000000;
535 }
536 shift32RightJamming(bSig, expDiff, &bSig);
537 zExp = aExp;
538 } else if (expDiff < 0) {
539 if (bExp == 0xFF) {
540 return packFloat32(zSign, 0xFF, 0);
541 }
542 if (aExp == 0) {
543 ++expDiff;
544 } else {
545 aSig |= 0x20000000;
546 }
547 shift32RightJamming(aSig, -expDiff, &aSig);
548 zExp = bExp;
549 } else {
550 if (aExp == 0xFF) {
551 return a;
552 }
553 if (aExp == 0)
554 return packFloat32(zSign, 0, (aSig + bSig) >> 6);
555 zSig = 0x40000000 + aSig + bSig;
556 zExp = aExp;
557 goto roundAndPack;
558 }
559 aSig |= 0x20000000;
560 zSig = (aSig + bSig) << 1;
561 --zExp;
562 if ((sbits32) zSig < 0) {
563 zSig = aSig + bSig;
564 ++zExp;
565 }
566 roundAndPack:
567 return roundAndPackFloat32(zSign, zExp, zSig);
568
569}
570
571float64 float64_sub(float64 a, float64 b)
572{
573 flag aSign, bSign;
574
575 aSign = extractFloat64Sign(a);
576 bSign = extractFloat64Sign(b);
577 if (aSign == bSign) {
578 return subFloat64Sigs(a, b, aSign);
579 } else {
580 return addFloat64Sigs(a, b, aSign);
581 }
582
583}
584
585float32 float32_sub(float32 a, float32 b)
586{
587 flag aSign, bSign;
588
589 aSign = extractFloat32Sign(a);
590 bSign = extractFloat32Sign(b);
591 if (aSign == bSign) {
592 return subFloat32Sigs(a, b, aSign);
593 } else {
594 return addFloat32Sigs(a, b, aSign);
595 }
596
597}
598
599float32 float32_add(float32 a, float32 b)
600{
601 flag aSign, bSign;
602
603 aSign = extractFloat32Sign(a);
604 bSign = extractFloat32Sign(b);
605 if (aSign == bSign) {
606 return addFloat32Sigs(a, b, aSign);
607 } else {
608 return subFloat32Sigs(a, b, aSign);
609 }
610
611}
612
613float64 float64_add(float64 a, float64 b)
614{
615 flag aSign, bSign;
616
617 aSign = extractFloat64Sign(a);
618 bSign = extractFloat64Sign(b);
619 if (aSign == bSign) {
620 return addFloat64Sigs(a, b, aSign);
621 } else {
622 return subFloat64Sigs(a, b, aSign);
623 }
624}
625
626static void
627normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, bits64 * zSigPtr)
628{
629 int8 shiftCount;
630
631 shiftCount = countLeadingZeros64(aSig) - 11;
632 *zSigPtr = aSig << shiftCount;
633 *zExpPtr = 1 - shiftCount;
634}
635
636inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
637 bits64 * z1Ptr)
638{
639 bits64 z1;
640
641 z1 = a1 + b1;
642 *z1Ptr = z1;
643 *z0Ptr = a0 + b0 + (z1 < a1);
644}
645
646inline void
647sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
648 bits64 * z1Ptr)
649{
650 *z1Ptr = a1 - b1;
651 *z0Ptr = a0 - b0 - (a1 < b1);
652}
653
654static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b)
655{
656 bits64 b0, b1;
657 bits64 rem0, rem1, term0, term1;
658 bits64 z;
659 if (b <= a0)
660 return LIT64(0xFFFFFFFFFFFFFFFF);
661 b0 = b >> 32;
662 z = (b0 << 32 <= a0) ? LIT64(0xFFFFFFFF00000000) : (a0 / b0) << 32;
663 mul64To128(b, z, &term0, &term1);
664 sub128(a0, a1, term0, term1, &rem0, &rem1);
665 while (((sbits64) rem0) < 0) {
666 z -= LIT64(0x100000000);
667 b1 = b << 32;
668 add128(rem0, rem1, b0, b1, &rem0, &rem1);
669 }
670 rem0 = (rem0 << 32) | (rem1 >> 32);
671 z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : rem0 / b0;
672 return z;
673}
674
675inline void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr)
676{
677 bits32 aHigh, aLow, bHigh, bLow;
678 bits64 z0, zMiddleA, zMiddleB, z1;
679
680 aLow = a;
681 aHigh = a >> 32;
682 bLow = b;
683 bHigh = b >> 32;
684 z1 = ((bits64) aLow) * bLow;
685 zMiddleA = ((bits64) aLow) * bHigh;
686 zMiddleB = ((bits64) aHigh) * bLow;
687 z0 = ((bits64) aHigh) * bHigh;
688 zMiddleA += zMiddleB;
689 z0 += (((bits64) (zMiddleA < zMiddleB)) << 32) + (zMiddleA >> 32);
690 zMiddleA <<= 32;
691 z1 += zMiddleA;
692 z0 += (z1 < zMiddleA);
693 *z1Ptr = z1;
694 *z0Ptr = z0;
695
696}
697
698static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
699 bits32 * zSigPtr)
700{
701 int8 shiftCount;
702
703 shiftCount = countLeadingZeros32(aSig) - 8;
704 *zSigPtr = aSig << shiftCount;
705 *zExpPtr = 1 - shiftCount;
706
707}
708
709float64 float64_div(float64 a, float64 b)
710{
711 flag aSign, bSign, zSign;
712 int16 aExp, bExp, zExp;
713 bits64 aSig, bSig, zSig;
714 bits64 rem0, rem1;
715 bits64 term0, term1;
716
717 aSig = extractFloat64Frac(a);
718 aExp = extractFloat64Exp(a);
719 aSign = extractFloat64Sign(a);
720 bSig = extractFloat64Frac(b);
721 bExp = extractFloat64Exp(b);
722 bSign = extractFloat64Sign(b);
723 zSign = aSign ^ bSign;
724 if (aExp == 0x7FF) {
725 if (bExp == 0x7FF) {
726 }
727 return packFloat64(zSign, 0x7FF, 0);
728 }
729 if (bExp == 0x7FF) {
730 return packFloat64(zSign, 0, 0);
731 }
732 if (bExp == 0) {
733 if (bSig == 0) {
734 if ((aExp | aSig) == 0) {
735 float_raise(FPSCR_CAUSE_INVALID);
736 }
737 return packFloat64(zSign, 0x7FF, 0);
738 }
739 normalizeFloat64Subnormal(bSig, &bExp, &bSig);
740 }
741 if (aExp == 0) {
742 if (aSig == 0)
743 return packFloat64(zSign, 0, 0);
744 normalizeFloat64Subnormal(aSig, &aExp, &aSig);
745 }
746 zExp = aExp - bExp + 0x3FD;
747 aSig = (aSig | LIT64(0x0010000000000000)) << 10;
748 bSig = (bSig | LIT64(0x0010000000000000)) << 11;
749 if (bSig <= (aSig + aSig)) {
750 aSig >>= 1;
751 ++zExp;
752 }
753 zSig = estimateDiv128To64(aSig, 0, bSig);
754 if ((zSig & 0x1FF) <= 2) {
755 mul64To128(bSig, zSig, &term0, &term1);
756 sub128(aSig, 0, term0, term1, &rem0, &rem1);
757 while ((sbits64) rem0 < 0) {
758 --zSig;
759 add128(rem0, rem1, 0, bSig, &rem0, &rem1);
760 }
761 zSig |= (rem1 != 0);
762 }
763 return roundAndPackFloat64(zSign, zExp, zSig);
764
765}
766
767float32 float32_div(float32 a, float32 b)
768{
769 flag aSign, bSign, zSign;
770 int16 aExp, bExp, zExp;
771 bits32 aSig, bSig, zSig;
772
773 aSig = extractFloat32Frac(a);
774 aExp = extractFloat32Exp(a);
775 aSign = extractFloat32Sign(a);
776 bSig = extractFloat32Frac(b);
777 bExp = extractFloat32Exp(b);
778 bSign = extractFloat32Sign(b);
779 zSign = aSign ^ bSign;
780 if (aExp == 0xFF) {
781 if (bExp == 0xFF) {
782 }
783 return packFloat32(zSign, 0xFF, 0);
784 }
785 if (bExp == 0xFF) {
786 return packFloat32(zSign, 0, 0);
787 }
788 if (bExp == 0) {
789 if (bSig == 0) {
790 return packFloat32(zSign, 0xFF, 0);
791 }
792 normalizeFloat32Subnormal(bSig, &bExp, &bSig);
793 }
794 if (aExp == 0) {
795 if (aSig == 0)
796 return packFloat32(zSign, 0, 0);
797 normalizeFloat32Subnormal(aSig, &aExp, &aSig);
798 }
799 zExp = aExp - bExp + 0x7D;
800 aSig = (aSig | 0x00800000) << 7;
801 bSig = (bSig | 0x00800000) << 8;
802 if (bSig <= (aSig + aSig)) {
803 aSig >>= 1;
804 ++zExp;
805 }
806 zSig = (((bits64) aSig) << 32) / bSig;
807 if ((zSig & 0x3F) == 0) {
808 zSig |= (((bits64) bSig) * zSig != ((bits64) aSig) << 32);
809 }
810 return roundAndPackFloat32(zSign, zExp, zSig);
811
812}
813
814float32 float32_mul(float32 a, float32 b)
815{
816 char aSign, bSign, zSign;
817 int aExp, bExp, zExp;
818 unsigned int aSig, bSig;
819 unsigned long long zSig64;
820 unsigned int zSig;
821
822 aSig = extractFloat32Frac(a);
823 aExp = extractFloat32Exp(a);
824 aSign = extractFloat32Sign(a);
825 bSig = extractFloat32Frac(b);
826 bExp = extractFloat32Exp(b);
827 bSign = extractFloat32Sign(b);
828 zSign = aSign ^ bSign;
829 if (aExp == 0) {
830 if (aSig == 0)
831 return packFloat32(zSign, 0, 0);
832 normalizeFloat32Subnormal(aSig, &aExp, &aSig);
833 }
834 if (bExp == 0) {
835 if (bSig == 0)
836 return packFloat32(zSign, 0, 0);
837 normalizeFloat32Subnormal(bSig, &bExp, &bSig);
838 }
839 if ((bExp == 0xff && bSig == 0) || (aExp == 0xff && aSig == 0))
840 return roundAndPackFloat32(zSign, 0xff, 0);
841
842 zExp = aExp + bExp - 0x7F;
843 aSig = (aSig | 0x00800000) << 7;
844 bSig = (bSig | 0x00800000) << 8;
845 shift64RightJamming(((unsigned long long)aSig) * bSig, 32, &zSig64);
846 zSig = zSig64;
847 if (0 <= (signed int)(zSig << 1)) {
848 zSig <<= 1;
849 --zExp;
850 }
851 return roundAndPackFloat32(zSign, zExp, zSig);
852
853}
854
855float64 float64_mul(float64 a, float64 b)
856{
857 char aSign, bSign, zSign;
858 int aExp, bExp, zExp;
859 unsigned long long int aSig, bSig, zSig0, zSig1;
860
861 aSig = extractFloat64Frac(a);
862 aExp = extractFloat64Exp(a);
863 aSign = extractFloat64Sign(a);
864 bSig = extractFloat64Frac(b);
865 bExp = extractFloat64Exp(b);
866 bSign = extractFloat64Sign(b);
867 zSign = aSign ^ bSign;
868
869 if (aExp == 0) {
870 if (aSig == 0)
871 return packFloat64(zSign, 0, 0);
872 normalizeFloat64Subnormal(aSig, &aExp, &aSig);
873 }
874 if (bExp == 0) {
875 if (bSig == 0)
876 return packFloat64(zSign, 0, 0);
877 normalizeFloat64Subnormal(bSig, &bExp, &bSig);
878 }
879 if ((aExp == 0x7ff && aSig == 0) || (bExp == 0x7ff && bSig == 0))
880 return roundAndPackFloat64(zSign, 0x7ff, 0);
881
882 zExp = aExp + bExp - 0x3FF;
883 aSig = (aSig | 0x0010000000000000LL) << 10;
884 bSig = (bSig | 0x0010000000000000LL) << 11;
885 mul64To128(aSig, bSig, &zSig0, &zSig1);
886 zSig0 |= (zSig1 != 0);
887 if (0 <= (signed long long int)(zSig0 << 1)) {
888 zSig0 <<= 1;
889 --zExp;
890 }
891 return roundAndPackFloat64(zSign, zExp, zSig0);
892}