diff options
Diffstat (limited to 'arch/parisc/math-emu/fmpyfadd.c')
-rw-r--r-- | arch/parisc/math-emu/fmpyfadd.c | 2655 |
1 files changed, 2655 insertions, 0 deletions
diff --git a/arch/parisc/math-emu/fmpyfadd.c b/arch/parisc/math-emu/fmpyfadd.c new file mode 100644 index 000000000000..5dd7f93a89be --- /dev/null +++ b/arch/parisc/math-emu/fmpyfadd.c | |||
@@ -0,0 +1,2655 @@ | |||
1 | /* | ||
2 | * Linux/PA-RISC Project (http://www.parisc-linux.org/) | ||
3 | * | ||
4 | * Floating-point emulation code | ||
5 | * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2, or (at your option) | ||
10 | * any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
20 | */ | ||
21 | /* | ||
22 | * BEGIN_DESC | ||
23 | * | ||
24 | * File: | ||
25 | * @(#) pa/spmath/fmpyfadd.c $Revision: 1.1 $ | ||
26 | * | ||
27 | * Purpose: | ||
28 | * Double Floating-point Multiply Fused Add | ||
29 | * Double Floating-point Multiply Negate Fused Add | ||
30 | * Single Floating-point Multiply Fused Add | ||
31 | * Single Floating-point Multiply Negate Fused Add | ||
32 | * | ||
33 | * External Interfaces: | ||
34 | * dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr) | ||
35 | * dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr) | ||
36 | * sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr) | ||
37 | * sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr) | ||
38 | * | ||
39 | * Internal Interfaces: | ||
40 | * | ||
41 | * Theory: | ||
42 | * <<please update with a overview of the operation of this file>> | ||
43 | * | ||
44 | * END_DESC | ||
45 | */ | ||
46 | |||
47 | |||
48 | #include "float.h" | ||
49 | #include "sgl_float.h" | ||
50 | #include "dbl_float.h" | ||
51 | |||
52 | |||
53 | /* | ||
54 | * Double Floating-point Multiply Fused Add | ||
55 | */ | ||
56 | |||
57 | int | ||
58 | dbl_fmpyfadd( | ||
59 | dbl_floating_point *src1ptr, | ||
60 | dbl_floating_point *src2ptr, | ||
61 | dbl_floating_point *src3ptr, | ||
62 | unsigned int *status, | ||
63 | dbl_floating_point *dstptr) | ||
64 | { | ||
65 | unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2; | ||
66 | register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4; | ||
67 | unsigned int rightp1, rightp2, rightp3, rightp4; | ||
68 | unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0; | ||
69 | register int mpy_exponent, add_exponent, count; | ||
70 | boolean inexact = FALSE, is_tiny = FALSE; | ||
71 | |||
72 | unsigned int signlessleft1, signlessright1, save; | ||
73 | register int result_exponent, diff_exponent; | ||
74 | int sign_save, jumpsize; | ||
75 | |||
76 | Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2); | ||
77 | Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2); | ||
78 | Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2); | ||
79 | |||
80 | /* | ||
81 | * set sign bit of result of multiply | ||
82 | */ | ||
83 | if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1)) | ||
84 | Dbl_setnegativezerop1(resultp1); | ||
85 | else Dbl_setzerop1(resultp1); | ||
86 | |||
87 | /* | ||
88 | * Generate multiply exponent | ||
89 | */ | ||
90 | mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS; | ||
91 | |||
92 | /* | ||
93 | * check first operand for NaN's or infinity | ||
94 | */ | ||
95 | if (Dbl_isinfinity_exponent(opnd1p1)) { | ||
96 | if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { | ||
97 | if (Dbl_isnotnan(opnd2p1,opnd2p2) && | ||
98 | Dbl_isnotnan(opnd3p1,opnd3p2)) { | ||
99 | if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) { | ||
100 | /* | ||
101 | * invalid since operands are infinity | ||
102 | * and zero | ||
103 | */ | ||
104 | if (Is_invalidtrap_enabled()) | ||
105 | return(OPC_2E_INVALIDEXCEPTION); | ||
106 | Set_invalidflag(); | ||
107 | Dbl_makequietnan(resultp1,resultp2); | ||
108 | Dbl_copytoptr(resultp1,resultp2,dstptr); | ||
109 | return(NOEXCEPTION); | ||
110 | } | ||
111 | /* | ||
112 | * Check third operand for infinity with a | ||
113 | * sign opposite of the multiply result | ||
114 | */ | ||
115 | if (Dbl_isinfinity(opnd3p1,opnd3p2) && | ||
116 | (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) { | ||
117 | /* | ||
118 | * invalid since attempting a magnitude | ||
119 | * subtraction of infinities | ||
120 | */ | ||
121 | if (Is_invalidtrap_enabled()) | ||
122 | return(OPC_2E_INVALIDEXCEPTION); | ||
123 | Set_invalidflag(); | ||
124 | Dbl_makequietnan(resultp1,resultp2); | ||
125 | Dbl_copytoptr(resultp1,resultp2,dstptr); | ||
126 | return(NOEXCEPTION); | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * return infinity | ||
131 | */ | ||
132 | Dbl_setinfinity_exponentmantissa(resultp1,resultp2); | ||
133 | Dbl_copytoptr(resultp1,resultp2,dstptr); | ||
134 | return(NOEXCEPTION); | ||
135 | } | ||
136 | } | ||
137 | else { | ||
138 | /* | ||
139 | * is NaN; signaling or quiet? | ||
140 | */ | ||
141 | if (Dbl_isone_signaling(opnd1p1)) { | ||
142 | /* trap if INVALIDTRAP enabled */ | ||
143 | if (Is_invalidtrap_enabled()) | ||
144 | return(OPC_2E_INVALIDEXCEPTION); | ||
145 | /* make NaN quiet */ | ||
146 | Set_invalidflag(); | ||
147 | Dbl_set_quiet(opnd1p1); | ||
148 | } | ||
149 | /* | ||
150 | * is second operand a signaling NaN? | ||
151 | */ | ||
152 | else if (Dbl_is_signalingnan(opnd2p1)) { | ||
153 | /* trap if INVALIDTRAP enabled */ | ||
154 | if (Is_invalidtrap_enabled()) | ||
155 | return(OPC_2E_INVALIDEXCEPTION); | ||
156 | /* make NaN quiet */ | ||
157 | Set_invalidflag(); | ||
158 | Dbl_set_quiet(opnd2p1); | ||
159 | Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); | ||
160 | return(NOEXCEPTION); | ||
161 | } | ||
162 | /* | ||
163 | * is third operand a signaling NaN? | ||
164 | */ | ||
165 | else if (Dbl_is_signalingnan(opnd3p1)) { | ||
166 | /* trap if INVALIDTRAP enabled */ | ||
167 | if (Is_invalidtrap_enabled()) | ||
168 | return(OPC_2E_INVALIDEXCEPTION); | ||
169 | /* make NaN quiet */ | ||
170 | Set_invalidflag(); | ||
171 | Dbl_set_quiet(opnd3p1); | ||
172 | Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); | ||
173 | return(NOEXCEPTION); | ||
174 | } | ||
175 | /* | ||
176 | * return quiet NaN | ||
177 | */ | ||
178 | Dbl_copytoptr(opnd1p1,opnd1p2,dstptr); | ||
179 | return(NOEXCEPTION); | ||
180 | } | ||
181 | } | ||
182 | |||
183 | /* | ||
184 | * check second operand for NaN's or infinity | ||
185 | */ | ||
186 | if (Dbl_isinfinity_exponent(opnd2p1)) { | ||
187 | if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) { | ||
188 | if (Dbl_isnotnan(opnd3p1,opnd3p2)) { | ||
189 | if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) { | ||
190 | /* | ||
191 | * invalid since multiply operands are | ||
192 | * zero & infinity | ||
193 | */ | ||
194 | if (Is_invalidtrap_enabled()) | ||
195 | return(OPC_2E_INVALIDEXCEPTION); | ||
196 | Set_invalidflag(); | ||
197 | Dbl_makequietnan(opnd2p1,opnd2p2); | ||
198 | Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); | ||
199 | return(NOEXCEPTION); | ||
200 | } | ||
201 | |||
202 | /* | ||
203 | * Check third operand for infinity with a | ||
204 | * sign opposite of the multiply result | ||
205 | */ | ||
206 | if (Dbl_isinfinity(opnd3p1,opnd3p2) && | ||
207 | (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) { | ||
208 | /* | ||
209 | * invalid since attempting a magnitude | ||
210 | * subtraction of infinities | ||
211 | */ | ||
212 | if (Is_invalidtrap_enabled()) | ||
213 | return(OPC_2E_INVALIDEXCEPTION); | ||
214 | Set_invalidflag(); | ||
215 | Dbl_makequietnan(resultp1,resultp2); | ||
216 | Dbl_copytoptr(resultp1,resultp2,dstptr); | ||
217 | return(NOEXCEPTION); | ||
218 | } | ||
219 | |||
220 | /* | ||
221 | * return infinity | ||
222 | */ | ||
223 | Dbl_setinfinity_exponentmantissa(resultp1,resultp2); | ||
224 | Dbl_copytoptr(resultp1,resultp2,dstptr); | ||
225 | return(NOEXCEPTION); | ||
226 | } | ||
227 | } | ||
228 | else { | ||
229 | /* | ||
230 | * is NaN; signaling or quiet? | ||
231 | */ | ||
232 | if (Dbl_isone_signaling(opnd2p1)) { | ||
233 | /* trap if INVALIDTRAP enabled */ | ||
234 | if (Is_invalidtrap_enabled()) | ||
235 | return(OPC_2E_INVALIDEXCEPTION); | ||
236 | /* make NaN quiet */ | ||
237 | Set_invalidflag(); | ||
238 | Dbl_set_quiet(opnd2p1); | ||
239 | } | ||
240 | /* | ||
241 | * is third operand a signaling NaN? | ||
242 | */ | ||
243 | else if (Dbl_is_signalingnan(opnd3p1)) { | ||
244 | /* trap if INVALIDTRAP enabled */ | ||
245 | if (Is_invalidtrap_enabled()) | ||
246 | return(OPC_2E_INVALIDEXCEPTION); | ||
247 | /* make NaN quiet */ | ||
248 | Set_invalidflag(); | ||
249 | Dbl_set_quiet(opnd3p1); | ||
250 | Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); | ||
251 | return(NOEXCEPTION); | ||
252 | } | ||
253 | /* | ||
254 | * return quiet NaN | ||
255 | */ | ||
256 | Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); | ||
257 | return(NOEXCEPTION); | ||
258 | } | ||
259 | } | ||
260 | |||
261 | /* | ||
262 | * check third operand for NaN's or infinity | ||
263 | */ | ||
264 | if (Dbl_isinfinity_exponent(opnd3p1)) { | ||
265 | if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) { | ||
266 | /* return infinity */ | ||
267 | Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); | ||
268 | return(NOEXCEPTION); | ||
269 | } else { | ||
270 | /* | ||
271 | * is NaN; signaling or quiet? | ||
272 | */ | ||
273 | if (Dbl_isone_signaling(opnd3p1)) { | ||
274 | /* trap if INVALIDTRAP enabled */ | ||
275 | if (Is_invalidtrap_enabled()) | ||
276 | return(OPC_2E_INVALIDEXCEPTION); | ||
277 | /* make NaN quiet */ | ||
278 | Set_invalidflag(); | ||
279 | Dbl_set_quiet(opnd3p1); | ||
280 | } | ||
281 | /* | ||
282 | * return quiet NaN | ||
283 | */ | ||
284 | Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); | ||
285 | return(NOEXCEPTION); | ||
286 | } | ||
287 | } | ||
288 | |||
289 | /* | ||
290 | * Generate multiply mantissa | ||
291 | */ | ||
292 | if (Dbl_isnotzero_exponent(opnd1p1)) { | ||
293 | /* set hidden bit */ | ||
294 | Dbl_clear_signexponent_set_hidden(opnd1p1); | ||
295 | } | ||
296 | else { | ||
297 | /* check for zero */ | ||
298 | if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { | ||
299 | /* | ||
300 | * Perform the add opnd3 with zero here. | ||
301 | */ | ||
302 | if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) { | ||
303 | if (Is_rounding_mode(ROUNDMINUS)) { | ||
304 | Dbl_or_signs(opnd3p1,resultp1); | ||
305 | } else { | ||
306 | Dbl_and_signs(opnd3p1,resultp1); | ||
307 | } | ||
308 | } | ||
309 | /* | ||
310 | * Now let's check for trapped underflow case. | ||
311 | */ | ||
312 | else if (Dbl_iszero_exponent(opnd3p1) && | ||
313 | Is_underflowtrap_enabled()) { | ||
314 | /* need to normalize results mantissa */ | ||
315 | sign_save = Dbl_signextendedsign(opnd3p1); | ||
316 | result_exponent = 0; | ||
317 | Dbl_leftshiftby1(opnd3p1,opnd3p2); | ||
318 | Dbl_normalize(opnd3p1,opnd3p2,result_exponent); | ||
319 | Dbl_set_sign(opnd3p1,/*using*/sign_save); | ||
320 | Dbl_setwrapped_exponent(opnd3p1,result_exponent, | ||
321 | unfl); | ||
322 | Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); | ||
323 | /* inexact = FALSE */ | ||
324 | return(OPC_2E_UNDERFLOWEXCEPTION); | ||
325 | } | ||
326 | Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); | ||
327 | return(NOEXCEPTION); | ||
328 | } | ||
329 | /* is denormalized, adjust exponent */ | ||
330 | Dbl_clear_signexponent(opnd1p1); | ||
331 | Dbl_leftshiftby1(opnd1p1,opnd1p2); | ||
332 | Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent); | ||
333 | } | ||
334 | /* opnd2 needs to have hidden bit set with msb in hidden bit */ | ||
335 | if (Dbl_isnotzero_exponent(opnd2p1)) { | ||
336 | Dbl_clear_signexponent_set_hidden(opnd2p1); | ||
337 | } | ||
338 | else { | ||
339 | /* check for zero */ | ||
340 | if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) { | ||
341 | /* | ||
342 | * Perform the add opnd3 with zero here. | ||
343 | */ | ||
344 | if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) { | ||
345 | if (Is_rounding_mode(ROUNDMINUS)) { | ||
346 | Dbl_or_signs(opnd3p1,resultp1); | ||
347 | } else { | ||
348 | Dbl_and_signs(opnd3p1,resultp1); | ||
349 | } | ||
350 | } | ||
351 | /* | ||
352 | * Now let's check for trapped underflow case. | ||
353 | */ | ||
354 | else if (Dbl_iszero_exponent(opnd3p1) && | ||
355 | Is_underflowtrap_enabled()) { | ||
356 | /* need to normalize results mantissa */ | ||
357 | sign_save = Dbl_signextendedsign(opnd3p1); | ||
358 | result_exponent = 0; | ||
359 | Dbl_leftshiftby1(opnd3p1,opnd3p2); | ||
360 | Dbl_normalize(opnd3p1,opnd3p2,result_exponent); | ||
361 | Dbl_set_sign(opnd3p1,/*using*/sign_save); | ||
362 | Dbl_setwrapped_exponent(opnd3p1,result_exponent, | ||
363 | unfl); | ||
364 | Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); | ||
365 | /* inexact = FALSE */ | ||
366 | return(OPC_2E_UNDERFLOWEXCEPTION); | ||
367 | } | ||
368 | Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); | ||
369 | return(NOEXCEPTION); | ||
370 | } | ||
371 | /* is denormalized; want to normalize */ | ||
372 | Dbl_clear_signexponent(opnd2p1); | ||
373 | Dbl_leftshiftby1(opnd2p1,opnd2p2); | ||
374 | Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent); | ||
375 | } | ||
376 | |||
377 | /* Multiply the first two source mantissas together */ | ||
378 | |||
379 | /* | ||
380 | * The intermediate result will be kept in tmpres, | ||
381 | * which needs enough room for 106 bits of mantissa, | ||
382 | * so lets call it a Double extended. | ||
383 | */ | ||
384 | Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4); | ||
385 | |||
386 | /* | ||
387 | * Four bits at a time are inspected in each loop, and a | ||
388 | * simple shift and add multiply algorithm is used. | ||
389 | */ | ||
390 | for (count = DBL_P-1; count >= 0; count -= 4) { | ||
391 | Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4); | ||
392 | if (Dbit28p2(opnd1p2)) { | ||
393 | /* Fourword_add should be an ADD followed by 3 ADDC's */ | ||
394 | Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, | ||
395 | opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0); | ||
396 | } | ||
397 | if (Dbit29p2(opnd1p2)) { | ||
398 | Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, | ||
399 | opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0); | ||
400 | } | ||
401 | if (Dbit30p2(opnd1p2)) { | ||
402 | Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, | ||
403 | opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0); | ||
404 | } | ||
405 | if (Dbit31p2(opnd1p2)) { | ||
406 | Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, | ||
407 | opnd2p1, opnd2p2, 0, 0); | ||
408 | } | ||
409 | Dbl_rightshiftby4(opnd1p1,opnd1p2); | ||
410 | } | ||
411 | if (Is_dexthiddenoverflow(tmpresp1)) { | ||
412 | /* result mantissa >= 2 (mantissa overflow) */ | ||
413 | mpy_exponent++; | ||
414 | Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4); | ||
415 | } | ||
416 | |||
417 | /* | ||
418 | * Restore the sign of the mpy result which was saved in resultp1. | ||
419 | * The exponent will continue to be kept in mpy_exponent. | ||
420 | */ | ||
421 | Dblext_set_sign(tmpresp1,Dbl_sign(resultp1)); | ||
422 | |||
423 | /* | ||
424 | * No rounding is required, since the result of the multiply | ||
425 | * is exact in the extended format. | ||
426 | */ | ||
427 | |||
428 | /* | ||
429 | * Now we are ready to perform the add portion of the operation. | ||
430 | * | ||
431 | * The exponents need to be kept as integers for now, since the | ||
432 | * multiply result might not fit into the exponent field. We | ||
433 | * can't overflow or underflow because of this yet, since the | ||
434 | * add could bring the final result back into range. | ||
435 | */ | ||
436 | add_exponent = Dbl_exponent(opnd3p1); | ||
437 | |||
438 | /* | ||
439 | * Check for denormalized or zero add operand. | ||
440 | */ | ||
441 | if (add_exponent == 0) { | ||
442 | /* check for zero */ | ||
443 | if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) { | ||
444 | /* right is zero */ | ||
445 | /* Left can't be zero and must be result. | ||
446 | * | ||
447 | * The final result is now in tmpres and mpy_exponent, | ||
448 | * and needs to be rounded and squeezed back into | ||
449 | * double precision format from double extended. | ||
450 | */ | ||
451 | result_exponent = mpy_exponent; | ||
452 | Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4, | ||
453 | resultp1,resultp2,resultp3,resultp4); | ||
454 | sign_save = Dbl_signextendedsign(resultp1);/*save sign*/ | ||
455 | goto round; | ||
456 | } | ||
457 | |||
458 | /* | ||
459 | * Neither are zeroes. | ||
460 | * Adjust exponent and normalize add operand. | ||
461 | */ | ||
462 | sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */ | ||
463 | Dbl_clear_signexponent(opnd3p1); | ||
464 | Dbl_leftshiftby1(opnd3p1,opnd3p2); | ||
465 | Dbl_normalize(opnd3p1,opnd3p2,add_exponent); | ||
466 | Dbl_set_sign(opnd3p1,sign_save); /* restore sign */ | ||
467 | } else { | ||
468 | Dbl_clear_exponent_set_hidden(opnd3p1); | ||
469 | } | ||
470 | /* | ||
471 | * Copy opnd3 to the double extended variable called right. | ||
472 | */ | ||
473 | Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4); | ||
474 | |||
475 | /* | ||
476 | * A zero "save" helps discover equal operands (for later), | ||
477 | * and is used in swapping operands (if needed). | ||
478 | */ | ||
479 | Dblext_xortointp1(tmpresp1,rightp1,/*to*/save); | ||
480 | |||
481 | /* | ||
482 | * Compare magnitude of operands. | ||
483 | */ | ||
484 | Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1); | ||
485 | Dblext_copytoint_exponentmantissap1(rightp1,signlessright1); | ||
486 | if (mpy_exponent < add_exponent || mpy_exponent == add_exponent && | ||
487 | Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){ | ||
488 | /* | ||
489 | * Set the left operand to the larger one by XOR swap. | ||
490 | * First finish the first word "save". | ||
491 | */ | ||
492 | Dblext_xorfromintp1(save,rightp1,/*to*/rightp1); | ||
493 | Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1); | ||
494 | Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4, | ||
495 | rightp2,rightp3,rightp4); | ||
496 | /* also setup exponents used in rest of routine */ | ||
497 | diff_exponent = add_exponent - mpy_exponent; | ||
498 | result_exponent = add_exponent; | ||
499 | } else { | ||
500 | /* also setup exponents used in rest of routine */ | ||
501 | diff_exponent = mpy_exponent - add_exponent; | ||
502 | result_exponent = mpy_exponent; | ||
503 | } | ||
504 | /* Invariant: left is not smaller than right. */ | ||
505 | |||
506 | /* | ||
507 | * Special case alignment of operands that would force alignment | ||
508 | * beyond the extent of the extension. A further optimization | ||
509 | * could special case this but only reduces the path length for | ||
510 | * this infrequent case. | ||
511 | */ | ||
512 | if (diff_exponent > DBLEXT_THRESHOLD) { | ||
513 | diff_exponent = DBLEXT_THRESHOLD; | ||
514 | } | ||
515 | |||
516 | /* Align right operand by shifting it to the right */ | ||
517 | Dblext_clear_sign(rightp1); | ||
518 | Dblext_right_align(rightp1,rightp2,rightp3,rightp4, | ||
519 | /*shifted by*/diff_exponent); | ||
520 | |||
521 | /* Treat sum and difference of the operands separately. */ | ||
522 | if ((int)save < 0) { | ||
523 | /* | ||
524 | * Difference of the two operands. Overflow can occur if the | ||
525 | * multiply overflowed. A borrow can occur out of the hidden | ||
526 | * bit and force a post normalization phase. | ||
527 | */ | ||
528 | Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4, | ||
529 | rightp1,rightp2,rightp3,rightp4, | ||
530 | resultp1,resultp2,resultp3,resultp4); | ||
531 | sign_save = Dbl_signextendedsign(resultp1); | ||
532 | if (Dbl_iszero_hidden(resultp1)) { | ||
533 | /* Handle normalization */ | ||
534 | /* A straight foward algorithm would now shift the | ||
535 | * result and extension left until the hidden bit | ||
536 | * becomes one. Not all of the extension bits need | ||
537 | * participate in the shift. Only the two most | ||
538 | * significant bits (round and guard) are needed. | ||
539 | * If only a single shift is needed then the guard | ||
540 | * bit becomes a significant low order bit and the | ||
541 | * extension must participate in the rounding. | ||
542 | * If more than a single shift is needed, then all | ||
543 | * bits to the right of the guard bit are zeros, | ||
544 | * and the guard bit may or may not be zero. */ | ||
545 | Dblext_leftshiftby1(resultp1,resultp2,resultp3, | ||
546 | resultp4); | ||
547 | |||
548 | /* Need to check for a zero result. The sign and | ||
549 | * exponent fields have already been zeroed. The more | ||
550 | * efficient test of the full object can be used. | ||
551 | */ | ||
552 | if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){ | ||
553 | /* Must have been "x-x" or "x+(-x)". */ | ||
554 | if (Is_rounding_mode(ROUNDMINUS)) | ||
555 | Dbl_setone_sign(resultp1); | ||
556 | Dbl_copytoptr(resultp1,resultp2,dstptr); | ||
557 | return(NOEXCEPTION); | ||
558 | } | ||
559 | result_exponent--; | ||
560 | |||
561 | /* Look to see if normalization is finished. */ | ||
562 | if (Dbl_isone_hidden(resultp1)) { | ||
563 | /* No further normalization is needed */ | ||
564 | goto round; | ||
565 | } | ||
566 | |||
567 | /* Discover first one bit to determine shift amount. | ||
568 | * Use a modified binary search. We have already | ||
569 | * shifted the result one position right and still | ||
570 | * not found a one so the remainder of the extension | ||
571 | * must be zero and simplifies rounding. */ | ||
572 | /* Scan bytes */ | ||
573 | while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) { | ||
574 | Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4); | ||
575 | result_exponent -= 8; | ||
576 | } | ||
577 | /* Now narrow it down to the nibble */ | ||
578 | if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) { | ||
579 | /* The lower nibble contains the | ||
580 | * normalizing one */ | ||
581 | Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4); | ||
582 | result_exponent -= 4; | ||
583 | } | ||
584 | /* Select case where first bit is set (already | ||
585 | * normalized) otherwise select the proper shift. */ | ||
586 | jumpsize = Dbl_hiddenhigh3mantissa(resultp1); | ||
587 | if (jumpsize <= 7) switch(jumpsize) { | ||
588 | case 1: | ||
589 | Dblext_leftshiftby3(resultp1,resultp2,resultp3, | ||
590 | resultp4); | ||
591 | result_exponent -= 3; | ||
592 | break; | ||
593 | case 2: | ||
594 | case 3: | ||
595 | Dblext_leftshiftby2(resultp1,resultp2,resultp3, | ||
596 | resultp4); | ||
597 | result_exponent -= 2; | ||
598 | break; | ||
599 | case 4: | ||
600 | case 5: | ||
601 | case 6: | ||
602 | case 7: | ||
603 | Dblext_leftshiftby1(resultp1,resultp2,resultp3, | ||
604 | resultp4); | ||
605 | result_exponent -= 1; | ||
606 | break; | ||
607 | } | ||
608 | } /* end if (hidden...)... */ | ||
609 | /* Fall through and round */ | ||
610 | } /* end if (save < 0)... */ | ||
611 | else { | ||
612 | /* Add magnitudes */ | ||
613 | Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4, | ||
614 | rightp1,rightp2,rightp3,rightp4, | ||
615 | /*to*/resultp1,resultp2,resultp3,resultp4); | ||
616 | sign_save = Dbl_signextendedsign(resultp1); | ||
617 | if (Dbl_isone_hiddenoverflow(resultp1)) { | ||
618 | /* Prenormalization required. */ | ||
619 | Dblext_arithrightshiftby1(resultp1,resultp2,resultp3, | ||
620 | resultp4); | ||
621 | result_exponent++; | ||
622 | } /* end if hiddenoverflow... */ | ||
623 | } /* end else ...add magnitudes... */ | ||
624 | |||
625 | /* Round the result. If the extension and lower two words are | ||
626 | * all zeros, then the result is exact. Otherwise round in the | ||
627 | * correct direction. Underflow is possible. If a postnormalization | ||
628 | * is necessary, then the mantissa is all zeros so no shift is needed. | ||
629 | */ | ||
630 | round: | ||
631 | if (result_exponent <= 0 && !Is_underflowtrap_enabled()) { | ||
632 | Dblext_denormalize(resultp1,resultp2,resultp3,resultp4, | ||
633 | result_exponent,is_tiny); | ||
634 | } | ||
635 | Dbl_set_sign(resultp1,/*using*/sign_save); | ||
636 | if (Dblext_isnotzero_mantissap3(resultp3) || | ||
637 | Dblext_isnotzero_mantissap4(resultp4)) { | ||
638 | inexact = TRUE; | ||
639 | switch(Rounding_mode()) { | ||
640 | case ROUNDNEAREST: /* The default. */ | ||
641 | if (Dblext_isone_highp3(resultp3)) { | ||
642 | /* at least 1/2 ulp */ | ||
643 | if (Dblext_isnotzero_low31p3(resultp3) || | ||
644 | Dblext_isnotzero_mantissap4(resultp4) || | ||
645 | Dblext_isone_lowp2(resultp2)) { | ||
646 | /* either exactly half way and odd or | ||
647 | * more than 1/2ulp */ | ||
648 | Dbl_increment(resultp1,resultp2); | ||
649 | } | ||
650 | } | ||
651 | break; | ||
652 | |||
653 | case ROUNDPLUS: | ||
654 | if (Dbl_iszero_sign(resultp1)) { | ||
655 | /* Round up positive results */ | ||
656 | Dbl_increment(resultp1,resultp2); | ||
657 | } | ||
658 | break; | ||
659 | |||
660 | case ROUNDMINUS: | ||
661 | if (Dbl_isone_sign(resultp1)) { | ||
662 | /* Round down negative results */ | ||
663 | Dbl_increment(resultp1,resultp2); | ||
664 | } | ||
665 | |||
666 | case ROUNDZERO:; | ||
667 | /* truncate is simple */ | ||
668 | } /* end switch... */ | ||
669 | if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++; | ||
670 | } | ||
671 | if (result_exponent >= DBL_INFINITY_EXPONENT) { | ||
672 | /* trap if OVERFLOWTRAP enabled */ | ||
673 | if (Is_overflowtrap_enabled()) { | ||
674 | /* | ||
675 | * Adjust bias of result | ||
676 | */ | ||
677 | Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl); | ||
678 | Dbl_copytoptr(resultp1,resultp2,dstptr); | ||
679 | if (inexact) | ||
680 | if (Is_inexacttrap_enabled()) | ||
681 | return (OPC_2E_OVERFLOWEXCEPTION | | ||
682 | OPC_2E_INEXACTEXCEPTION); | ||
683 | else Set_inexactflag(); | ||
684 | return (OPC_2E_OVERFLOWEXCEPTION); | ||
685 | } | ||
686 | inexact = TRUE; | ||
687 | Set_overflowflag(); | ||
688 | /* set result to infinity or largest number */ | ||
689 | Dbl_setoverflow(resultp1,resultp2); | ||
690 | |||
691 | } else if (result_exponent <= 0) { /* underflow case */ | ||
692 | if (Is_underflowtrap_enabled()) { | ||
693 | /* | ||
694 | * Adjust bias of result | ||
695 | */ | ||
696 | Dbl_setwrapped_exponent(resultp1,result_exponent,unfl); | ||
697 | Dbl_copytoptr(resultp1,resultp2,dstptr); | ||
698 | if (inexact) | ||
699 | if (Is_inexacttrap_enabled()) | ||
700 | return (OPC_2E_UNDERFLOWEXCEPTION | | ||
701 | OPC_2E_INEXACTEXCEPTION); | ||
702 | else Set_inexactflag(); | ||
703 | return(OPC_2E_UNDERFLOWEXCEPTION); | ||
704 | } | ||
705 | else if (inexact && is_tiny) Set_underflowflag(); | ||
706 | } | ||
707 | else Dbl_set_exponent(resultp1,result_exponent); | ||
708 | Dbl_copytoptr(resultp1,resultp2,dstptr); | ||
709 | if (inexact) | ||
710 | if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION); | ||
711 | else Set_inexactflag(); | ||
712 | return(NOEXCEPTION); | ||
713 | } | ||
714 | |||
715 | /* | ||
716 | * Double Floating-point Multiply Negate Fused Add | ||
717 | */ | ||
718 | |||
719 | dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr) | ||
720 | |||
721 | dbl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr; | ||
722 | unsigned int *status; | ||
723 | { | ||
724 | unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2; | ||
725 | register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4; | ||
726 | unsigned int rightp1, rightp2, rightp3, rightp4; | ||
727 | unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0; | ||
728 | register int mpy_exponent, add_exponent, count; | ||
729 | boolean inexact = FALSE, is_tiny = FALSE; | ||
730 | |||
731 | unsigned int signlessleft1, signlessright1, save; | ||
732 | register int result_exponent, diff_exponent; | ||
733 | int sign_save, jumpsize; | ||
734 | |||
735 | Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2); | ||
736 | Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2); | ||
737 | Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2); | ||
738 | |||
739 | /* | ||
740 | * set sign bit of result of multiply | ||
741 | */ | ||
742 | if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1)) | ||
743 | Dbl_setzerop1(resultp1); | ||
744 | else | ||
745 | Dbl_setnegativezerop1(resultp1); | ||
746 | |||
747 | /* | ||
748 | * Generate multiply exponent | ||
749 | */ | ||
750 | mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS; | ||
751 | |||
752 | /* | ||
753 | * check first operand for NaN's or infinity | ||
754 | */ | ||
755 | if (Dbl_isinfinity_exponent(opnd1p1)) { | ||
756 | if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { | ||
757 | if (Dbl_isnotnan(opnd2p1,opnd2p2) && | ||
758 | Dbl_isnotnan(opnd3p1,opnd3p2)) { | ||
759 | if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) { | ||
760 | /* | ||
761 | * invalid since operands are infinity | ||
762 | * and zero | ||
763 | */ | ||
764 | if (Is_invalidtrap_enabled()) | ||
765 | return(OPC_2E_INVALIDEXCEPTION); | ||
766 | Set_invalidflag(); | ||
767 | Dbl_makequietnan(resultp1,resultp2); | ||
768 | Dbl_copytoptr(resultp1,resultp2,dstptr); | ||
769 | return(NOEXCEPTION); | ||
770 | } | ||
771 | /* | ||
772 | * Check third operand for infinity with a | ||
773 | * sign opposite of the multiply result | ||
774 | */ | ||
775 | if (Dbl_isinfinity(opnd3p1,opnd3p2) && | ||
776 | (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) { | ||
777 | /* | ||
778 | * invalid since attempting a magnitude | ||
779 | * subtraction of infinities | ||
780 | */ | ||
781 | if (Is_invalidtrap_enabled()) | ||
782 | return(OPC_2E_INVALIDEXCEPTION); | ||
783 | Set_invalidflag(); | ||
784 | Dbl_makequietnan(resultp1,resultp2); | ||
785 | Dbl_copytoptr(resultp1,resultp2,dstptr); | ||
786 | return(NOEXCEPTION); | ||
787 | } | ||
788 | |||
789 | /* | ||
790 | * return infinity | ||
791 | */ | ||
792 | Dbl_setinfinity_exponentmantissa(resultp1,resultp2); | ||
793 | Dbl_copytoptr(resultp1,resultp2,dstptr); | ||
794 | return(NOEXCEPTION); | ||
795 | } | ||
796 | } | ||
797 | else { | ||
798 | /* | ||
799 | * is NaN; signaling or quiet? | ||
800 | */ | ||
801 | if (Dbl_isone_signaling(opnd1p1)) { | ||
802 | /* trap if INVALIDTRAP enabled */ | ||
803 | if (Is_invalidtrap_enabled()) | ||
804 | return(OPC_2E_INVALIDEXCEPTION); | ||
805 | /* make NaN quiet */ | ||
806 | Set_invalidflag(); | ||
807 | Dbl_set_quiet(opnd1p1); | ||
808 | } | ||
809 | /* | ||
810 | * is second operand a signaling NaN? | ||
811 | */ | ||
812 | else if (Dbl_is_signalingnan(opnd2p1)) { | ||
813 | /* trap if INVALIDTRAP enabled */ | ||
814 | if (Is_invalidtrap_enabled()) | ||
815 | return(OPC_2E_INVALIDEXCEPTION); | ||
816 | /* make NaN quiet */ | ||
817 | Set_invalidflag(); | ||
818 | Dbl_set_quiet(opnd2p1); | ||
819 | Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); | ||
820 | return(NOEXCEPTION); | ||
821 | } | ||
822 | /* | ||
823 | * is third operand a signaling NaN? | ||
824 | */ | ||
825 | else if (Dbl_is_signalingnan(opnd3p1)) { | ||
826 | /* trap if INVALIDTRAP enabled */ | ||
827 | if (Is_invalidtrap_enabled()) | ||
828 | return(OPC_2E_INVALIDEXCEPTION); | ||
829 | /* make NaN quiet */ | ||
830 | Set_invalidflag(); | ||
831 | Dbl_set_quiet(opnd3p1); | ||
832 | Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); | ||
833 | return(NOEXCEPTION); | ||
834 | } | ||
835 | /* | ||
836 | * return quiet NaN | ||
837 | */ | ||
838 | Dbl_copytoptr(opnd1p1,opnd1p2,dstptr); | ||
839 | return(NOEXCEPTION); | ||
840 | } | ||
841 | } | ||
842 | |||
843 | /* | ||
844 | * check second operand for NaN's or infinity | ||
845 | */ | ||
846 | if (Dbl_isinfinity_exponent(opnd2p1)) { | ||
847 | if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) { | ||
848 | if (Dbl_isnotnan(opnd3p1,opnd3p2)) { | ||
849 | if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) { | ||
850 | /* | ||
851 | * invalid since multiply operands are | ||
852 | * zero & infinity | ||
853 | */ | ||
854 | if (Is_invalidtrap_enabled()) | ||
855 | return(OPC_2E_INVALIDEXCEPTION); | ||
856 | Set_invalidflag(); | ||
857 | Dbl_makequietnan(opnd2p1,opnd2p2); | ||
858 | Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); | ||
859 | return(NOEXCEPTION); | ||
860 | } | ||
861 | |||
862 | /* | ||
863 | * Check third operand for infinity with a | ||
864 | * sign opposite of the multiply result | ||
865 | */ | ||
866 | if (Dbl_isinfinity(opnd3p1,opnd3p2) && | ||
867 | (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) { | ||
868 | /* | ||
869 | * invalid since attempting a magnitude | ||
870 | * subtraction of infinities | ||
871 | */ | ||
872 | if (Is_invalidtrap_enabled()) | ||
873 | return(OPC_2E_INVALIDEXCEPTION); | ||
874 | Set_invalidflag(); | ||
875 | Dbl_makequietnan(resultp1,resultp2); | ||
876 | Dbl_copytoptr(resultp1,resultp2,dstptr); | ||
877 | return(NOEXCEPTION); | ||
878 | } | ||
879 | |||
880 | /* | ||
881 | * return infinity | ||
882 | */ | ||
883 | Dbl_setinfinity_exponentmantissa(resultp1,resultp2); | ||
884 | Dbl_copytoptr(resultp1,resultp2,dstptr); | ||
885 | return(NOEXCEPTION); | ||
886 | } | ||
887 | } | ||
888 | else { | ||
889 | /* | ||
890 | * is NaN; signaling or quiet? | ||
891 | */ | ||
892 | if (Dbl_isone_signaling(opnd2p1)) { | ||
893 | /* trap if INVALIDTRAP enabled */ | ||
894 | if (Is_invalidtrap_enabled()) | ||
895 | return(OPC_2E_INVALIDEXCEPTION); | ||
896 | /* make NaN quiet */ | ||
897 | Set_invalidflag(); | ||
898 | Dbl_set_quiet(opnd2p1); | ||
899 | } | ||
900 | /* | ||
901 | * is third operand a signaling NaN? | ||
902 | */ | ||
903 | else if (Dbl_is_signalingnan(opnd3p1)) { | ||
904 | /* trap if INVALIDTRAP enabled */ | ||
905 | if (Is_invalidtrap_enabled()) | ||
906 | return(OPC_2E_INVALIDEXCEPTION); | ||
907 | /* make NaN quiet */ | ||
908 | Set_invalidflag(); | ||
909 | Dbl_set_quiet(opnd3p1); | ||
910 | Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); | ||
911 | return(NOEXCEPTION); | ||
912 | } | ||
913 | /* | ||
914 | * return quiet NaN | ||
915 | */ | ||
916 | Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); | ||
917 | return(NOEXCEPTION); | ||
918 | } | ||
919 | } | ||
920 | |||
921 | /* | ||
922 | * check third operand for NaN's or infinity | ||
923 | */ | ||
924 | if (Dbl_isinfinity_exponent(opnd3p1)) { | ||
925 | if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) { | ||
926 | /* return infinity */ | ||
927 | Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); | ||
928 | return(NOEXCEPTION); | ||
929 | } else { | ||
930 | /* | ||
931 | * is NaN; signaling or quiet? | ||
932 | */ | ||
933 | if (Dbl_isone_signaling(opnd3p1)) { | ||
934 | /* trap if INVALIDTRAP enabled */ | ||
935 | if (Is_invalidtrap_enabled()) | ||
936 | return(OPC_2E_INVALIDEXCEPTION); | ||
937 | /* make NaN quiet */ | ||
938 | Set_invalidflag(); | ||
939 | Dbl_set_quiet(opnd3p1); | ||
940 | } | ||
941 | /* | ||
942 | * return quiet NaN | ||
943 | */ | ||
944 | Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); | ||
945 | return(NOEXCEPTION); | ||
946 | } | ||
947 | } | ||
948 | |||
949 | /* | ||
950 | * Generate multiply mantissa | ||
951 | */ | ||
952 | if (Dbl_isnotzero_exponent(opnd1p1)) { | ||
953 | /* set hidden bit */ | ||
954 | Dbl_clear_signexponent_set_hidden(opnd1p1); | ||
955 | } | ||
956 | else { | ||
957 | /* check for zero */ | ||
958 | if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { | ||
959 | /* | ||
960 | * Perform the add opnd3 with zero here. | ||
961 | */ | ||
962 | if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) { | ||
963 | if (Is_rounding_mode(ROUNDMINUS)) { | ||
964 | Dbl_or_signs(opnd3p1,resultp1); | ||
965 | } else { | ||
966 | Dbl_and_signs(opnd3p1,resultp1); | ||
967 | } | ||
968 | } | ||
969 | /* | ||
970 | * Now let's check for trapped underflow case. | ||
971 | */ | ||
972 | else if (Dbl_iszero_exponent(opnd3p1) && | ||
973 | Is_underflowtrap_enabled()) { | ||
974 | /* need to normalize results mantissa */ | ||
975 | sign_save = Dbl_signextendedsign(opnd3p1); | ||
976 | result_exponent = 0; | ||
977 | Dbl_leftshiftby1(opnd3p1,opnd3p2); | ||
978 | Dbl_normalize(opnd3p1,opnd3p2,result_exponent); | ||
979 | Dbl_set_sign(opnd3p1,/*using*/sign_save); | ||
980 | Dbl_setwrapped_exponent(opnd3p1,result_exponent, | ||
981 | unfl); | ||
982 | Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); | ||
983 | /* inexact = FALSE */ | ||
984 | return(OPC_2E_UNDERFLOWEXCEPTION); | ||
985 | } | ||
986 | Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); | ||
987 | return(NOEXCEPTION); | ||
988 | } | ||
989 | /* is denormalized, adjust exponent */ | ||
990 | Dbl_clear_signexponent(opnd1p1); | ||
991 | Dbl_leftshiftby1(opnd1p1,opnd1p2); | ||
992 | Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent); | ||
993 | } | ||
994 | /* opnd2 needs to have hidden bit set with msb in hidden bit */ | ||
995 | if (Dbl_isnotzero_exponent(opnd2p1)) { | ||
996 | Dbl_clear_signexponent_set_hidden(opnd2p1); | ||
997 | } | ||
998 | else { | ||
999 | /* check for zero */ | ||
1000 | if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) { | ||
1001 | /* | ||
1002 | * Perform the add opnd3 with zero here. | ||
1003 | */ | ||
1004 | if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) { | ||
1005 | if (Is_rounding_mode(ROUNDMINUS)) { | ||
1006 | Dbl_or_signs(opnd3p1,resultp1); | ||
1007 | } else { | ||
1008 | Dbl_and_signs(opnd3p1,resultp1); | ||
1009 | } | ||
1010 | } | ||
1011 | /* | ||
1012 | * Now let's check for trapped underflow case. | ||
1013 | */ | ||
1014 | else if (Dbl_iszero_exponent(opnd3p1) && | ||
1015 | Is_underflowtrap_enabled()) { | ||
1016 | /* need to normalize results mantissa */ | ||
1017 | sign_save = Dbl_signextendedsign(opnd3p1); | ||
1018 | result_exponent = 0; | ||
1019 | Dbl_leftshiftby1(opnd3p1,opnd3p2); | ||
1020 | Dbl_normalize(opnd3p1,opnd3p2,result_exponent); | ||
1021 | Dbl_set_sign(opnd3p1,/*using*/sign_save); | ||
1022 | Dbl_setwrapped_exponent(opnd3p1,result_exponent, | ||
1023 | unfl); | ||
1024 | Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); | ||
1025 | /* inexact = FALSE */ | ||
1026 | return(OPC_2E_UNDERFLOWEXCEPTION); | ||
1027 | } | ||
1028 | Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); | ||
1029 | return(NOEXCEPTION); | ||
1030 | } | ||
1031 | /* is denormalized; want to normalize */ | ||
1032 | Dbl_clear_signexponent(opnd2p1); | ||
1033 | Dbl_leftshiftby1(opnd2p1,opnd2p2); | ||
1034 | Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent); | ||
1035 | } | ||
1036 | |||
1037 | /* Multiply the first two source mantissas together */ | ||
1038 | |||
1039 | /* | ||
1040 | * The intermediate result will be kept in tmpres, | ||
1041 | * which needs enough room for 106 bits of mantissa, | ||
1042 | * so lets call it a Double extended. | ||
1043 | */ | ||
1044 | Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4); | ||
1045 | |||
1046 | /* | ||
1047 | * Four bits at a time are inspected in each loop, and a | ||
1048 | * simple shift and add multiply algorithm is used. | ||
1049 | */ | ||
1050 | for (count = DBL_P-1; count >= 0; count -= 4) { | ||
1051 | Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4); | ||
1052 | if (Dbit28p2(opnd1p2)) { | ||
1053 | /* Fourword_add should be an ADD followed by 3 ADDC's */ | ||
1054 | Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, | ||
1055 | opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0); | ||
1056 | } | ||
1057 | if (Dbit29p2(opnd1p2)) { | ||
1058 | Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, | ||
1059 | opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0); | ||
1060 | } | ||
1061 | if (Dbit30p2(opnd1p2)) { | ||
1062 | Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, | ||
1063 | opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0); | ||
1064 | } | ||
1065 | if (Dbit31p2(opnd1p2)) { | ||
1066 | Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, | ||
1067 | opnd2p1, opnd2p2, 0, 0); | ||
1068 | } | ||
1069 | Dbl_rightshiftby4(opnd1p1,opnd1p2); | ||
1070 | } | ||
1071 | if (Is_dexthiddenoverflow(tmpresp1)) { | ||
1072 | /* result mantissa >= 2 (mantissa overflow) */ | ||
1073 | mpy_exponent++; | ||
1074 | Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4); | ||
1075 | } | ||
1076 | |||
1077 | /* | ||
1078 | * Restore the sign of the mpy result which was saved in resultp1. | ||
1079 | * The exponent will continue to be kept in mpy_exponent. | ||
1080 | */ | ||
1081 | Dblext_set_sign(tmpresp1,Dbl_sign(resultp1)); | ||
1082 | |||
1083 | /* | ||
1084 | * No rounding is required, since the result of the multiply | ||
1085 | * is exact in the extended format. | ||
1086 | */ | ||
1087 | |||
1088 | /* | ||
1089 | * Now we are ready to perform the add portion of the operation. | ||
1090 | * | ||
1091 | * The exponents need to be kept as integers for now, since the | ||
1092 | * multiply result might not fit into the exponent field. We | ||
1093 | * can't overflow or underflow because of this yet, since the | ||
1094 | * add could bring the final result back into range. | ||
1095 | */ | ||
1096 | add_exponent = Dbl_exponent(opnd3p1); | ||
1097 | |||
1098 | /* | ||
1099 | * Check for denormalized or zero add operand. | ||
1100 | */ | ||
1101 | if (add_exponent == 0) { | ||
1102 | /* check for zero */ | ||
1103 | if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) { | ||
1104 | /* right is zero */ | ||
1105 | /* Left can't be zero and must be result. | ||
1106 | * | ||
1107 | * The final result is now in tmpres and mpy_exponent, | ||
1108 | * and needs to be rounded and squeezed back into | ||
1109 | * double precision format from double extended. | ||
1110 | */ | ||
1111 | result_exponent = mpy_exponent; | ||
1112 | Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4, | ||
1113 | resultp1,resultp2,resultp3,resultp4); | ||
1114 | sign_save = Dbl_signextendedsign(resultp1);/*save sign*/ | ||
1115 | goto round; | ||
1116 | } | ||
1117 | |||
1118 | /* | ||
1119 | * Neither are zeroes. | ||
1120 | * Adjust exponent and normalize add operand. | ||
1121 | */ | ||
1122 | sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */ | ||
1123 | Dbl_clear_signexponent(opnd3p1); | ||
1124 | Dbl_leftshiftby1(opnd3p1,opnd3p2); | ||
1125 | Dbl_normalize(opnd3p1,opnd3p2,add_exponent); | ||
1126 | Dbl_set_sign(opnd3p1,sign_save); /* restore sign */ | ||
1127 | } else { | ||
1128 | Dbl_clear_exponent_set_hidden(opnd3p1); | ||
1129 | } | ||
1130 | /* | ||
1131 | * Copy opnd3 to the double extended variable called right. | ||
1132 | */ | ||
1133 | Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4); | ||
1134 | |||
1135 | /* | ||
1136 | * A zero "save" helps discover equal operands (for later), | ||
1137 | * and is used in swapping operands (if needed). | ||
1138 | */ | ||
1139 | Dblext_xortointp1(tmpresp1,rightp1,/*to*/save); | ||
1140 | |||
1141 | /* | ||
1142 | * Compare magnitude of operands. | ||
1143 | */ | ||
1144 | Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1); | ||
1145 | Dblext_copytoint_exponentmantissap1(rightp1,signlessright1); | ||
1146 | if (mpy_exponent < add_exponent || mpy_exponent == add_exponent && | ||
1147 | Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){ | ||
1148 | /* | ||
1149 | * Set the left operand to the larger one by XOR swap. | ||
1150 | * First finish the first word "save". | ||
1151 | */ | ||
1152 | Dblext_xorfromintp1(save,rightp1,/*to*/rightp1); | ||
1153 | Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1); | ||
1154 | Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4, | ||
1155 | rightp2,rightp3,rightp4); | ||
1156 | /* also setup exponents used in rest of routine */ | ||
1157 | diff_exponent = add_exponent - mpy_exponent; | ||
1158 | result_exponent = add_exponent; | ||
1159 | } else { | ||
1160 | /* also setup exponents used in rest of routine */ | ||
1161 | diff_exponent = mpy_exponent - add_exponent; | ||
1162 | result_exponent = mpy_exponent; | ||
1163 | } | ||
1164 | /* Invariant: left is not smaller than right. */ | ||
1165 | |||
1166 | /* | ||
1167 | * Special case alignment of operands that would force alignment | ||
1168 | * beyond the extent of the extension. A further optimization | ||
1169 | * could special case this but only reduces the path length for | ||
1170 | * this infrequent case. | ||
1171 | */ | ||
1172 | if (diff_exponent > DBLEXT_THRESHOLD) { | ||
1173 | diff_exponent = DBLEXT_THRESHOLD; | ||
1174 | } | ||
1175 | |||
1176 | /* Align right operand by shifting it to the right */ | ||
1177 | Dblext_clear_sign(rightp1); | ||
1178 | Dblext_right_align(rightp1,rightp2,rightp3,rightp4, | ||
1179 | /*shifted by*/diff_exponent); | ||
1180 | |||
1181 | /* Treat sum and difference of the operands separately. */ | ||
1182 | if ((int)save < 0) { | ||
1183 | /* | ||
1184 | * Difference of the two operands. Overflow can occur if the | ||
1185 | * multiply overflowed. A borrow can occur out of the hidden | ||
1186 | * bit and force a post normalization phase. | ||
1187 | */ | ||
1188 | Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4, | ||
1189 | rightp1,rightp2,rightp3,rightp4, | ||
1190 | resultp1,resultp2,resultp3,resultp4); | ||
1191 | sign_save = Dbl_signextendedsign(resultp1); | ||
1192 | if (Dbl_iszero_hidden(resultp1)) { | ||
1193 | /* Handle normalization */ | ||
1194 | /* A straight foward algorithm would now shift the | ||
1195 | * result and extension left until the hidden bit | ||
1196 | * becomes one. Not all of the extension bits need | ||
1197 | * participate in the shift. Only the two most | ||
1198 | * significant bits (round and guard) are needed. | ||
1199 | * If only a single shift is needed then the guard | ||
1200 | * bit becomes a significant low order bit and the | ||
1201 | * extension must participate in the rounding. | ||
1202 | * If more than a single shift is needed, then all | ||
1203 | * bits to the right of the guard bit are zeros, | ||
1204 | * and the guard bit may or may not be zero. */ | ||
1205 | Dblext_leftshiftby1(resultp1,resultp2,resultp3, | ||
1206 | resultp4); | ||
1207 | |||
1208 | /* Need to check for a zero result. The sign and | ||
1209 | * exponent fields have already been zeroed. The more | ||
1210 | * efficient test of the full object can be used. | ||
1211 | */ | ||
1212 | if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) { | ||
1213 | /* Must have been "x-x" or "x+(-x)". */ | ||
1214 | if (Is_rounding_mode(ROUNDMINUS)) | ||
1215 | Dbl_setone_sign(resultp1); | ||
1216 | Dbl_copytoptr(resultp1,resultp2,dstptr); | ||
1217 | return(NOEXCEPTION); | ||
1218 | } | ||
1219 | result_exponent--; | ||
1220 | |||
1221 | /* Look to see if normalization is finished. */ | ||
1222 | if (Dbl_isone_hidden(resultp1)) { | ||
1223 | /* No further normalization is needed */ | ||
1224 | goto round; | ||
1225 | } | ||
1226 | |||
1227 | /* Discover first one bit to determine shift amount. | ||
1228 | * Use a modified binary search. We have already | ||
1229 | * shifted the result one position right and still | ||
1230 | * not found a one so the remainder of the extension | ||
1231 | * must be zero and simplifies rounding. */ | ||
1232 | /* Scan bytes */ | ||
1233 | while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) { | ||
1234 | Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4); | ||
1235 | result_exponent -= 8; | ||
1236 | } | ||
1237 | /* Now narrow it down to the nibble */ | ||
1238 | if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) { | ||
1239 | /* The lower nibble contains the | ||
1240 | * normalizing one */ | ||
1241 | Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4); | ||
1242 | result_exponent -= 4; | ||
1243 | } | ||
1244 | /* Select case where first bit is set (already | ||
1245 | * normalized) otherwise select the proper shift. */ | ||
1246 | jumpsize = Dbl_hiddenhigh3mantissa(resultp1); | ||
1247 | if (jumpsize <= 7) switch(jumpsize) { | ||
1248 | case 1: | ||
1249 | Dblext_leftshiftby3(resultp1,resultp2,resultp3, | ||
1250 | resultp4); | ||
1251 | result_exponent -= 3; | ||
1252 | break; | ||
1253 | case 2: | ||
1254 | case 3: | ||
1255 | Dblext_leftshiftby2(resultp1,resultp2,resultp3, | ||
1256 | resultp4); | ||
1257 | result_exponent -= 2; | ||
1258 | break; | ||
1259 | case 4: | ||
1260 | case 5: | ||
1261 | case 6: | ||
1262 | case 7: | ||
1263 | Dblext_leftshiftby1(resultp1,resultp2,resultp3, | ||
1264 | resultp4); | ||
1265 | result_exponent -= 1; | ||
1266 | break; | ||
1267 | } | ||
1268 | } /* end if (hidden...)... */ | ||
1269 | /* Fall through and round */ | ||
1270 | } /* end if (save < 0)... */ | ||
1271 | else { | ||
1272 | /* Add magnitudes */ | ||
1273 | Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4, | ||
1274 | rightp1,rightp2,rightp3,rightp4, | ||
1275 | /*to*/resultp1,resultp2,resultp3,resultp4); | ||
1276 | sign_save = Dbl_signextendedsign(resultp1); | ||
1277 | if (Dbl_isone_hiddenoverflow(resultp1)) { | ||
1278 | /* Prenormalization required. */ | ||
1279 | Dblext_arithrightshiftby1(resultp1,resultp2,resultp3, | ||
1280 | resultp4); | ||
1281 | result_exponent++; | ||
1282 | } /* end if hiddenoverflow... */ | ||
1283 | } /* end else ...add magnitudes... */ | ||
1284 | |||
1285 | /* Round the result. If the extension and lower two words are | ||
1286 | * all zeros, then the result is exact. Otherwise round in the | ||
1287 | * correct direction. Underflow is possible. If a postnormalization | ||
1288 | * is necessary, then the mantissa is all zeros so no shift is needed. | ||
1289 | */ | ||
1290 | round: | ||
1291 | if (result_exponent <= 0 && !Is_underflowtrap_enabled()) { | ||
1292 | Dblext_denormalize(resultp1,resultp2,resultp3,resultp4, | ||
1293 | result_exponent,is_tiny); | ||
1294 | } | ||
1295 | Dbl_set_sign(resultp1,/*using*/sign_save); | ||
1296 | if (Dblext_isnotzero_mantissap3(resultp3) || | ||
1297 | Dblext_isnotzero_mantissap4(resultp4)) { | ||
1298 | inexact = TRUE; | ||
1299 | switch(Rounding_mode()) { | ||
1300 | case ROUNDNEAREST: /* The default. */ | ||
1301 | if (Dblext_isone_highp3(resultp3)) { | ||
1302 | /* at least 1/2 ulp */ | ||
1303 | if (Dblext_isnotzero_low31p3(resultp3) || | ||
1304 | Dblext_isnotzero_mantissap4(resultp4) || | ||
1305 | Dblext_isone_lowp2(resultp2)) { | ||
1306 | /* either exactly half way and odd or | ||
1307 | * more than 1/2ulp */ | ||
1308 | Dbl_increment(resultp1,resultp2); | ||
1309 | } | ||
1310 | } | ||
1311 | break; | ||
1312 | |||
1313 | case ROUNDPLUS: | ||
1314 | if (Dbl_iszero_sign(resultp1)) { | ||
1315 | /* Round up positive results */ | ||
1316 | Dbl_increment(resultp1,resultp2); | ||
1317 | } | ||
1318 | break; | ||
1319 | |||
1320 | case ROUNDMINUS: | ||
1321 | if (Dbl_isone_sign(resultp1)) { | ||
1322 | /* Round down negative results */ | ||
1323 | Dbl_increment(resultp1,resultp2); | ||
1324 | } | ||
1325 | |||
1326 | case ROUNDZERO:; | ||
1327 | /* truncate is simple */ | ||
1328 | } /* end switch... */ | ||
1329 | if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++; | ||
1330 | } | ||
1331 | if (result_exponent >= DBL_INFINITY_EXPONENT) { | ||
1332 | /* Overflow */ | ||
1333 | if (Is_overflowtrap_enabled()) { | ||
1334 | /* | ||
1335 | * Adjust bias of result | ||
1336 | */ | ||
1337 | Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl); | ||
1338 | Dbl_copytoptr(resultp1,resultp2,dstptr); | ||
1339 | if (inexact) | ||
1340 | if (Is_inexacttrap_enabled()) | ||
1341 | return (OPC_2E_OVERFLOWEXCEPTION | | ||
1342 | OPC_2E_INEXACTEXCEPTION); | ||
1343 | else Set_inexactflag(); | ||
1344 | return (OPC_2E_OVERFLOWEXCEPTION); | ||
1345 | } | ||
1346 | inexact = TRUE; | ||
1347 | Set_overflowflag(); | ||
1348 | Dbl_setoverflow(resultp1,resultp2); | ||
1349 | } else if (result_exponent <= 0) { /* underflow case */ | ||
1350 | if (Is_underflowtrap_enabled()) { | ||
1351 | /* | ||
1352 | * Adjust bias of result | ||
1353 | */ | ||
1354 | Dbl_setwrapped_exponent(resultp1,result_exponent,unfl); | ||
1355 | Dbl_copytoptr(resultp1,resultp2,dstptr); | ||
1356 | if (inexact) | ||
1357 | if (Is_inexacttrap_enabled()) | ||
1358 | return (OPC_2E_UNDERFLOWEXCEPTION | | ||
1359 | OPC_2E_INEXACTEXCEPTION); | ||
1360 | else Set_inexactflag(); | ||
1361 | return(OPC_2E_UNDERFLOWEXCEPTION); | ||
1362 | } | ||
1363 | else if (inexact && is_tiny) Set_underflowflag(); | ||
1364 | } | ||
1365 | else Dbl_set_exponent(resultp1,result_exponent); | ||
1366 | Dbl_copytoptr(resultp1,resultp2,dstptr); | ||
1367 | if (inexact) | ||
1368 | if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION); | ||
1369 | else Set_inexactflag(); | ||
1370 | return(NOEXCEPTION); | ||
1371 | } | ||
1372 | |||
1373 | /* | ||
1374 | * Single Floating-point Multiply Fused Add | ||
1375 | */ | ||
1376 | |||
1377 | sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr) | ||
1378 | |||
1379 | sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr; | ||
1380 | unsigned int *status; | ||
1381 | { | ||
1382 | unsigned int opnd1, opnd2, opnd3; | ||
1383 | register unsigned int tmpresp1, tmpresp2; | ||
1384 | unsigned int rightp1, rightp2; | ||
1385 | unsigned int resultp1, resultp2 = 0; | ||
1386 | register int mpy_exponent, add_exponent, count; | ||
1387 | boolean inexact = FALSE, is_tiny = FALSE; | ||
1388 | |||
1389 | unsigned int signlessleft1, signlessright1, save; | ||
1390 | register int result_exponent, diff_exponent; | ||
1391 | int sign_save, jumpsize; | ||
1392 | |||
1393 | Sgl_copyfromptr(src1ptr,opnd1); | ||
1394 | Sgl_copyfromptr(src2ptr,opnd2); | ||
1395 | Sgl_copyfromptr(src3ptr,opnd3); | ||
1396 | |||
1397 | /* | ||
1398 | * set sign bit of result of multiply | ||
1399 | */ | ||
1400 | if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2)) | ||
1401 | Sgl_setnegativezero(resultp1); | ||
1402 | else Sgl_setzero(resultp1); | ||
1403 | |||
1404 | /* | ||
1405 | * Generate multiply exponent | ||
1406 | */ | ||
1407 | mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS; | ||
1408 | |||
1409 | /* | ||
1410 | * check first operand for NaN's or infinity | ||
1411 | */ | ||
1412 | if (Sgl_isinfinity_exponent(opnd1)) { | ||
1413 | if (Sgl_iszero_mantissa(opnd1)) { | ||
1414 | if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) { | ||
1415 | if (Sgl_iszero_exponentmantissa(opnd2)) { | ||
1416 | /* | ||
1417 | * invalid since operands are infinity | ||
1418 | * and zero | ||
1419 | */ | ||
1420 | if (Is_invalidtrap_enabled()) | ||
1421 | return(OPC_2E_INVALIDEXCEPTION); | ||
1422 | Set_invalidflag(); | ||
1423 | Sgl_makequietnan(resultp1); | ||
1424 | Sgl_copytoptr(resultp1,dstptr); | ||
1425 | return(NOEXCEPTION); | ||
1426 | } | ||
1427 | /* | ||
1428 | * Check third operand for infinity with a | ||
1429 | * sign opposite of the multiply result | ||
1430 | */ | ||
1431 | if (Sgl_isinfinity(opnd3) && | ||
1432 | (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) { | ||
1433 | /* | ||
1434 | * invalid since attempting a magnitude | ||
1435 | * subtraction of infinities | ||
1436 | */ | ||
1437 | if (Is_invalidtrap_enabled()) | ||
1438 | return(OPC_2E_INVALIDEXCEPTION); | ||
1439 | Set_invalidflag(); | ||
1440 | Sgl_makequietnan(resultp1); | ||
1441 | Sgl_copytoptr(resultp1,dstptr); | ||
1442 | return(NOEXCEPTION); | ||
1443 | } | ||
1444 | |||
1445 | /* | ||
1446 | * return infinity | ||
1447 | */ | ||
1448 | Sgl_setinfinity_exponentmantissa(resultp1); | ||
1449 | Sgl_copytoptr(resultp1,dstptr); | ||
1450 | return(NOEXCEPTION); | ||
1451 | } | ||
1452 | } | ||
1453 | else { | ||
1454 | /* | ||
1455 | * is NaN; signaling or quiet? | ||
1456 | */ | ||
1457 | if (Sgl_isone_signaling(opnd1)) { | ||
1458 | /* trap if INVALIDTRAP enabled */ | ||
1459 | if (Is_invalidtrap_enabled()) | ||
1460 | return(OPC_2E_INVALIDEXCEPTION); | ||
1461 | /* make NaN quiet */ | ||
1462 | Set_invalidflag(); | ||
1463 | Sgl_set_quiet(opnd1); | ||
1464 | } | ||
1465 | /* | ||
1466 | * is second operand a signaling NaN? | ||
1467 | */ | ||
1468 | else if (Sgl_is_signalingnan(opnd2)) { | ||
1469 | /* trap if INVALIDTRAP enabled */ | ||
1470 | if (Is_invalidtrap_enabled()) | ||
1471 | return(OPC_2E_INVALIDEXCEPTION); | ||
1472 | /* make NaN quiet */ | ||
1473 | Set_invalidflag(); | ||
1474 | Sgl_set_quiet(opnd2); | ||
1475 | Sgl_copytoptr(opnd2,dstptr); | ||
1476 | return(NOEXCEPTION); | ||
1477 | } | ||
1478 | /* | ||
1479 | * is third operand a signaling NaN? | ||
1480 | */ | ||
1481 | else if (Sgl_is_signalingnan(opnd3)) { | ||
1482 | /* trap if INVALIDTRAP enabled */ | ||
1483 | if (Is_invalidtrap_enabled()) | ||
1484 | return(OPC_2E_INVALIDEXCEPTION); | ||
1485 | /* make NaN quiet */ | ||
1486 | Set_invalidflag(); | ||
1487 | Sgl_set_quiet(opnd3); | ||
1488 | Sgl_copytoptr(opnd3,dstptr); | ||
1489 | return(NOEXCEPTION); | ||
1490 | } | ||
1491 | /* | ||
1492 | * return quiet NaN | ||
1493 | */ | ||
1494 | Sgl_copytoptr(opnd1,dstptr); | ||
1495 | return(NOEXCEPTION); | ||
1496 | } | ||
1497 | } | ||
1498 | |||
1499 | /* | ||
1500 | * check second operand for NaN's or infinity | ||
1501 | */ | ||
1502 | if (Sgl_isinfinity_exponent(opnd2)) { | ||
1503 | if (Sgl_iszero_mantissa(opnd2)) { | ||
1504 | if (Sgl_isnotnan(opnd3)) { | ||
1505 | if (Sgl_iszero_exponentmantissa(opnd1)) { | ||
1506 | /* | ||
1507 | * invalid since multiply operands are | ||
1508 | * zero & infinity | ||
1509 | */ | ||
1510 | if (Is_invalidtrap_enabled()) | ||
1511 | return(OPC_2E_INVALIDEXCEPTION); | ||
1512 | Set_invalidflag(); | ||
1513 | Sgl_makequietnan(opnd2); | ||
1514 | Sgl_copytoptr(opnd2,dstptr); | ||
1515 | return(NOEXCEPTION); | ||
1516 | } | ||
1517 | |||
1518 | /* | ||
1519 | * Check third operand for infinity with a | ||
1520 | * sign opposite of the multiply result | ||
1521 | */ | ||
1522 | if (Sgl_isinfinity(opnd3) && | ||
1523 | (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) { | ||
1524 | /* | ||
1525 | * invalid since attempting a magnitude | ||
1526 | * subtraction of infinities | ||
1527 | */ | ||
1528 | if (Is_invalidtrap_enabled()) | ||
1529 | return(OPC_2E_INVALIDEXCEPTION); | ||
1530 | Set_invalidflag(); | ||
1531 | Sgl_makequietnan(resultp1); | ||
1532 | Sgl_copytoptr(resultp1,dstptr); | ||
1533 | return(NOEXCEPTION); | ||
1534 | } | ||
1535 | |||
1536 | /* | ||
1537 | * return infinity | ||
1538 | */ | ||
1539 | Sgl_setinfinity_exponentmantissa(resultp1); | ||
1540 | Sgl_copytoptr(resultp1,dstptr); | ||
1541 | return(NOEXCEPTION); | ||
1542 | } | ||
1543 | } | ||
1544 | else { | ||
1545 | /* | ||
1546 | * is NaN; signaling or quiet? | ||
1547 | */ | ||
1548 | if (Sgl_isone_signaling(opnd2)) { | ||
1549 | /* trap if INVALIDTRAP enabled */ | ||
1550 | if (Is_invalidtrap_enabled()) | ||
1551 | return(OPC_2E_INVALIDEXCEPTION); | ||
1552 | /* make NaN quiet */ | ||
1553 | Set_invalidflag(); | ||
1554 | Sgl_set_quiet(opnd2); | ||
1555 | } | ||
1556 | /* | ||
1557 | * is third operand a signaling NaN? | ||
1558 | */ | ||
1559 | else if (Sgl_is_signalingnan(opnd3)) { | ||
1560 | /* trap if INVALIDTRAP enabled */ | ||
1561 | if (Is_invalidtrap_enabled()) | ||
1562 | return(OPC_2E_INVALIDEXCEPTION); | ||
1563 | /* make NaN quiet */ | ||
1564 | Set_invalidflag(); | ||
1565 | Sgl_set_quiet(opnd3); | ||
1566 | Sgl_copytoptr(opnd3,dstptr); | ||
1567 | return(NOEXCEPTION); | ||
1568 | } | ||
1569 | /* | ||
1570 | * return quiet NaN | ||
1571 | */ | ||
1572 | Sgl_copytoptr(opnd2,dstptr); | ||
1573 | return(NOEXCEPTION); | ||
1574 | } | ||
1575 | } | ||
1576 | |||
1577 | /* | ||
1578 | * check third operand for NaN's or infinity | ||
1579 | */ | ||
1580 | if (Sgl_isinfinity_exponent(opnd3)) { | ||
1581 | if (Sgl_iszero_mantissa(opnd3)) { | ||
1582 | /* return infinity */ | ||
1583 | Sgl_copytoptr(opnd3,dstptr); | ||
1584 | return(NOEXCEPTION); | ||
1585 | } else { | ||
1586 | /* | ||
1587 | * is NaN; signaling or quiet? | ||
1588 | */ | ||
1589 | if (Sgl_isone_signaling(opnd3)) { | ||
1590 | /* trap if INVALIDTRAP enabled */ | ||
1591 | if (Is_invalidtrap_enabled()) | ||
1592 | return(OPC_2E_INVALIDEXCEPTION); | ||
1593 | /* make NaN quiet */ | ||
1594 | Set_invalidflag(); | ||
1595 | Sgl_set_quiet(opnd3); | ||
1596 | } | ||
1597 | /* | ||
1598 | * return quiet NaN | ||
1599 | */ | ||
1600 | Sgl_copytoptr(opnd3,dstptr); | ||
1601 | return(NOEXCEPTION); | ||
1602 | } | ||
1603 | } | ||
1604 | |||
1605 | /* | ||
1606 | * Generate multiply mantissa | ||
1607 | */ | ||
1608 | if (Sgl_isnotzero_exponent(opnd1)) { | ||
1609 | /* set hidden bit */ | ||
1610 | Sgl_clear_signexponent_set_hidden(opnd1); | ||
1611 | } | ||
1612 | else { | ||
1613 | /* check for zero */ | ||
1614 | if (Sgl_iszero_mantissa(opnd1)) { | ||
1615 | /* | ||
1616 | * Perform the add opnd3 with zero here. | ||
1617 | */ | ||
1618 | if (Sgl_iszero_exponentmantissa(opnd3)) { | ||
1619 | if (Is_rounding_mode(ROUNDMINUS)) { | ||
1620 | Sgl_or_signs(opnd3,resultp1); | ||
1621 | } else { | ||
1622 | Sgl_and_signs(opnd3,resultp1); | ||
1623 | } | ||
1624 | } | ||
1625 | /* | ||
1626 | * Now let's check for trapped underflow case. | ||
1627 | */ | ||
1628 | else if (Sgl_iszero_exponent(opnd3) && | ||
1629 | Is_underflowtrap_enabled()) { | ||
1630 | /* need to normalize results mantissa */ | ||
1631 | sign_save = Sgl_signextendedsign(opnd3); | ||
1632 | result_exponent = 0; | ||
1633 | Sgl_leftshiftby1(opnd3); | ||
1634 | Sgl_normalize(opnd3,result_exponent); | ||
1635 | Sgl_set_sign(opnd3,/*using*/sign_save); | ||
1636 | Sgl_setwrapped_exponent(opnd3,result_exponent, | ||
1637 | unfl); | ||
1638 | Sgl_copytoptr(opnd3,dstptr); | ||
1639 | /* inexact = FALSE */ | ||
1640 | return(OPC_2E_UNDERFLOWEXCEPTION); | ||
1641 | } | ||
1642 | Sgl_copytoptr(opnd3,dstptr); | ||
1643 | return(NOEXCEPTION); | ||
1644 | } | ||
1645 | /* is denormalized, adjust exponent */ | ||
1646 | Sgl_clear_signexponent(opnd1); | ||
1647 | Sgl_leftshiftby1(opnd1); | ||
1648 | Sgl_normalize(opnd1,mpy_exponent); | ||
1649 | } | ||
1650 | /* opnd2 needs to have hidden bit set with msb in hidden bit */ | ||
1651 | if (Sgl_isnotzero_exponent(opnd2)) { | ||
1652 | Sgl_clear_signexponent_set_hidden(opnd2); | ||
1653 | } | ||
1654 | else { | ||
1655 | /* check for zero */ | ||
1656 | if (Sgl_iszero_mantissa(opnd2)) { | ||
1657 | /* | ||
1658 | * Perform the add opnd3 with zero here. | ||
1659 | */ | ||
1660 | if (Sgl_iszero_exponentmantissa(opnd3)) { | ||
1661 | if (Is_rounding_mode(ROUNDMINUS)) { | ||
1662 | Sgl_or_signs(opnd3,resultp1); | ||
1663 | } else { | ||
1664 | Sgl_and_signs(opnd3,resultp1); | ||
1665 | } | ||
1666 | } | ||
1667 | /* | ||
1668 | * Now let's check for trapped underflow case. | ||
1669 | */ | ||
1670 | else if (Sgl_iszero_exponent(opnd3) && | ||
1671 | Is_underflowtrap_enabled()) { | ||
1672 | /* need to normalize results mantissa */ | ||
1673 | sign_save = Sgl_signextendedsign(opnd3); | ||
1674 | result_exponent = 0; | ||
1675 | Sgl_leftshiftby1(opnd3); | ||
1676 | Sgl_normalize(opnd3,result_exponent); | ||
1677 | Sgl_set_sign(opnd3,/*using*/sign_save); | ||
1678 | Sgl_setwrapped_exponent(opnd3,result_exponent, | ||
1679 | unfl); | ||
1680 | Sgl_copytoptr(opnd3,dstptr); | ||
1681 | /* inexact = FALSE */ | ||
1682 | return(OPC_2E_UNDERFLOWEXCEPTION); | ||
1683 | } | ||
1684 | Sgl_copytoptr(opnd3,dstptr); | ||
1685 | return(NOEXCEPTION); | ||
1686 | } | ||
1687 | /* is denormalized; want to normalize */ | ||
1688 | Sgl_clear_signexponent(opnd2); | ||
1689 | Sgl_leftshiftby1(opnd2); | ||
1690 | Sgl_normalize(opnd2,mpy_exponent); | ||
1691 | } | ||
1692 | |||
1693 | /* Multiply the first two source mantissas together */ | ||
1694 | |||
1695 | /* | ||
1696 | * The intermediate result will be kept in tmpres, | ||
1697 | * which needs enough room for 106 bits of mantissa, | ||
1698 | * so lets call it a Double extended. | ||
1699 | */ | ||
1700 | Sglext_setzero(tmpresp1,tmpresp2); | ||
1701 | |||
1702 | /* | ||
1703 | * Four bits at a time are inspected in each loop, and a | ||
1704 | * simple shift and add multiply algorithm is used. | ||
1705 | */ | ||
1706 | for (count = SGL_P-1; count >= 0; count -= 4) { | ||
1707 | Sglext_rightshiftby4(tmpresp1,tmpresp2); | ||
1708 | if (Sbit28(opnd1)) { | ||
1709 | /* Twoword_add should be an ADD followed by 2 ADDC's */ | ||
1710 | Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0); | ||
1711 | } | ||
1712 | if (Sbit29(opnd1)) { | ||
1713 | Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0); | ||
1714 | } | ||
1715 | if (Sbit30(opnd1)) { | ||
1716 | Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0); | ||
1717 | } | ||
1718 | if (Sbit31(opnd1)) { | ||
1719 | Twoword_add(tmpresp1, tmpresp2, opnd2, 0); | ||
1720 | } | ||
1721 | Sgl_rightshiftby4(opnd1); | ||
1722 | } | ||
1723 | if (Is_sexthiddenoverflow(tmpresp1)) { | ||
1724 | /* result mantissa >= 2 (mantissa overflow) */ | ||
1725 | mpy_exponent++; | ||
1726 | Sglext_rightshiftby4(tmpresp1,tmpresp2); | ||
1727 | } else { | ||
1728 | Sglext_rightshiftby3(tmpresp1,tmpresp2); | ||
1729 | } | ||
1730 | |||
1731 | /* | ||
1732 | * Restore the sign of the mpy result which was saved in resultp1. | ||
1733 | * The exponent will continue to be kept in mpy_exponent. | ||
1734 | */ | ||
1735 | Sglext_set_sign(tmpresp1,Sgl_sign(resultp1)); | ||
1736 | |||
1737 | /* | ||
1738 | * No rounding is required, since the result of the multiply | ||
1739 | * is exact in the extended format. | ||
1740 | */ | ||
1741 | |||
1742 | /* | ||
1743 | * Now we are ready to perform the add portion of the operation. | ||
1744 | * | ||
1745 | * The exponents need to be kept as integers for now, since the | ||
1746 | * multiply result might not fit into the exponent field. We | ||
1747 | * can't overflow or underflow because of this yet, since the | ||
1748 | * add could bring the final result back into range. | ||
1749 | */ | ||
1750 | add_exponent = Sgl_exponent(opnd3); | ||
1751 | |||
1752 | /* | ||
1753 | * Check for denormalized or zero add operand. | ||
1754 | */ | ||
1755 | if (add_exponent == 0) { | ||
1756 | /* check for zero */ | ||
1757 | if (Sgl_iszero_mantissa(opnd3)) { | ||
1758 | /* right is zero */ | ||
1759 | /* Left can't be zero and must be result. | ||
1760 | * | ||
1761 | * The final result is now in tmpres and mpy_exponent, | ||
1762 | * and needs to be rounded and squeezed back into | ||
1763 | * double precision format from double extended. | ||
1764 | */ | ||
1765 | result_exponent = mpy_exponent; | ||
1766 | Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2); | ||
1767 | sign_save = Sgl_signextendedsign(resultp1);/*save sign*/ | ||
1768 | goto round; | ||
1769 | } | ||
1770 | |||
1771 | /* | ||
1772 | * Neither are zeroes. | ||
1773 | * Adjust exponent and normalize add operand. | ||
1774 | */ | ||
1775 | sign_save = Sgl_signextendedsign(opnd3); /* save sign */ | ||
1776 | Sgl_clear_signexponent(opnd3); | ||
1777 | Sgl_leftshiftby1(opnd3); | ||
1778 | Sgl_normalize(opnd3,add_exponent); | ||
1779 | Sgl_set_sign(opnd3,sign_save); /* restore sign */ | ||
1780 | } else { | ||
1781 | Sgl_clear_exponent_set_hidden(opnd3); | ||
1782 | } | ||
1783 | /* | ||
1784 | * Copy opnd3 to the double extended variable called right. | ||
1785 | */ | ||
1786 | Sgl_copyto_sglext(opnd3,rightp1,rightp2); | ||
1787 | |||
1788 | /* | ||
1789 | * A zero "save" helps discover equal operands (for later), | ||
1790 | * and is used in swapping operands (if needed). | ||
1791 | */ | ||
1792 | Sglext_xortointp1(tmpresp1,rightp1,/*to*/save); | ||
1793 | |||
1794 | /* | ||
1795 | * Compare magnitude of operands. | ||
1796 | */ | ||
1797 | Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1); | ||
1798 | Sglext_copytoint_exponentmantissa(rightp1,signlessright1); | ||
1799 | if (mpy_exponent < add_exponent || mpy_exponent == add_exponent && | ||
1800 | Sglext_ismagnitudeless(signlessleft1,signlessright1)) { | ||
1801 | /* | ||
1802 | * Set the left operand to the larger one by XOR swap. | ||
1803 | * First finish the first word "save". | ||
1804 | */ | ||
1805 | Sglext_xorfromintp1(save,rightp1,/*to*/rightp1); | ||
1806 | Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1); | ||
1807 | Sglext_swap_lower(tmpresp2,rightp2); | ||
1808 | /* also setup exponents used in rest of routine */ | ||
1809 | diff_exponent = add_exponent - mpy_exponent; | ||
1810 | result_exponent = add_exponent; | ||
1811 | } else { | ||
1812 | /* also setup exponents used in rest of routine */ | ||
1813 | diff_exponent = mpy_exponent - add_exponent; | ||
1814 | result_exponent = mpy_exponent; | ||
1815 | } | ||
1816 | /* Invariant: left is not smaller than right. */ | ||
1817 | |||
1818 | /* | ||
1819 | * Special case alignment of operands that would force alignment | ||
1820 | * beyond the extent of the extension. A further optimization | ||
1821 | * could special case this but only reduces the path length for | ||
1822 | * this infrequent case. | ||
1823 | */ | ||
1824 | if (diff_exponent > SGLEXT_THRESHOLD) { | ||
1825 | diff_exponent = SGLEXT_THRESHOLD; | ||
1826 | } | ||
1827 | |||
1828 | /* Align right operand by shifting it to the right */ | ||
1829 | Sglext_clear_sign(rightp1); | ||
1830 | Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent); | ||
1831 | |||
1832 | /* Treat sum and difference of the operands separately. */ | ||
1833 | if ((int)save < 0) { | ||
1834 | /* | ||
1835 | * Difference of the two operands. Overflow can occur if the | ||
1836 | * multiply overflowed. A borrow can occur out of the hidden | ||
1837 | * bit and force a post normalization phase. | ||
1838 | */ | ||
1839 | Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2, | ||
1840 | resultp1,resultp2); | ||
1841 | sign_save = Sgl_signextendedsign(resultp1); | ||
1842 | if (Sgl_iszero_hidden(resultp1)) { | ||
1843 | /* Handle normalization */ | ||
1844 | /* A straight foward algorithm would now shift the | ||
1845 | * result and extension left until the hidden bit | ||
1846 | * becomes one. Not all of the extension bits need | ||
1847 | * participate in the shift. Only the two most | ||
1848 | * significant bits (round and guard) are needed. | ||
1849 | * If only a single shift is needed then the guard | ||
1850 | * bit becomes a significant low order bit and the | ||
1851 | * extension must participate in the rounding. | ||
1852 | * If more than a single shift is needed, then all | ||
1853 | * bits to the right of the guard bit are zeros, | ||
1854 | * and the guard bit may or may not be zero. */ | ||
1855 | Sglext_leftshiftby1(resultp1,resultp2); | ||
1856 | |||
1857 | /* Need to check for a zero result. The sign and | ||
1858 | * exponent fields have already been zeroed. The more | ||
1859 | * efficient test of the full object can be used. | ||
1860 | */ | ||
1861 | if (Sglext_iszero(resultp1,resultp2)) { | ||
1862 | /* Must have been "x-x" or "x+(-x)". */ | ||
1863 | if (Is_rounding_mode(ROUNDMINUS)) | ||
1864 | Sgl_setone_sign(resultp1); | ||
1865 | Sgl_copytoptr(resultp1,dstptr); | ||
1866 | return(NOEXCEPTION); | ||
1867 | } | ||
1868 | result_exponent--; | ||
1869 | |||
1870 | /* Look to see if normalization is finished. */ | ||
1871 | if (Sgl_isone_hidden(resultp1)) { | ||
1872 | /* No further normalization is needed */ | ||
1873 | goto round; | ||
1874 | } | ||
1875 | |||
1876 | /* Discover first one bit to determine shift amount. | ||
1877 | * Use a modified binary search. We have already | ||
1878 | * shifted the result one position right and still | ||
1879 | * not found a one so the remainder of the extension | ||
1880 | * must be zero and simplifies rounding. */ | ||
1881 | /* Scan bytes */ | ||
1882 | while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) { | ||
1883 | Sglext_leftshiftby8(resultp1,resultp2); | ||
1884 | result_exponent -= 8; | ||
1885 | } | ||
1886 | /* Now narrow it down to the nibble */ | ||
1887 | if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) { | ||
1888 | /* The lower nibble contains the | ||
1889 | * normalizing one */ | ||
1890 | Sglext_leftshiftby4(resultp1,resultp2); | ||
1891 | result_exponent -= 4; | ||
1892 | } | ||
1893 | /* Select case where first bit is set (already | ||
1894 | * normalized) otherwise select the proper shift. */ | ||
1895 | jumpsize = Sgl_hiddenhigh3mantissa(resultp1); | ||
1896 | if (jumpsize <= 7) switch(jumpsize) { | ||
1897 | case 1: | ||
1898 | Sglext_leftshiftby3(resultp1,resultp2); | ||
1899 | result_exponent -= 3; | ||
1900 | break; | ||
1901 | case 2: | ||
1902 | case 3: | ||
1903 | Sglext_leftshiftby2(resultp1,resultp2); | ||
1904 | result_exponent -= 2; | ||
1905 | break; | ||
1906 | case 4: | ||
1907 | case 5: | ||
1908 | case 6: | ||
1909 | case 7: | ||
1910 | Sglext_leftshiftby1(resultp1,resultp2); | ||
1911 | result_exponent -= 1; | ||
1912 | break; | ||
1913 | } | ||
1914 | } /* end if (hidden...)... */ | ||
1915 | /* Fall through and round */ | ||
1916 | } /* end if (save < 0)... */ | ||
1917 | else { | ||
1918 | /* Add magnitudes */ | ||
1919 | Sglext_addition(tmpresp1,tmpresp2, | ||
1920 | rightp1,rightp2, /*to*/resultp1,resultp2); | ||
1921 | sign_save = Sgl_signextendedsign(resultp1); | ||
1922 | if (Sgl_isone_hiddenoverflow(resultp1)) { | ||
1923 | /* Prenormalization required. */ | ||
1924 | Sglext_arithrightshiftby1(resultp1,resultp2); | ||
1925 | result_exponent++; | ||
1926 | } /* end if hiddenoverflow... */ | ||
1927 | } /* end else ...add magnitudes... */ | ||
1928 | |||
1929 | /* Round the result. If the extension and lower two words are | ||
1930 | * all zeros, then the result is exact. Otherwise round in the | ||
1931 | * correct direction. Underflow is possible. If a postnormalization | ||
1932 | * is necessary, then the mantissa is all zeros so no shift is needed. | ||
1933 | */ | ||
1934 | round: | ||
1935 | if (result_exponent <= 0 && !Is_underflowtrap_enabled()) { | ||
1936 | Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny); | ||
1937 | } | ||
1938 | Sgl_set_sign(resultp1,/*using*/sign_save); | ||
1939 | if (Sglext_isnotzero_mantissap2(resultp2)) { | ||
1940 | inexact = TRUE; | ||
1941 | switch(Rounding_mode()) { | ||
1942 | case ROUNDNEAREST: /* The default. */ | ||
1943 | if (Sglext_isone_highp2(resultp2)) { | ||
1944 | /* at least 1/2 ulp */ | ||
1945 | if (Sglext_isnotzero_low31p2(resultp2) || | ||
1946 | Sglext_isone_lowp1(resultp1)) { | ||
1947 | /* either exactly half way and odd or | ||
1948 | * more than 1/2ulp */ | ||
1949 | Sgl_increment(resultp1); | ||
1950 | } | ||
1951 | } | ||
1952 | break; | ||
1953 | |||
1954 | case ROUNDPLUS: | ||
1955 | if (Sgl_iszero_sign(resultp1)) { | ||
1956 | /* Round up positive results */ | ||
1957 | Sgl_increment(resultp1); | ||
1958 | } | ||
1959 | break; | ||
1960 | |||
1961 | case ROUNDMINUS: | ||
1962 | if (Sgl_isone_sign(resultp1)) { | ||
1963 | /* Round down negative results */ | ||
1964 | Sgl_increment(resultp1); | ||
1965 | } | ||
1966 | |||
1967 | case ROUNDZERO:; | ||
1968 | /* truncate is simple */ | ||
1969 | } /* end switch... */ | ||
1970 | if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++; | ||
1971 | } | ||
1972 | if (result_exponent >= SGL_INFINITY_EXPONENT) { | ||
1973 | /* Overflow */ | ||
1974 | if (Is_overflowtrap_enabled()) { | ||
1975 | /* | ||
1976 | * Adjust bias of result | ||
1977 | */ | ||
1978 | Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl); | ||
1979 | Sgl_copytoptr(resultp1,dstptr); | ||
1980 | if (inexact) | ||
1981 | if (Is_inexacttrap_enabled()) | ||
1982 | return (OPC_2E_OVERFLOWEXCEPTION | | ||
1983 | OPC_2E_INEXACTEXCEPTION); | ||
1984 | else Set_inexactflag(); | ||
1985 | return (OPC_2E_OVERFLOWEXCEPTION); | ||
1986 | } | ||
1987 | inexact = TRUE; | ||
1988 | Set_overflowflag(); | ||
1989 | Sgl_setoverflow(resultp1); | ||
1990 | } else if (result_exponent <= 0) { /* underflow case */ | ||
1991 | if (Is_underflowtrap_enabled()) { | ||
1992 | /* | ||
1993 | * Adjust bias of result | ||
1994 | */ | ||
1995 | Sgl_setwrapped_exponent(resultp1,result_exponent,unfl); | ||
1996 | Sgl_copytoptr(resultp1,dstptr); | ||
1997 | if (inexact) | ||
1998 | if (Is_inexacttrap_enabled()) | ||
1999 | return (OPC_2E_UNDERFLOWEXCEPTION | | ||
2000 | OPC_2E_INEXACTEXCEPTION); | ||
2001 | else Set_inexactflag(); | ||
2002 | return(OPC_2E_UNDERFLOWEXCEPTION); | ||
2003 | } | ||
2004 | else if (inexact && is_tiny) Set_underflowflag(); | ||
2005 | } | ||
2006 | else Sgl_set_exponent(resultp1,result_exponent); | ||
2007 | Sgl_copytoptr(resultp1,dstptr); | ||
2008 | if (inexact) | ||
2009 | if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION); | ||
2010 | else Set_inexactflag(); | ||
2011 | return(NOEXCEPTION); | ||
2012 | } | ||
2013 | |||
2014 | /* | ||
2015 | * Single Floating-point Multiply Negate Fused Add | ||
2016 | */ | ||
2017 | |||
2018 | sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr) | ||
2019 | |||
2020 | sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr; | ||
2021 | unsigned int *status; | ||
2022 | { | ||
2023 | unsigned int opnd1, opnd2, opnd3; | ||
2024 | register unsigned int tmpresp1, tmpresp2; | ||
2025 | unsigned int rightp1, rightp2; | ||
2026 | unsigned int resultp1, resultp2 = 0; | ||
2027 | register int mpy_exponent, add_exponent, count; | ||
2028 | boolean inexact = FALSE, is_tiny = FALSE; | ||
2029 | |||
2030 | unsigned int signlessleft1, signlessright1, save; | ||
2031 | register int result_exponent, diff_exponent; | ||
2032 | int sign_save, jumpsize; | ||
2033 | |||
2034 | Sgl_copyfromptr(src1ptr,opnd1); | ||
2035 | Sgl_copyfromptr(src2ptr,opnd2); | ||
2036 | Sgl_copyfromptr(src3ptr,opnd3); | ||
2037 | |||
2038 | /* | ||
2039 | * set sign bit of result of multiply | ||
2040 | */ | ||
2041 | if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2)) | ||
2042 | Sgl_setzero(resultp1); | ||
2043 | else | ||
2044 | Sgl_setnegativezero(resultp1); | ||
2045 | |||
2046 | /* | ||
2047 | * Generate multiply exponent | ||
2048 | */ | ||
2049 | mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS; | ||
2050 | |||
2051 | /* | ||
2052 | * check first operand for NaN's or infinity | ||
2053 | */ | ||
2054 | if (Sgl_isinfinity_exponent(opnd1)) { | ||
2055 | if (Sgl_iszero_mantissa(opnd1)) { | ||
2056 | if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) { | ||
2057 | if (Sgl_iszero_exponentmantissa(opnd2)) { | ||
2058 | /* | ||
2059 | * invalid since operands are infinity | ||
2060 | * and zero | ||
2061 | */ | ||
2062 | if (Is_invalidtrap_enabled()) | ||
2063 | return(OPC_2E_INVALIDEXCEPTION); | ||
2064 | Set_invalidflag(); | ||
2065 | Sgl_makequietnan(resultp1); | ||
2066 | Sgl_copytoptr(resultp1,dstptr); | ||
2067 | return(NOEXCEPTION); | ||
2068 | } | ||
2069 | /* | ||
2070 | * Check third operand for infinity with a | ||
2071 | * sign opposite of the multiply result | ||
2072 | */ | ||
2073 | if (Sgl_isinfinity(opnd3) && | ||
2074 | (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) { | ||
2075 | /* | ||
2076 | * invalid since attempting a magnitude | ||
2077 | * subtraction of infinities | ||
2078 | */ | ||
2079 | if (Is_invalidtrap_enabled()) | ||
2080 | return(OPC_2E_INVALIDEXCEPTION); | ||
2081 | Set_invalidflag(); | ||
2082 | Sgl_makequietnan(resultp1); | ||
2083 | Sgl_copytoptr(resultp1,dstptr); | ||
2084 | return(NOEXCEPTION); | ||
2085 | } | ||
2086 | |||
2087 | /* | ||
2088 | * return infinity | ||
2089 | */ | ||
2090 | Sgl_setinfinity_exponentmantissa(resultp1); | ||
2091 | Sgl_copytoptr(resultp1,dstptr); | ||
2092 | return(NOEXCEPTION); | ||
2093 | } | ||
2094 | } | ||
2095 | else { | ||
2096 | /* | ||
2097 | * is NaN; signaling or quiet? | ||
2098 | */ | ||
2099 | if (Sgl_isone_signaling(opnd1)) { | ||
2100 | /* trap if INVALIDTRAP enabled */ | ||
2101 | if (Is_invalidtrap_enabled()) | ||
2102 | return(OPC_2E_INVALIDEXCEPTION); | ||
2103 | /* make NaN quiet */ | ||
2104 | Set_invalidflag(); | ||
2105 | Sgl_set_quiet(opnd1); | ||
2106 | } | ||
2107 | /* | ||
2108 | * is second operand a signaling NaN? | ||
2109 | */ | ||
2110 | else if (Sgl_is_signalingnan(opnd2)) { | ||
2111 | /* trap if INVALIDTRAP enabled */ | ||
2112 | if (Is_invalidtrap_enabled()) | ||
2113 | return(OPC_2E_INVALIDEXCEPTION); | ||
2114 | /* make NaN quiet */ | ||
2115 | Set_invalidflag(); | ||
2116 | Sgl_set_quiet(opnd2); | ||
2117 | Sgl_copytoptr(opnd2,dstptr); | ||
2118 | return(NOEXCEPTION); | ||
2119 | } | ||
2120 | /* | ||
2121 | * is third operand a signaling NaN? | ||
2122 | */ | ||
2123 | else if (Sgl_is_signalingnan(opnd3)) { | ||
2124 | /* trap if INVALIDTRAP enabled */ | ||
2125 | if (Is_invalidtrap_enabled()) | ||
2126 | return(OPC_2E_INVALIDEXCEPTION); | ||
2127 | /* make NaN quiet */ | ||
2128 | Set_invalidflag(); | ||
2129 | Sgl_set_quiet(opnd3); | ||
2130 | Sgl_copytoptr(opnd3,dstptr); | ||
2131 | return(NOEXCEPTION); | ||
2132 | } | ||
2133 | /* | ||
2134 | * return quiet NaN | ||
2135 | */ | ||
2136 | Sgl_copytoptr(opnd1,dstptr); | ||
2137 | return(NOEXCEPTION); | ||
2138 | } | ||
2139 | } | ||
2140 | |||
2141 | /* | ||
2142 | * check second operand for NaN's or infinity | ||
2143 | */ | ||
2144 | if (Sgl_isinfinity_exponent(opnd2)) { | ||
2145 | if (Sgl_iszero_mantissa(opnd2)) { | ||
2146 | if (Sgl_isnotnan(opnd3)) { | ||
2147 | if (Sgl_iszero_exponentmantissa(opnd1)) { | ||
2148 | /* | ||
2149 | * invalid since multiply operands are | ||
2150 | * zero & infinity | ||
2151 | */ | ||
2152 | if (Is_invalidtrap_enabled()) | ||
2153 | return(OPC_2E_INVALIDEXCEPTION); | ||
2154 | Set_invalidflag(); | ||
2155 | Sgl_makequietnan(opnd2); | ||
2156 | Sgl_copytoptr(opnd2,dstptr); | ||
2157 | return(NOEXCEPTION); | ||
2158 | } | ||
2159 | |||
2160 | /* | ||
2161 | * Check third operand for infinity with a | ||
2162 | * sign opposite of the multiply result | ||
2163 | */ | ||
2164 | if (Sgl_isinfinity(opnd3) && | ||
2165 | (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) { | ||
2166 | /* | ||
2167 | * invalid since attempting a magnitude | ||
2168 | * subtraction of infinities | ||
2169 | */ | ||
2170 | if (Is_invalidtrap_enabled()) | ||
2171 | return(OPC_2E_INVALIDEXCEPTION); | ||
2172 | Set_invalidflag(); | ||
2173 | Sgl_makequietnan(resultp1); | ||
2174 | Sgl_copytoptr(resultp1,dstptr); | ||
2175 | return(NOEXCEPTION); | ||
2176 | } | ||
2177 | |||
2178 | /* | ||
2179 | * return infinity | ||
2180 | */ | ||
2181 | Sgl_setinfinity_exponentmantissa(resultp1); | ||
2182 | Sgl_copytoptr(resultp1,dstptr); | ||
2183 | return(NOEXCEPTION); | ||
2184 | } | ||
2185 | } | ||
2186 | else { | ||
2187 | /* | ||
2188 | * is NaN; signaling or quiet? | ||
2189 | */ | ||
2190 | if (Sgl_isone_signaling(opnd2)) { | ||
2191 | /* trap if INVALIDTRAP enabled */ | ||
2192 | if (Is_invalidtrap_enabled()) | ||
2193 | return(OPC_2E_INVALIDEXCEPTION); | ||
2194 | /* make NaN quiet */ | ||
2195 | Set_invalidflag(); | ||
2196 | Sgl_set_quiet(opnd2); | ||
2197 | } | ||
2198 | /* | ||
2199 | * is third operand a signaling NaN? | ||
2200 | */ | ||
2201 | else if (Sgl_is_signalingnan(opnd3)) { | ||
2202 | /* trap if INVALIDTRAP enabled */ | ||
2203 | if (Is_invalidtrap_enabled()) | ||
2204 | return(OPC_2E_INVALIDEXCEPTION); | ||
2205 | /* make NaN quiet */ | ||
2206 | Set_invalidflag(); | ||
2207 | Sgl_set_quiet(opnd3); | ||
2208 | Sgl_copytoptr(opnd3,dstptr); | ||
2209 | return(NOEXCEPTION); | ||
2210 | } | ||
2211 | /* | ||
2212 | * return quiet NaN | ||
2213 | */ | ||
2214 | Sgl_copytoptr(opnd2,dstptr); | ||
2215 | return(NOEXCEPTION); | ||
2216 | } | ||
2217 | } | ||
2218 | |||
2219 | /* | ||
2220 | * check third operand for NaN's or infinity | ||
2221 | */ | ||
2222 | if (Sgl_isinfinity_exponent(opnd3)) { | ||
2223 | if (Sgl_iszero_mantissa(opnd3)) { | ||
2224 | /* return infinity */ | ||
2225 | Sgl_copytoptr(opnd3,dstptr); | ||
2226 | return(NOEXCEPTION); | ||
2227 | } else { | ||
2228 | /* | ||
2229 | * is NaN; signaling or quiet? | ||
2230 | */ | ||
2231 | if (Sgl_isone_signaling(opnd3)) { | ||
2232 | /* trap if INVALIDTRAP enabled */ | ||
2233 | if (Is_invalidtrap_enabled()) | ||
2234 | return(OPC_2E_INVALIDEXCEPTION); | ||
2235 | /* make NaN quiet */ | ||
2236 | Set_invalidflag(); | ||
2237 | Sgl_set_quiet(opnd3); | ||
2238 | } | ||
2239 | /* | ||
2240 | * return quiet NaN | ||
2241 | */ | ||
2242 | Sgl_copytoptr(opnd3,dstptr); | ||
2243 | return(NOEXCEPTION); | ||
2244 | } | ||
2245 | } | ||
2246 | |||
2247 | /* | ||
2248 | * Generate multiply mantissa | ||
2249 | */ | ||
2250 | if (Sgl_isnotzero_exponent(opnd1)) { | ||
2251 | /* set hidden bit */ | ||
2252 | Sgl_clear_signexponent_set_hidden(opnd1); | ||
2253 | } | ||
2254 | else { | ||
2255 | /* check for zero */ | ||
2256 | if (Sgl_iszero_mantissa(opnd1)) { | ||
2257 | /* | ||
2258 | * Perform the add opnd3 with zero here. | ||
2259 | */ | ||
2260 | if (Sgl_iszero_exponentmantissa(opnd3)) { | ||
2261 | if (Is_rounding_mode(ROUNDMINUS)) { | ||
2262 | Sgl_or_signs(opnd3,resultp1); | ||
2263 | } else { | ||
2264 | Sgl_and_signs(opnd3,resultp1); | ||
2265 | } | ||
2266 | } | ||
2267 | /* | ||
2268 | * Now let's check for trapped underflow case. | ||
2269 | */ | ||
2270 | else if (Sgl_iszero_exponent(opnd3) && | ||
2271 | Is_underflowtrap_enabled()) { | ||
2272 | /* need to normalize results mantissa */ | ||
2273 | sign_save = Sgl_signextendedsign(opnd3); | ||
2274 | result_exponent = 0; | ||
2275 | Sgl_leftshiftby1(opnd3); | ||
2276 | Sgl_normalize(opnd3,result_exponent); | ||
2277 | Sgl_set_sign(opnd3,/*using*/sign_save); | ||
2278 | Sgl_setwrapped_exponent(opnd3,result_exponent, | ||
2279 | unfl); | ||
2280 | Sgl_copytoptr(opnd3,dstptr); | ||
2281 | /* inexact = FALSE */ | ||
2282 | return(OPC_2E_UNDERFLOWEXCEPTION); | ||
2283 | } | ||
2284 | Sgl_copytoptr(opnd3,dstptr); | ||
2285 | return(NOEXCEPTION); | ||
2286 | } | ||
2287 | /* is denormalized, adjust exponent */ | ||
2288 | Sgl_clear_signexponent(opnd1); | ||
2289 | Sgl_leftshiftby1(opnd1); | ||
2290 | Sgl_normalize(opnd1,mpy_exponent); | ||
2291 | } | ||
2292 | /* opnd2 needs to have hidden bit set with msb in hidden bit */ | ||
2293 | if (Sgl_isnotzero_exponent(opnd2)) { | ||
2294 | Sgl_clear_signexponent_set_hidden(opnd2); | ||
2295 | } | ||
2296 | else { | ||
2297 | /* check for zero */ | ||
2298 | if (Sgl_iszero_mantissa(opnd2)) { | ||
2299 | /* | ||
2300 | * Perform the add opnd3 with zero here. | ||
2301 | */ | ||
2302 | if (Sgl_iszero_exponentmantissa(opnd3)) { | ||
2303 | if (Is_rounding_mode(ROUNDMINUS)) { | ||
2304 | Sgl_or_signs(opnd3,resultp1); | ||
2305 | } else { | ||
2306 | Sgl_and_signs(opnd3,resultp1); | ||
2307 | } | ||
2308 | } | ||
2309 | /* | ||
2310 | * Now let's check for trapped underflow case. | ||
2311 | */ | ||
2312 | else if (Sgl_iszero_exponent(opnd3) && | ||
2313 | Is_underflowtrap_enabled()) { | ||
2314 | /* need to normalize results mantissa */ | ||
2315 | sign_save = Sgl_signextendedsign(opnd3); | ||
2316 | result_exponent = 0; | ||
2317 | Sgl_leftshiftby1(opnd3); | ||
2318 | Sgl_normalize(opnd3,result_exponent); | ||
2319 | Sgl_set_sign(opnd3,/*using*/sign_save); | ||
2320 | Sgl_setwrapped_exponent(opnd3,result_exponent, | ||
2321 | unfl); | ||
2322 | Sgl_copytoptr(opnd3,dstptr); | ||
2323 | /* inexact = FALSE */ | ||
2324 | return(OPC_2E_UNDERFLOWEXCEPTION); | ||
2325 | } | ||
2326 | Sgl_copytoptr(opnd3,dstptr); | ||
2327 | return(NOEXCEPTION); | ||
2328 | } | ||
2329 | /* is denormalized; want to normalize */ | ||
2330 | Sgl_clear_signexponent(opnd2); | ||
2331 | Sgl_leftshiftby1(opnd2); | ||
2332 | Sgl_normalize(opnd2,mpy_exponent); | ||
2333 | } | ||
2334 | |||
2335 | /* Multiply the first two source mantissas together */ | ||
2336 | |||
2337 | /* | ||
2338 | * The intermediate result will be kept in tmpres, | ||
2339 | * which needs enough room for 106 bits of mantissa, | ||
2340 | * so lets call it a Double extended. | ||
2341 | */ | ||
2342 | Sglext_setzero(tmpresp1,tmpresp2); | ||
2343 | |||
2344 | /* | ||
2345 | * Four bits at a time are inspected in each loop, and a | ||
2346 | * simple shift and add multiply algorithm is used. | ||
2347 | */ | ||
2348 | for (count = SGL_P-1; count >= 0; count -= 4) { | ||
2349 | Sglext_rightshiftby4(tmpresp1,tmpresp2); | ||
2350 | if (Sbit28(opnd1)) { | ||
2351 | /* Twoword_add should be an ADD followed by 2 ADDC's */ | ||
2352 | Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0); | ||
2353 | } | ||
2354 | if (Sbit29(opnd1)) { | ||
2355 | Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0); | ||
2356 | } | ||
2357 | if (Sbit30(opnd1)) { | ||
2358 | Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0); | ||
2359 | } | ||
2360 | if (Sbit31(opnd1)) { | ||
2361 | Twoword_add(tmpresp1, tmpresp2, opnd2, 0); | ||
2362 | } | ||
2363 | Sgl_rightshiftby4(opnd1); | ||
2364 | } | ||
2365 | if (Is_sexthiddenoverflow(tmpresp1)) { | ||
2366 | /* result mantissa >= 2 (mantissa overflow) */ | ||
2367 | mpy_exponent++; | ||
2368 | Sglext_rightshiftby4(tmpresp1,tmpresp2); | ||
2369 | } else { | ||
2370 | Sglext_rightshiftby3(tmpresp1,tmpresp2); | ||
2371 | } | ||
2372 | |||
2373 | /* | ||
2374 | * Restore the sign of the mpy result which was saved in resultp1. | ||
2375 | * The exponent will continue to be kept in mpy_exponent. | ||
2376 | */ | ||
2377 | Sglext_set_sign(tmpresp1,Sgl_sign(resultp1)); | ||
2378 | |||
2379 | /* | ||
2380 | * No rounding is required, since the result of the multiply | ||
2381 | * is exact in the extended format. | ||
2382 | */ | ||
2383 | |||
2384 | /* | ||
2385 | * Now we are ready to perform the add portion of the operation. | ||
2386 | * | ||
2387 | * The exponents need to be kept as integers for now, since the | ||
2388 | * multiply result might not fit into the exponent field. We | ||
2389 | * can't overflow or underflow because of this yet, since the | ||
2390 | * add could bring the final result back into range. | ||
2391 | */ | ||
2392 | add_exponent = Sgl_exponent(opnd3); | ||
2393 | |||
2394 | /* | ||
2395 | * Check for denormalized or zero add operand. | ||
2396 | */ | ||
2397 | if (add_exponent == 0) { | ||
2398 | /* check for zero */ | ||
2399 | if (Sgl_iszero_mantissa(opnd3)) { | ||
2400 | /* right is zero */ | ||
2401 | /* Left can't be zero and must be result. | ||
2402 | * | ||
2403 | * The final result is now in tmpres and mpy_exponent, | ||
2404 | * and needs to be rounded and squeezed back into | ||
2405 | * double precision format from double extended. | ||
2406 | */ | ||
2407 | result_exponent = mpy_exponent; | ||
2408 | Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2); | ||
2409 | sign_save = Sgl_signextendedsign(resultp1);/*save sign*/ | ||
2410 | goto round; | ||
2411 | } | ||
2412 | |||
2413 | /* | ||
2414 | * Neither are zeroes. | ||
2415 | * Adjust exponent and normalize add operand. | ||
2416 | */ | ||
2417 | sign_save = Sgl_signextendedsign(opnd3); /* save sign */ | ||
2418 | Sgl_clear_signexponent(opnd3); | ||
2419 | Sgl_leftshiftby1(opnd3); | ||
2420 | Sgl_normalize(opnd3,add_exponent); | ||
2421 | Sgl_set_sign(opnd3,sign_save); /* restore sign */ | ||
2422 | } else { | ||
2423 | Sgl_clear_exponent_set_hidden(opnd3); | ||
2424 | } | ||
2425 | /* | ||
2426 | * Copy opnd3 to the double extended variable called right. | ||
2427 | */ | ||
2428 | Sgl_copyto_sglext(opnd3,rightp1,rightp2); | ||
2429 | |||
2430 | /* | ||
2431 | * A zero "save" helps discover equal operands (for later), | ||
2432 | * and is used in swapping operands (if needed). | ||
2433 | */ | ||
2434 | Sglext_xortointp1(tmpresp1,rightp1,/*to*/save); | ||
2435 | |||
2436 | /* | ||
2437 | * Compare magnitude of operands. | ||
2438 | */ | ||
2439 | Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1); | ||
2440 | Sglext_copytoint_exponentmantissa(rightp1,signlessright1); | ||
2441 | if (mpy_exponent < add_exponent || mpy_exponent == add_exponent && | ||
2442 | Sglext_ismagnitudeless(signlessleft1,signlessright1)) { | ||
2443 | /* | ||
2444 | * Set the left operand to the larger one by XOR swap. | ||
2445 | * First finish the first word "save". | ||
2446 | */ | ||
2447 | Sglext_xorfromintp1(save,rightp1,/*to*/rightp1); | ||
2448 | Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1); | ||
2449 | Sglext_swap_lower(tmpresp2,rightp2); | ||
2450 | /* also setup exponents used in rest of routine */ | ||
2451 | diff_exponent = add_exponent - mpy_exponent; | ||
2452 | result_exponent = add_exponent; | ||
2453 | } else { | ||
2454 | /* also setup exponents used in rest of routine */ | ||
2455 | diff_exponent = mpy_exponent - add_exponent; | ||
2456 | result_exponent = mpy_exponent; | ||
2457 | } | ||
2458 | /* Invariant: left is not smaller than right. */ | ||
2459 | |||
2460 | /* | ||
2461 | * Special case alignment of operands that would force alignment | ||
2462 | * beyond the extent of the extension. A further optimization | ||
2463 | * could special case this but only reduces the path length for | ||
2464 | * this infrequent case. | ||
2465 | */ | ||
2466 | if (diff_exponent > SGLEXT_THRESHOLD) { | ||
2467 | diff_exponent = SGLEXT_THRESHOLD; | ||
2468 | } | ||
2469 | |||
2470 | /* Align right operand by shifting it to the right */ | ||
2471 | Sglext_clear_sign(rightp1); | ||
2472 | Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent); | ||
2473 | |||
2474 | /* Treat sum and difference of the operands separately. */ | ||
2475 | if ((int)save < 0) { | ||
2476 | /* | ||
2477 | * Difference of the two operands. Overflow can occur if the | ||
2478 | * multiply overflowed. A borrow can occur out of the hidden | ||
2479 | * bit and force a post normalization phase. | ||
2480 | */ | ||
2481 | Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2, | ||
2482 | resultp1,resultp2); | ||
2483 | sign_save = Sgl_signextendedsign(resultp1); | ||
2484 | if (Sgl_iszero_hidden(resultp1)) { | ||
2485 | /* Handle normalization */ | ||
2486 | /* A straight foward algorithm would now shift the | ||
2487 | * result and extension left until the hidden bit | ||
2488 | * becomes one. Not all of the extension bits need | ||
2489 | * participate in the shift. Only the two most | ||
2490 | * significant bits (round and guard) are needed. | ||
2491 | * If only a single shift is needed then the guard | ||
2492 | * bit becomes a significant low order bit and the | ||
2493 | * extension must participate in the rounding. | ||
2494 | * If more than a single shift is needed, then all | ||
2495 | * bits to the right of the guard bit are zeros, | ||
2496 | * and the guard bit may or may not be zero. */ | ||
2497 | Sglext_leftshiftby1(resultp1,resultp2); | ||
2498 | |||
2499 | /* Need to check for a zero result. The sign and | ||
2500 | * exponent fields have already been zeroed. The more | ||
2501 | * efficient test of the full object can be used. | ||
2502 | */ | ||
2503 | if (Sglext_iszero(resultp1,resultp2)) { | ||
2504 | /* Must have been "x-x" or "x+(-x)". */ | ||
2505 | if (Is_rounding_mode(ROUNDMINUS)) | ||
2506 | Sgl_setone_sign(resultp1); | ||
2507 | Sgl_copytoptr(resultp1,dstptr); | ||
2508 | return(NOEXCEPTION); | ||
2509 | } | ||
2510 | result_exponent--; | ||
2511 | |||
2512 | /* Look to see if normalization is finished. */ | ||
2513 | if (Sgl_isone_hidden(resultp1)) { | ||
2514 | /* No further normalization is needed */ | ||
2515 | goto round; | ||
2516 | } | ||
2517 | |||
2518 | /* Discover first one bit to determine shift amount. | ||
2519 | * Use a modified binary search. We have already | ||
2520 | * shifted the result one position right and still | ||
2521 | * not found a one so the remainder of the extension | ||
2522 | * must be zero and simplifies rounding. */ | ||
2523 | /* Scan bytes */ | ||
2524 | while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) { | ||
2525 | Sglext_leftshiftby8(resultp1,resultp2); | ||
2526 | result_exponent -= 8; | ||
2527 | } | ||
2528 | /* Now narrow it down to the nibble */ | ||
2529 | if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) { | ||
2530 | /* The lower nibble contains the | ||
2531 | * normalizing one */ | ||
2532 | Sglext_leftshiftby4(resultp1,resultp2); | ||
2533 | result_exponent -= 4; | ||
2534 | } | ||
2535 | /* Select case where first bit is set (already | ||
2536 | * normalized) otherwise select the proper shift. */ | ||
2537 | jumpsize = Sgl_hiddenhigh3mantissa(resultp1); | ||
2538 | if (jumpsize <= 7) switch(jumpsize) { | ||
2539 | case 1: | ||
2540 | Sglext_leftshiftby3(resultp1,resultp2); | ||
2541 | result_exponent -= 3; | ||
2542 | break; | ||
2543 | case 2: | ||
2544 | case 3: | ||
2545 | Sglext_leftshiftby2(resultp1,resultp2); | ||
2546 | result_exponent -= 2; | ||
2547 | break; | ||
2548 | case 4: | ||
2549 | case 5: | ||
2550 | case 6: | ||
2551 | case 7: | ||
2552 | Sglext_leftshiftby1(resultp1,resultp2); | ||
2553 | result_exponent -= 1; | ||
2554 | break; | ||
2555 | } | ||
2556 | } /* end if (hidden...)... */ | ||
2557 | /* Fall through and round */ | ||
2558 | } /* end if (save < 0)... */ | ||
2559 | else { | ||
2560 | /* Add magnitudes */ | ||
2561 | Sglext_addition(tmpresp1,tmpresp2, | ||
2562 | rightp1,rightp2, /*to*/resultp1,resultp2); | ||
2563 | sign_save = Sgl_signextendedsign(resultp1); | ||
2564 | if (Sgl_isone_hiddenoverflow(resultp1)) { | ||
2565 | /* Prenormalization required. */ | ||
2566 | Sglext_arithrightshiftby1(resultp1,resultp2); | ||
2567 | result_exponent++; | ||
2568 | } /* end if hiddenoverflow... */ | ||
2569 | } /* end else ...add magnitudes... */ | ||
2570 | |||
2571 | /* Round the result. If the extension and lower two words are | ||
2572 | * all zeros, then the result is exact. Otherwise round in the | ||
2573 | * correct direction. Underflow is possible. If a postnormalization | ||
2574 | * is necessary, then the mantissa is all zeros so no shift is needed. | ||
2575 | */ | ||
2576 | round: | ||
2577 | if (result_exponent <= 0 && !Is_underflowtrap_enabled()) { | ||
2578 | Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny); | ||
2579 | } | ||
2580 | Sgl_set_sign(resultp1,/*using*/sign_save); | ||
2581 | if (Sglext_isnotzero_mantissap2(resultp2)) { | ||
2582 | inexact = TRUE; | ||
2583 | switch(Rounding_mode()) { | ||
2584 | case ROUNDNEAREST: /* The default. */ | ||
2585 | if (Sglext_isone_highp2(resultp2)) { | ||
2586 | /* at least 1/2 ulp */ | ||
2587 | if (Sglext_isnotzero_low31p2(resultp2) || | ||
2588 | Sglext_isone_lowp1(resultp1)) { | ||
2589 | /* either exactly half way and odd or | ||
2590 | * more than 1/2ulp */ | ||
2591 | Sgl_increment(resultp1); | ||
2592 | } | ||
2593 | } | ||
2594 | break; | ||
2595 | |||
2596 | case ROUNDPLUS: | ||
2597 | if (Sgl_iszero_sign(resultp1)) { | ||
2598 | /* Round up positive results */ | ||
2599 | Sgl_increment(resultp1); | ||
2600 | } | ||
2601 | break; | ||
2602 | |||
2603 | case ROUNDMINUS: | ||
2604 | if (Sgl_isone_sign(resultp1)) { | ||
2605 | /* Round down negative results */ | ||
2606 | Sgl_increment(resultp1); | ||
2607 | } | ||
2608 | |||
2609 | case ROUNDZERO:; | ||
2610 | /* truncate is simple */ | ||
2611 | } /* end switch... */ | ||
2612 | if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++; | ||
2613 | } | ||
2614 | if (result_exponent >= SGL_INFINITY_EXPONENT) { | ||
2615 | /* Overflow */ | ||
2616 | if (Is_overflowtrap_enabled()) { | ||
2617 | /* | ||
2618 | * Adjust bias of result | ||
2619 | */ | ||
2620 | Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl); | ||
2621 | Sgl_copytoptr(resultp1,dstptr); | ||
2622 | if (inexact) | ||
2623 | if (Is_inexacttrap_enabled()) | ||
2624 | return (OPC_2E_OVERFLOWEXCEPTION | | ||
2625 | OPC_2E_INEXACTEXCEPTION); | ||
2626 | else Set_inexactflag(); | ||
2627 | return (OPC_2E_OVERFLOWEXCEPTION); | ||
2628 | } | ||
2629 | inexact = TRUE; | ||
2630 | Set_overflowflag(); | ||
2631 | Sgl_setoverflow(resultp1); | ||
2632 | } else if (result_exponent <= 0) { /* underflow case */ | ||
2633 | if (Is_underflowtrap_enabled()) { | ||
2634 | /* | ||
2635 | * Adjust bias of result | ||
2636 | */ | ||
2637 | Sgl_setwrapped_exponent(resultp1,result_exponent,unfl); | ||
2638 | Sgl_copytoptr(resultp1,dstptr); | ||
2639 | if (inexact) | ||
2640 | if (Is_inexacttrap_enabled()) | ||
2641 | return (OPC_2E_UNDERFLOWEXCEPTION | | ||
2642 | OPC_2E_INEXACTEXCEPTION); | ||
2643 | else Set_inexactflag(); | ||
2644 | return(OPC_2E_UNDERFLOWEXCEPTION); | ||
2645 | } | ||
2646 | else if (inexact && is_tiny) Set_underflowflag(); | ||
2647 | } | ||
2648 | else Sgl_set_exponent(resultp1,result_exponent); | ||
2649 | Sgl_copytoptr(resultp1,dstptr); | ||
2650 | if (inexact) | ||
2651 | if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION); | ||
2652 | else Set_inexactflag(); | ||
2653 | return(NOEXCEPTION); | ||
2654 | } | ||
2655 | |||