diff options
Diffstat (limited to 'arch/m68k/ifpsp060/src/fplsp.S')
-rw-r--r-- | arch/m68k/ifpsp060/src/fplsp.S | 10980 |
1 files changed, 10980 insertions, 0 deletions
diff --git a/arch/m68k/ifpsp060/src/fplsp.S b/arch/m68k/ifpsp060/src/fplsp.S new file mode 100644 index 000000000000..fdb79b927ef1 --- /dev/null +++ b/arch/m68k/ifpsp060/src/fplsp.S | |||
@@ -0,0 +1,10980 @@ | |||
1 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
2 | MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP | ||
3 | M68000 Hi-Performance Microprocessor Division | ||
4 | M68060 Software Package | ||
5 | Production Release P1.00 -- October 10, 1994 | ||
6 | |||
7 | M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved. | ||
8 | |||
9 | THE SOFTWARE is provided on an "AS IS" basis and without warranty. | ||
10 | To the maximum extent permitted by applicable law, | ||
11 | MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, | ||
12 | INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE | ||
13 | and any warranty against infringement with regard to the SOFTWARE | ||
14 | (INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials. | ||
15 | |||
16 | To the maximum extent permitted by applicable law, | ||
17 | IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER | ||
18 | (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, | ||
19 | BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS) | ||
20 | ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE. | ||
21 | Motorola assumes no responsibility for the maintenance and support of the SOFTWARE. | ||
22 | |||
23 | You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE | ||
24 | so long as this entire notice is retained without alteration in any modified and/or | ||
25 | redistributed versions, and that such modified versions are clearly identified as such. | ||
26 | No licenses are granted by implication, estoppel or otherwise under any patents | ||
27 | or trademarks of Motorola, Inc. | ||
28 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
29 | # | ||
30 | # lfptop.s: | ||
31 | # This file is appended to the top of the 060FPLSP package | ||
32 | # and contains the entry points into the package. The user, in | ||
33 | # effect, branches to one of the branch table entries located here. | ||
34 | # | ||
35 | |||
# Entry-point branch table.
# One `bra.l` per exported routine, grouped per function in the order
# single (s), double (d), extended (x) precision variant. Every branch is
# followed by a zero `short`; presumably this pads each entry to a fixed
# slot size so callers can index the table -- confirm against the package
# documentation. Entry order is part of the external interface: do not
# reorder or insert entries in the middle of the table.
36 | bra.l _facoss_ | ||
37 | short 0x0000 | ||
38 | bra.l _facosd_ | ||
39 | short 0x0000 | ||
40 | bra.l _facosx_ | ||
41 | short 0x0000 | ||
42 | |||
43 | bra.l _fasins_ | ||
44 | short 0x0000 | ||
45 | bra.l _fasind_ | ||
46 | short 0x0000 | ||
47 | bra.l _fasinx_ | ||
48 | short 0x0000 | ||
49 | |||
50 | bra.l _fatans_ | ||
51 | short 0x0000 | ||
52 | bra.l _fatand_ | ||
53 | short 0x0000 | ||
54 | bra.l _fatanx_ | ||
55 | short 0x0000 | ||
56 | |||
57 | bra.l _fatanhs_ | ||
58 | short 0x0000 | ||
59 | bra.l _fatanhd_ | ||
60 | short 0x0000 | ||
61 | bra.l _fatanhx_ | ||
62 | short 0x0000 | ||
63 | |||
64 | bra.l _fcoss_ | ||
65 | short 0x0000 | ||
66 | bra.l _fcosd_ | ||
67 | short 0x0000 | ||
68 | bra.l _fcosx_ | ||
69 | short 0x0000 | ||
70 | |||
71 | bra.l _fcoshs_ | ||
72 | short 0x0000 | ||
73 | bra.l _fcoshd_ | ||
74 | short 0x0000 | ||
75 | bra.l _fcoshx_ | ||
76 | short 0x0000 | ||
77 | |||
78 | bra.l _fetoxs_ | ||
79 | short 0x0000 | ||
80 | bra.l _fetoxd_ | ||
81 | short 0x0000 | ||
82 | bra.l _fetoxx_ | ||
83 | short 0x0000 | ||
84 | |||
85 | bra.l _fetoxm1s_ | ||
86 | short 0x0000 | ||
87 | bra.l _fetoxm1d_ | ||
88 | short 0x0000 | ||
89 | bra.l _fetoxm1x_ | ||
90 | short 0x0000 | ||
91 | |||
92 | bra.l _fgetexps_ | ||
93 | short 0x0000 | ||
94 | bra.l _fgetexpd_ | ||
95 | short 0x0000 | ||
96 | bra.l _fgetexpx_ | ||
97 | short 0x0000 | ||
98 | |||
99 | bra.l _fgetmans_ | ||
100 | short 0x0000 | ||
101 | bra.l _fgetmand_ | ||
102 | short 0x0000 | ||
103 | bra.l _fgetmanx_ | ||
104 | short 0x0000 | ||
105 | |||
106 | bra.l _flog10s_ | ||
107 | short 0x0000 | ||
108 | bra.l _flog10d_ | ||
109 | short 0x0000 | ||
110 | bra.l _flog10x_ | ||
111 | short 0x0000 | ||
112 | |||
113 | bra.l _flog2s_ | ||
114 | short 0x0000 | ||
115 | bra.l _flog2d_ | ||
116 | short 0x0000 | ||
117 | bra.l _flog2x_ | ||
118 | short 0x0000 | ||
119 | |||
120 | bra.l _flogns_ | ||
121 | short 0x0000 | ||
122 | bra.l _flognd_ | ||
123 | short 0x0000 | ||
124 | bra.l _flognx_ | ||
125 | short 0x0000 | ||
126 | |||
127 | bra.l _flognp1s_ | ||
128 | short 0x0000 | ||
129 | bra.l _flognp1d_ | ||
130 | short 0x0000 | ||
131 | bra.l _flognp1x_ | ||
132 | short 0x0000 | ||
133 | |||
134 | bra.l _fmods_ | ||
135 | short 0x0000 | ||
136 | bra.l _fmodd_ | ||
137 | short 0x0000 | ||
138 | bra.l _fmodx_ | ||
139 | short 0x0000 | ||
140 | |||
141 | bra.l _frems_ | ||
142 | short 0x0000 | ||
143 | bra.l _fremd_ | ||
144 | short 0x0000 | ||
145 | bra.l _fremx_ | ||
146 | short 0x0000 | ||
147 | |||
148 | bra.l _fscales_ | ||
149 | short 0x0000 | ||
150 | bra.l _fscaled_ | ||
151 | short 0x0000 | ||
152 | bra.l _fscalex_ | ||
153 | short 0x0000 | ||
154 | |||
155 | bra.l _fsins_ | ||
156 | short 0x0000 | ||
157 | bra.l _fsind_ | ||
158 | short 0x0000 | ||
159 | bra.l _fsinx_ | ||
160 | short 0x0000 | ||
161 | |||
162 | bra.l _fsincoss_ | ||
163 | short 0x0000 | ||
164 | bra.l _fsincosd_ | ||
165 | short 0x0000 | ||
166 | bra.l _fsincosx_ | ||
167 | short 0x0000 | ||
168 | |||
169 | bra.l _fsinhs_ | ||
170 | short 0x0000 | ||
171 | bra.l _fsinhd_ | ||
172 | short 0x0000 | ||
173 | bra.l _fsinhx_ | ||
174 | short 0x0000 | ||
175 | |||
176 | bra.l _ftans_ | ||
177 | short 0x0000 | ||
178 | bra.l _ftand_ | ||
179 | short 0x0000 | ||
180 | bra.l _ftanx_ | ||
181 | short 0x0000 | ||
182 | |||
183 | bra.l _ftanhs_ | ||
184 | short 0x0000 | ||
185 | bra.l _ftanhd_ | ||
186 | short 0x0000 | ||
187 | bra.l _ftanhx_ | ||
188 | short 0x0000 | ||
189 | |||
190 | bra.l _ftentoxs_ | ||
191 | short 0x0000 | ||
192 | bra.l _ftentoxd_ | ||
193 | short 0x0000 | ||
194 | bra.l _ftentoxx_ | ||
195 | short 0x0000 | ||
196 | |||
197 | bra.l _ftwotoxs_ | ||
198 | short 0x0000 | ||
199 | bra.l _ftwotoxd_ | ||
200 | short 0x0000 | ||
201 | bra.l _ftwotoxx_ | ||
202 | short 0x0000 | ||
203 | |||
204 | bra.l _fabss_ | ||
205 | short 0x0000 | ||
206 | bra.l _fabsd_ | ||
207 | short 0x0000 | ||
208 | bra.l _fabsx_ | ||
209 | short 0x0000 | ||
210 | |||
211 | bra.l _fadds_ | ||
212 | short 0x0000 | ||
213 | bra.l _faddd_ | ||
214 | short 0x0000 | ||
215 | bra.l _faddx_ | ||
216 | short 0x0000 | ||
217 | |||
218 | bra.l _fdivs_ | ||
219 | short 0x0000 | ||
220 | bra.l _fdivd_ | ||
221 | short 0x0000 | ||
222 | bra.l _fdivx_ | ||
223 | short 0x0000 | ||
224 | |||
225 | bra.l _fints_ | ||
226 | short 0x0000 | ||
227 | bra.l _fintd_ | ||
228 | short 0x0000 | ||
229 | bra.l _fintx_ | ||
230 | short 0x0000 | ||
231 | |||
232 | bra.l _fintrzs_ | ||
233 | short 0x0000 | ||
234 | bra.l _fintrzd_ | ||
235 | short 0x0000 | ||
236 | bra.l _fintrzx_ | ||
237 | short 0x0000 | ||
238 | |||
239 | bra.l _fmuls_ | ||
240 | short 0x0000 | ||
241 | bra.l _fmuld_ | ||
242 | short 0x0000 | ||
243 | bra.l _fmulx_ | ||
244 | short 0x0000 | ||
245 | |||
246 | bra.l _fnegs_ | ||
247 | short 0x0000 | ||
248 | bra.l _fnegd_ | ||
249 | short 0x0000 | ||
250 | bra.l _fnegx_ | ||
251 | short 0x0000 | ||
252 | |||
253 | bra.l _fsqrts_ | ||
254 | short 0x0000 | ||
255 | bra.l _fsqrtd_ | ||
256 | short 0x0000 | ||
257 | bra.l _fsqrtx_ | ||
258 | short 0x0000 | ||
259 | |||
260 | bra.l _fsubs_ | ||
261 | short 0x0000 | ||
262 | bra.l _fsubd_ | ||
263 | short 0x0000 | ||
264 | bra.l _fsubx_ | ||
265 | short 0x0000 | ||
266 | |||
267 | # leave room for future possible additions | ||
# The table region is padded out to a 0x400 boundary, so new entries can be
# appended after _fsubx_ without moving the code that follows.
268 | align 0x400 | ||
269 | |||
270 | # | ||
271 | # This file contains a set of define statements for constants | ||
272 | # in order to promote readability within the corecode itself. | ||
273 | # | ||
274 | |||
# Stack frame layout.
# The entry points in this file build their frame with
# `link %a6,&-LOCAL_SIZE` and address every slot below as NAME(%a6), so
# negative values lie inside the local area and LV (= -LOCAL_SIZE) is its
# lowest address. Slots defined as LV+n grow upward from the bottom of the
# frame; the EXC_*REGS save areas sit just below the frame pointer.
275 | set LOCAL_SIZE, 192 # stack frame size(bytes) | ||
276 | set LV, -LOCAL_SIZE # stack offset | ||
277 | |||
278 | set EXC_SR, 0x4 # stack status register | ||
279 | set EXC_PC, 0x6 # stack pc | ||
280 | set EXC_VOFF, 0xa # stacked vector offset | ||
281 | set EXC_EA, 0xc # stacked <ea> | ||
282 | |||
283 | set EXC_FP, 0x0 # frame pointer | ||
284 | |||
# Register save areas: 8 address regs * 4 bytes from -68, 8 data regs * 4
# bytes from -100, and room for 3 extended (12-byte) fp regs from -36.
285 | set EXC_AREGS, -68 # offset of all address regs | ||
286 | set EXC_DREGS, -100 # offset of all data regs | ||
287 | set EXC_FPREGS, -36 # offset of all fp regs | ||
288 | |||
289 | set EXC_A7, EXC_AREGS+(7*4) # offset of saved a7 | ||
# NOTE(review): OLD_A7 evaluates to EXC_AREGS+(6*4), i.e. the same slot as
# EXC_A6 below, not a slot distinct from it -- confirm the aliasing is intended.
290 | set OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7 | ||
291 | set EXC_A6, EXC_AREGS+(6*4) # offset of saved a6 | ||
292 | set EXC_A5, EXC_AREGS+(5*4) | ||
293 | set EXC_A4, EXC_AREGS+(4*4) | ||
294 | set EXC_A3, EXC_AREGS+(3*4) | ||
295 | set EXC_A2, EXC_AREGS+(2*4) | ||
296 | set EXC_A1, EXC_AREGS+(1*4) | ||
297 | set EXC_A0, EXC_AREGS+(0*4) | ||
298 | set EXC_D7, EXC_DREGS+(7*4) | ||
299 | set EXC_D6, EXC_DREGS+(6*4) | ||
300 | set EXC_D5, EXC_DREGS+(5*4) | ||
301 | set EXC_D4, EXC_DREGS+(4*4) | ||
302 | set EXC_D3, EXC_DREGS+(3*4) | ||
303 | set EXC_D2, EXC_DREGS+(2*4) | ||
304 | set EXC_D1, EXC_DREGS+(1*4) | ||
305 | set EXC_D0, EXC_DREGS+(0*4) | ||
306 | |||
307 | set EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0 | ||
308 | set EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1 | ||
309 | set EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used) | ||
310 | |||
# 12-byte extended-precision work areas. Within each one the _EX, _SGN, _HI
# and _LO names are byte offsets 0, 2, 4 and 8 from the area base.
311 | set FP_SCR1, LV+80 # fp scratch 1 | ||
312 | set FP_SCR1_EX, FP_SCR1+0 | ||
313 | set FP_SCR1_SGN, FP_SCR1+2 | ||
314 | set FP_SCR1_HI, FP_SCR1+4 | ||
315 | set FP_SCR1_LO, FP_SCR1+8 | ||
316 | |||
317 | set FP_SCR0, LV+68 # fp scratch 0 | ||
318 | set FP_SCR0_EX, FP_SCR0+0 | ||
319 | set FP_SCR0_SGN, FP_SCR0+2 | ||
320 | set FP_SCR0_HI, FP_SCR0+4 | ||
321 | set FP_SCR0_LO, FP_SCR0+8 | ||
322 | |||
323 | set FP_DST, LV+56 # fp destination operand | ||
324 | set FP_DST_EX, FP_DST+0 | ||
325 | set FP_DST_SGN, FP_DST+2 | ||
326 | set FP_DST_HI, FP_DST+4 | ||
327 | set FP_DST_LO, FP_DST+8 | ||
328 | |||
329 | set FP_SRC, LV+44 # fp source operand | ||
330 | set FP_SRC_EX, FP_SRC+0 | ||
331 | set FP_SRC_SGN, FP_SRC+2 | ||
332 | set FP_SRC_HI, FP_SRC+4 | ||
333 | set FP_SRC_LO, FP_SRC+8 | ||
334 | |||
335 | set USER_FPIAR, LV+40 # FP instr address register | ||
336 | |||
# Saved FPSR, addressable as a longword (USER_FPSR) or byte by byte.
337 | set USER_FPSR, LV+36 # FP status register | ||
338 | set FPSR_CC, USER_FPSR+0 # FPSR condition codes | ||
339 | set FPSR_QBYTE, USER_FPSR+1 # FPSR quotient byte | ||
340 | set FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte | ||
341 | set FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte | ||
342 | |||
# Saved FPCR; only its two low-order bytes have byte-level names.
343 | set USER_FPCR, LV+32 # FP control register | ||
344 | set FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable | ||
345 | set FPCR_MODE, USER_FPCR+3 # FPCR rounding mode control | ||
346 | |||
347 | set L_SCR3, LV+28 # integer scratch 3 | ||
348 | set L_SCR2, LV+24 # integer scratch 2 | ||
349 | set L_SCR1, LV+20 # integer scratch 1 | ||
350 | |||
351 | set STORE_FLG, LV+19 # flag: operand store (ie. not fcmp/ftst) | ||
352 | |||
# NOTE(review): EXC_TEMP2 shares offset LV+24 with L_SCR2 above, so the two
# names refer to the same longword -- do not use both at once.
353 | set EXC_TEMP2, LV+24 # temporary space | ||
354 | set EXC_TEMP, LV+16 # temporary space | ||
355 | |||
356 | set DTAG, LV+15 # destination operand type | ||
357 | set STAG, LV+14 # source operand type | ||
358 | |||
359 | set SPCOND_FLG, LV+10 # flag: special case (see below) | ||
360 | |||
361 | set EXC_CC, LV+8 # saved condition codes | ||
362 | set EXC_EXTWPTR, LV+4 # saved current PC (active) | ||
# EXC_EXTWORD and EXC_CMDREG are two names for the same slot (LV+2).
363 | set EXC_EXTWORD, LV+2 # saved extension word | ||
364 | set EXC_CMDREG, LV+2 # saved extension word | ||
365 | set EXC_OPWORD, LV+0 # saved operation word | ||
366 | |||
367 | ################################ | ||
368 | |||
369 | # Helpful macros | ||
370 | |||
# Byte offsets of the fields of an extended-precision value held in memory,
# relative to a pointer to that value. FTEMP_* and LOCAL_* are identical
# sets (including a GRS field at offset 12, past the 12-byte value itself);
# DST_* and SRC_* define only the EX/HI/LO subset.
371 | set FTEMP, 0 # offsets within an | ||
372 | set FTEMP_EX, 0 # extended precision | ||
373 | set FTEMP_SGN, 2 # value saved in memory. | ||
374 | set FTEMP_HI, 4 | ||
375 | set FTEMP_LO, 8 | ||
376 | set FTEMP_GRS, 12 | ||
377 | |||
378 | set LOCAL, 0 # offsets within an | ||
379 | set LOCAL_EX, 0 # extended precision | ||
380 | set LOCAL_SGN, 2 # value saved in memory. | ||
381 | set LOCAL_HI, 4 | ||
382 | set LOCAL_LO, 8 | ||
383 | set LOCAL_GRS, 12 | ||
384 | |||
385 | set DST, 0 # offsets within an | ||
386 | set DST_EX, 0 # extended precision | ||
387 | set DST_HI, 4 # value saved in memory. | ||
388 | set DST_LO, 8 | ||
389 | |||
390 | set SRC, 0 # offsets within an | ||
391 | set SRC_EX, 0 # extended precision | ||
392 | set SRC_HI, 4 # value saved in memory. | ||
393 | set SRC_LO, 8 | ||
394 | |||
# Exponent limits per precision; the SGL/DBL values appear to be expressed
# on the extended-precision (EXT_BIAS = 0x3fff) scale -- TODO confirm.
395 | set SGL_LO, 0x3f81 # min sgl prec exponent | ||
396 | set SGL_HI, 0x407e # max sgl prec exponent | ||
397 | set DBL_LO, 0x3c01 # min dbl prec exponent | ||
398 | set DBL_HI, 0x43fe # max dbl prec exponent | ||
399 | set EXT_LO, 0x0 # min ext prec exponent | ||
400 | set EXT_HI, 0x7ffe # max ext prec exponent | ||
401 | |||
402 | set EXT_BIAS, 0x3fff # extended precision bias | ||
403 | set SGL_BIAS, 0x007f # single precision bias | ||
404 | set DBL_BIAS, 0x03ff # double precision bias | ||
405 | |||
# Operand type codes stored in STAG/DTAG. NORM is zero, which lets the
# entry points select the NORM path with a plain `tst.b` on the tag.
406 | set NORM, 0x00 # operand type for STAG/DTAG | ||
407 | set ZERO, 0x01 # operand type for STAG/DTAG | ||
408 | set INF, 0x02 # operand type for STAG/DTAG | ||
409 | set QNAN, 0x03 # operand type for STAG/DTAG | ||
410 | set DENORM, 0x04 # operand type for STAG/DTAG | ||
411 | set SNAN, 0x05 # operand type for STAG/DTAG | ||
412 | set UNNORM, 0x06 # operand type for STAG/DTAG | ||
413 | |||
414 | ################## | ||
415 | # FPSR/FPCR bits # | ||
416 | ################## | ||
# Bit numbers (0 = least significant) within individual FPSR bytes, for use
# with byte-sized bit instructions. They line up with the longword masks
# defined further down: neg..nan index the condition-code byte, bsun..inex1
# the exception status byte, and aiop..ainex the accrued exception byte.
417 | set neg_bit, 0x3 # negative result | ||
418 | set z_bit, 0x2 # zero result | ||
419 | set inf_bit, 0x1 # infinite result | ||
420 | set nan_bit, 0x0 # NAN result | ||
421 | |||
422 | set q_sn_bit, 0x7 # sign bit of quotient byte | ||
423 | |||
424 | set bsun_bit, 7 # branch on unordered | ||
425 | set snan_bit, 6 # signalling NAN | ||
426 | set operr_bit, 5 # operand error | ||
427 | set ovfl_bit, 4 # overflow | ||
428 | set unfl_bit, 3 # underflow | ||
429 | set dz_bit, 2 # divide by zero | ||
430 | set inex2_bit, 1 # inexact result 2 | ||
431 | set inex1_bit, 0 # inexact result 1 | ||
432 | |||
433 | set aiop_bit, 7 # accrued invalid operation bit | ||
434 | set aovfl_bit, 6 # accrued overflow bit | ||
435 | set aunfl_bit, 5 # accrued underflow bit | ||
436 | set adz_bit, 4 # accrued dz bit | ||
437 | set ainex_bit, 3 # accrued inexact bit | ||
438 | |||
439 | ############################# | ||
440 | # FPSR individual bit masks # | ||
441 | ############################# | ||
# Single-bit masks. The (lw) masks are positioned for the full 32-bit FPSR
# image; the (byte) masks are the same condition-code bits positioned for
# the condition-code byte on its own.
442 | set neg_mask, 0x08000000 # negative bit mask (lw) | ||
443 | set inf_mask, 0x02000000 # infinity bit mask (lw) | ||
444 | set z_mask, 0x04000000 # zero bit mask (lw) | ||
445 | set nan_mask, 0x01000000 # nan bit mask (lw) | ||
446 | |||
447 | set neg_bmask, 0x08 # negative bit mask (byte) | ||
448 | set inf_bmask, 0x02 # infinity bit mask (byte) | ||
449 | set z_bmask, 0x04 # zero bit mask (byte) | ||
450 | set nan_bmask, 0x01 # nan bit mask (byte) | ||
451 | |||
# Exception status byte (bits 15-8 of the longword).
452 | set bsun_mask, 0x00008000 # bsun exception mask | ||
453 | set snan_mask, 0x00004000 # snan exception mask | ||
454 | set operr_mask, 0x00002000 # operr exception mask | ||
455 | set ovfl_mask, 0x00001000 # overflow exception mask | ||
456 | set unfl_mask, 0x00000800 # underflow exception mask | ||
457 | set dz_mask, 0x00000400 # dz exception mask | ||
458 | set inex2_mask, 0x00000200 # inex2 exception mask | ||
459 | set inex1_mask, 0x00000100 # inex1 exception mask | ||
460 | |||
# Accrued exception byte (bits 7-3 of the longword).
461 | set aiop_mask, 0x00000080 # accrued invalid operation | ||
462 | set aovfl_mask, 0x00000040 # accrued overflow | ||
463 | set aunfl_mask, 0x00000020 # accrued underflow | ||
464 | set adz_mask, 0x00000010 # accrued divide by zero | ||
465 | set ainex_mask, 0x00000008 # accrued inexact | ||
466 | |||
467 | ###################################### | ||
468 | # FPSR combinations used in the FPSP # | ||
469 | ###################################### | ||
# Pre-combined FPSR masks. The component masks occupy disjoint bits, so `+`
# acts as a bitwise OR here. Apart from nzi_mask these are built purely from
# single-bit masks (bits to set); nzi_mask is the one AND-style mask.
470 | set dzinf_mask, inf_mask+dz_mask+adz_mask | ||
471 | set opnan_mask, nan_mask+operr_mask+aiop_mask | ||
472 | set nzi_mask, 0x01ffffff # AND mask: clears N, Z, and I (and bits 31-28); keeps NAN and all lower bits | ||
# unfinx_mask vs unf2inx_mask: the latter omits the accrued-underflow bit.
473 | set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask | ||
474 | set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask | ||
475 | set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask | ||
476 | set inx1a_mask, inex1_mask+ainex_mask | ||
477 | set inx2a_mask, inex2_mask+ainex_mask | ||
478 | set snaniop_mask, nan_mask+snan_mask+aiop_mask | ||
479 | set snaniop2_mask, snan_mask+aiop_mask | ||
480 | set naniop_mask, nan_mask+aiop_mask | ||
481 | set neginf_mask, neg_mask+inf_mask | ||
482 | set infaiop_mask, inf_mask+aiop_mask | ||
483 | set negz_mask, neg_mask+z_mask | ||
484 | set opaop_mask, operr_mask+aiop_mask | ||
# unfl_inx_mask / ovfl_inx_mask differ from unfinx_mask / ovfinx_mask only
# in leaving out inex2_mask.
485 | set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask | ||
486 | set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask | ||
487 | |||
488 | ######### | ||
489 | # misc. # | ||
490 | ######### | ||
491 | set rnd_stky_bit, 29 # stky bit pos in longword | ||
492 | |||
493 | set sign_bit, 0x7 # sign bit | ||
494 | set signan_bit, 0x6 # signalling nan bit | ||
495 | |||
# Same values as SGL_LO / DBL_LO.
496 | set sgl_thresh, 0x3f81 # minimum sgl exponent | ||
497 | set dbl_thresh, 0x3c01 # minimum dbl exponent | ||
498 | |||
# Rounding precision and rounding mode codes. The entry points pass the
# FPCR_MODE byte ("rnd mode,prec") to the core routines in %d0; how these
# codes are packed into that byte is not shown here -- TODO confirm.
499 | set x_mode, 0x0 # extended precision | ||
500 | set s_mode, 0x4 # single precision | ||
501 | set d_mode, 0x8 # double precision | ||
502 | |||
503 | set rn_mode, 0x0 # round-to-nearest | ||
504 | set rz_mode, 0x1 # round-to-zero | ||
505 | set rm_mode, 0x2 # round-to-minus-infinity | ||
506 | set rp_mode, 0x3 # round-to-plus-infinity | ||
507 | |||
508 | set mantissalen, 64 # length of mantissa in bits | ||
509 | |||
510 | set BYTE, 1 # len(byte) == 1 byte | ||
511 | set WORD, 2 # len(word) == 2 bytes | ||
512 | set LONG, 4 # len(longword) == 4 bytes | ||
513 | |||
# FP exception vector offsets, spaced 4 bytes apart from 0xc0 to 0xd8.
514 | set BSUN_VEC, 0xc0 # bsun vector offset | ||
515 | set INEX_VEC, 0xc4 # inexact vector offset | ||
516 | set DZ_VEC, 0xc8 # dz vector offset | ||
517 | set UNFL_VEC, 0xcc # unfl vector offset | ||
518 | set OPERR_VEC, 0xd0 # operr vector offset | ||
519 | set OVFL_VEC, 0xd4 # ovfl vector offset | ||
520 | set SNAN_VEC, 0xd8 # snan vector offset | ||
521 | |||
522 | ########################### | ||
523 | # SPecial CONDition FLaGs # | ||
524 | ########################### | ||
# Each *_flg is a one-bit mask and the matching *_bit is the index of that
# bit (0x01<->0, 0x02<->1, 0x04<->2, 0x08<->3, 0x80<->7). fmovm_flg (0x40)
# has no *_bit counterpart in this list. Going by the section banner these
# are values for SPCOND_FLG -- TODO confirm.
525 | set ftrapcc_flg, 0x01 # flag bit: ftrapcc exception | ||
526 | set fbsun_flg, 0x02 # flag bit: bsun exception | ||
527 | set mia7_flg, 0x04 # flag bit: (a7)+ <ea> | ||
528 | set mda7_flg, 0x08 # flag bit: -(a7) <ea> | ||
529 | set fmovm_flg, 0x40 # flag bit: fmovm instruction | ||
530 | set immed_flg, 0x80 # flag bit: &<data> <ea> | ||
531 | |||
532 | set ftrapcc_bit, 0x0 | ||
533 | set fbsun_bit, 0x1 | ||
534 | set mia7_bit, 0x2 | ||
535 | set mda7_bit, 0x3 | ||
536 | set immed_bit, 0x7 | ||
537 | |||
538 | ################################## | ||
539 | # TRANSCENDENTAL "LAST-OP" FLAGS # | ||
540 | ################################## | ||
# Consecutive codes 0-3 naming the FP instruction that was performed last;
# where they are stored and consumed is not visible in this part of the file.
541 | set FMUL_OP, 0x0 # fmul instr performed last | ||
542 | set FDIV_OP, 0x1 # fdiv performed last | ||
543 | set FADD_OP, 0x2 # fadd performed last | ||
544 | set FMOV_OP, 0x3 # fmov performed last | ||
545 | |||
546 | ############# | ||
547 | # CONSTANTS # | ||
548 | ############# | ||
# Shared numeric constants, emitted as raw longwords.
# T1/T2 are two longwords each; presumably the lead and trail parts of
# 16381*ln(2) stored as doubles -- TODO confirm against the routines that
# reference them.
549 | T1: long 0x40C62D38,0xD3D64634 # 16381 LOG2 LEAD | ||
550 | T2: long 0x3D6F90AE,0xB1E75CC7 # 16381 LOG2 TRAIL | ||
551 | |||
# PI and PIBY2 are four longwords each: the bit patterns read as
# extended-precision pi and pi/2 (exponent words 0x4000 / 0x3FFF, shared
# mantissa 0xC90FDAA2_2168C235) followed by a zero longword.
552 | PI: long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000 | ||
553 | PIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 | ||
554 | |||
# TWOBYPI is two longwords; the pattern reads as 2/pi in double precision.
555 | TWOBYPI: | ||
556 | long 0x3FE45F30,0x6DC9C883 | ||
557 | |||
558 | ######################################################################### | ||
559 | # MONADIC TEMPLATE # | ||
560 | ######################################################################### | ||
#
# _fsins_: sine entry point, single-precision operand.
#   Reached through the branch table at the top of the file.
#   in:   single-precision operand read from 0x8(%a6) once the frame is linked
#   out:  result in %fp0
#   regs: d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 are saved in the frame;
#         d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded on exit (fp0 is not,
#         because it carries the result). FPSR is reloaded from the frame
#         copy, whose condition-code and exception-status bytes are cleared
#         before dispatch; presumably the called routines update that copy.
#   The operand is widened to extended precision in FP_SRC, classified by
#   `tag`, and dispatched on the returned operand type.
#
561 | global _fsins_ | ||
562 | _fsins_: | ||
563 | link %a6,&-LOCAL_SIZE | ||
564 | |||
565 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
566 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
567 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
568 | |||
569 | fmov.l &0x0,%fpcr # zero FPCR | ||
570 | |||
571 | # | ||
572 | # copy, convert, and tag input argument | ||
573 | # | ||
574 | fmov.s 0x8(%a6),%fp0 # load sgl input | ||
575 | fmov.x %fp0,FP_SRC(%a6) | ||
576 | lea FP_SRC(%a6),%a0 | ||
577 | bsr.l tag # fetch operand type | ||
578 | mov.b %d0,STAG(%a6) | ||
579 | mov.b %d0,%d1 | ||
580 | |||
# In the saved FPSR: clear the FPSR_CC and FPSR_EXCEPT bytes, keep the
# FPSR_QBYTE and FPSR_AEXCEPT bytes.
581 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
582 | |||
# %d0 = saved FPCR mode byte, zero-extended; %d1 = operand type from tag.
583 | clr.l %d0 | ||
584 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
585 | |||
# NORM is type 0x00, so a zero tag takes the fall-through path.
586 | tst.b %d1 | ||
587 | bne.b _L0_2s | ||
588 | bsr.l ssin # operand is a NORM | ||
589 | bra.b _L0_6s | ||
590 | _L0_2s: | ||
591 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
592 | bne.b _L0_3s # no | ||
593 | bsr.l src_zero # yes | ||
594 | bra.b _L0_6s | ||
595 | _L0_3s: | ||
596 | cmpi.b %d1,&INF # is operand an INF? | ||
597 | bne.b _L0_4s # no | ||
598 | bsr.l t_operr # yes | ||
599 | bra.b _L0_6s | ||
600 | _L0_4s: | ||
601 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
602 | bne.b _L0_5s # no | ||
603 | bsr.l src_qnan # yes | ||
604 | bra.b _L0_6s | ||
# Every remaining type code lands here; presumably tag only yields DENORM
# at this point -- verify against tag.
605 | _L0_5s: | ||
606 | bsr.l ssind # operand is a DENORM | ||
607 | _L0_6s: | ||
608 | |||
609 | # | ||
610 | # Result is now in FP0 | ||
611 | # | ||
612 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
613 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
614 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
615 | unlk %a6 | ||
616 | rts | ||
617 | |||
#
# _fsind_: sine entry point, double-precision operand.
#   Reached through the branch table at the top of the file.
#   in:   double-precision operand read from 0x8(%a6) once the frame is linked
#   out:  result in %fp0
#   regs: d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 are saved in the frame;
#         d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded on exit (fp0 is not,
#         because it carries the result). FPSR is reloaded from the frame
#         copy, whose condition-code and exception-status bytes are cleared
#         before dispatch; presumably the called routines update that copy.
#   The operand is widened to extended precision in FP_SRC, classified by
#   `tag`, and dispatched on the returned operand type.
#
618 | global _fsind_ | ||
619 | _fsind_: | ||
620 | link %a6,&-LOCAL_SIZE | ||
621 | |||
622 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
623 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
624 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
625 | |||
626 | fmov.l &0x0,%fpcr # zero FPCR | ||
627 | |||
628 | # | ||
629 | # copy, convert, and tag input argument | ||
630 | # | ||
631 | fmov.d 0x8(%a6),%fp0 # load dbl input | ||
632 | fmov.x %fp0,FP_SRC(%a6) | ||
633 | lea FP_SRC(%a6),%a0 | ||
634 | bsr.l tag # fetch operand type | ||
635 | mov.b %d0,STAG(%a6) | ||
636 | mov.b %d0,%d1 | ||
637 | |||
# In the saved FPSR: clear the FPSR_CC and FPSR_EXCEPT bytes, keep the
# FPSR_QBYTE and FPSR_AEXCEPT bytes.
638 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
639 | |||
# %d0 = saved FPCR mode byte, zero-extended; %d1 = operand type from tag.
640 | clr.l %d0 | ||
641 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
642 | |||
# NOTE(review): STAG already holds this value (stored from %d0 above, and
# %d1 is an unmodified copy of it), so this second store looks redundant.
# The single- and extended-precision variants of this routine omit it.
643 | mov.b %d1,STAG(%a6) | ||
# NORM is type 0x00, so a zero tag takes the fall-through path.
644 | tst.b %d1 | ||
645 | bne.b _L0_2d | ||
646 | bsr.l ssin # operand is a NORM | ||
647 | bra.b _L0_6d | ||
648 | _L0_2d: | ||
649 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
650 | bne.b _L0_3d # no | ||
651 | bsr.l src_zero # yes | ||
652 | bra.b _L0_6d | ||
653 | _L0_3d: | ||
654 | cmpi.b %d1,&INF # is operand an INF? | ||
655 | bne.b _L0_4d # no | ||
656 | bsr.l t_operr # yes | ||
657 | bra.b _L0_6d | ||
658 | _L0_4d: | ||
659 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
660 | bne.b _L0_5d # no | ||
661 | bsr.l src_qnan # yes | ||
662 | bra.b _L0_6d | ||
# Every remaining type code lands here; presumably tag only yields DENORM
# at this point -- verify against tag.
663 | _L0_5d: | ||
664 | bsr.l ssind # operand is a DENORM | ||
665 | _L0_6d: | ||
666 | |||
667 | # | ||
668 | # Result is now in FP0 | ||
669 | # | ||
670 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
671 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
672 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
673 | unlk %a6 | ||
674 | rts | ||
675 | |||
#
# _fsinx_: sine entry point, extended-precision operand.
#   Reached through the branch table at the top of the file.
#   in:   12-byte extended operand read from 0x8(%a6)..0x13(%a6)
#   out:  result in %fp0
#   regs: d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 are saved in the frame;
#         d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded on exit (fp0 is not,
#         because it carries the result). FPSR is reloaded from the frame
#         copy, whose condition-code and exception-status bytes are cleared
#         before dispatch; presumably the called routines update that copy.
#   Unlike the s/d variants the operand is copied to FP_SRC as three raw
#   longwords (no FPU load/convert), then classified by `tag` and dispatched
#   on the returned operand type.
#
676 | global _fsinx_ | ||
677 | _fsinx_: | ||
678 | link %a6,&-LOCAL_SIZE | ||
679 | |||
680 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
681 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
682 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
683 | |||
684 | fmov.l &0x0,%fpcr # zero FPCR | ||
685 | |||
686 | # | ||
687 | # copy, convert, and tag input argument | ||
688 | # | ||
689 | lea FP_SRC(%a6),%a0 | ||
690 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | ||
691 | mov.l 0x8+0x4(%a6),0x4(%a0) | ||
692 | mov.l 0x8+0x8(%a6),0x8(%a0) | ||
693 | bsr.l tag # fetch operand type | ||
694 | mov.b %d0,STAG(%a6) | ||
695 | mov.b %d0,%d1 | ||
696 | |||
# In the saved FPSR: clear the FPSR_CC and FPSR_EXCEPT bytes, keep the
# FPSR_QBYTE and FPSR_AEXCEPT bytes.
697 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
698 | |||
# %d0 = saved FPCR mode byte, zero-extended; %d1 = operand type from tag.
699 | clr.l %d0 | ||
700 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
701 | |||
# NORM is type 0x00, so a zero tag takes the fall-through path.
702 | tst.b %d1 | ||
703 | bne.b _L0_2x | ||
704 | bsr.l ssin # operand is a NORM | ||
705 | bra.b _L0_6x | ||
706 | _L0_2x: | ||
707 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
708 | bne.b _L0_3x # no | ||
709 | bsr.l src_zero # yes | ||
710 | bra.b _L0_6x | ||
711 | _L0_3x: | ||
712 | cmpi.b %d1,&INF # is operand an INF? | ||
713 | bne.b _L0_4x # no | ||
714 | bsr.l t_operr # yes | ||
715 | bra.b _L0_6x | ||
716 | _L0_4x: | ||
717 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
718 | bne.b _L0_5x # no | ||
719 | bsr.l src_qnan # yes | ||
720 | bra.b _L0_6x | ||
# Every remaining type code lands here; presumably tag only yields DENORM
# at this point -- verify against tag.
721 | _L0_5x: | ||
722 | bsr.l ssind # operand is a DENORM | ||
723 | _L0_6x: | ||
724 | |||
725 | # | ||
726 | # Result is now in FP0 | ||
727 | # | ||
728 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
729 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
730 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
731 | unlk %a6 | ||
732 | rts | ||
733 | |||
734 | |||
735 | ######################################################################### | ||
736 | # MONADIC TEMPLATE # | ||
737 | ######################################################################### | ||
#
# _fcoss_: cosine entry point, single-precision operand.
#   Reached through the branch table at the top of the file.
#   in:   single-precision operand read from 0x8(%a6) once the frame is linked
#   out:  result in %fp0
#   regs: d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 are saved in the frame;
#         d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded on exit (fp0 is not,
#         because it carries the result). FPSR is reloaded from the frame
#         copy, whose condition-code and exception-status bytes are cleared
#         before dispatch; presumably the called routines update that copy.
#   Same template as the sine entry points; only the routines called for
#   the NORM, ZERO and DENORM cases differ (scos, ld_pone, scosd).
#
738 | global _fcoss_ | ||
739 | _fcoss_: | ||
740 | link %a6,&-LOCAL_SIZE | ||
741 | |||
742 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
743 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
744 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
745 | |||
746 | fmov.l &0x0,%fpcr # zero FPCR | ||
747 | |||
748 | # | ||
749 | # copy, convert, and tag input argument | ||
750 | # | ||
751 | fmov.s 0x8(%a6),%fp0 # load sgl input | ||
752 | fmov.x %fp0,FP_SRC(%a6) | ||
753 | lea FP_SRC(%a6),%a0 | ||
754 | bsr.l tag # fetch operand type | ||
755 | mov.b %d0,STAG(%a6) | ||
756 | mov.b %d0,%d1 | ||
757 | |||
# In the saved FPSR: clear the FPSR_CC and FPSR_EXCEPT bytes, keep the
# FPSR_QBYTE and FPSR_AEXCEPT bytes.
758 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
759 | |||
# %d0 = saved FPCR mode byte, zero-extended; %d1 = operand type from tag.
760 | clr.l %d0 | ||
761 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
762 | |||
# NORM is type 0x00, so a zero tag takes the fall-through path.
763 | tst.b %d1 | ||
764 | bne.b _L1_2s | ||
765 | bsr.l scos # operand is a NORM | ||
766 | bra.b _L1_6s | ||
767 | _L1_2s: | ||
768 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
769 | bne.b _L1_3s # no | ||
# ld_pone: presumably loads +1.0 as the result (cos of zero) -- confirm.
770 | bsr.l ld_pone # yes | ||
771 | bra.b _L1_6s | ||
772 | _L1_3s: | ||
773 | cmpi.b %d1,&INF # is operand an INF? | ||
774 | bne.b _L1_4s # no | ||
775 | bsr.l t_operr # yes | ||
776 | bra.b _L1_6s | ||
777 | _L1_4s: | ||
778 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
779 | bne.b _L1_5s # no | ||
780 | bsr.l src_qnan # yes | ||
781 | bra.b _L1_6s | ||
# Every remaining type code lands here; presumably tag only yields DENORM
# at this point -- verify against tag.
782 | _L1_5s: | ||
783 | bsr.l scosd # operand is a DENORM | ||
784 | _L1_6s: | ||
785 | |||
786 | # | ||
787 | # Result is now in FP0 | ||
788 | # | ||
789 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
790 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
791 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
792 | unlk %a6 | ||
793 | rts | ||
794 | |||
#
# _fcosd_: cosine entry point, double-precision operand.
#   Reached through the branch table at the top of the file.
#   in:   double-precision operand read from 0x8(%a6) once the frame is linked
#   out:  result in %fp0
#   regs: d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 are saved in the frame;
#         d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded on exit (fp0 is not,
#         because it carries the result). FPSR is reloaded from the frame
#         copy, whose condition-code and exception-status bytes are cleared
#         before dispatch; presumably the called routines update that copy.
#   Same template as the sine entry points; only the routines called for
#   the NORM, ZERO and DENORM cases differ (scos, ld_pone, scosd).
#
795 | global _fcosd_ | ||
796 | _fcosd_: | ||
797 | link %a6,&-LOCAL_SIZE | ||
798 | |||
799 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
800 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
801 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
802 | |||
803 | fmov.l &0x0,%fpcr # zero FPCR | ||
804 | |||
805 | # | ||
806 | # copy, convert, and tag input argument | ||
807 | # | ||
808 | fmov.d 0x8(%a6),%fp0 # load dbl input | ||
809 | fmov.x %fp0,FP_SRC(%a6) | ||
810 | lea FP_SRC(%a6),%a0 | ||
811 | bsr.l tag # fetch operand type | ||
812 | mov.b %d0,STAG(%a6) | ||
813 | mov.b %d0,%d1 | ||
814 | |||
# In the saved FPSR: clear the FPSR_CC and FPSR_EXCEPT bytes, keep the
# FPSR_QBYTE and FPSR_AEXCEPT bytes.
815 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
816 | |||
# %d0 = saved FPCR mode byte, zero-extended; %d1 = operand type from tag.
817 | clr.l %d0 | ||
818 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
819 | |||
# NOTE(review): STAG already holds this value (stored from %d0 above, and
# %d1 is an unmodified copy of it), so this second store looks redundant.
# The single- and extended-precision variants of this routine omit it.
820 | mov.b %d1,STAG(%a6) | ||
# NORM is type 0x00, so a zero tag takes the fall-through path.
821 | tst.b %d1 | ||
822 | bne.b _L1_2d | ||
823 | bsr.l scos # operand is a NORM | ||
824 | bra.b _L1_6d | ||
825 | _L1_2d: | ||
826 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
827 | bne.b _L1_3d # no | ||
# ld_pone: presumably loads +1.0 as the result (cos of zero) -- confirm.
828 | bsr.l ld_pone # yes | ||
829 | bra.b _L1_6d | ||
830 | _L1_3d: | ||
831 | cmpi.b %d1,&INF # is operand an INF? | ||
832 | bne.b _L1_4d # no | ||
833 | bsr.l t_operr # yes | ||
834 | bra.b _L1_6d | ||
835 | _L1_4d: | ||
836 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
837 | bne.b _L1_5d # no | ||
838 | bsr.l src_qnan # yes | ||
839 | bra.b _L1_6d | ||
# Every remaining type code lands here; presumably tag only yields DENORM
# at this point -- verify against tag.
840 | _L1_5d: | ||
841 | bsr.l scosd # operand is a DENORM | ||
842 | _L1_6d: | ||
843 | |||
844 | # | ||
845 | # Result is now in FP0 | ||
846 | # | ||
847 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
848 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
849 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
850 | unlk %a6 | ||
851 | rts | ||
852 | |||
#
# _fcosx_: cosine entry point, extended-precision operand.
#   Reached through the branch table at the top of the file.
#   in:   12-byte extended operand read from 0x8(%a6)..0x13(%a6)
#   out:  result in %fp0
#   regs: d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 are saved in the frame;
#         d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded on exit (fp0 is not,
#         because it carries the result). FPSR is reloaded from the frame
#         copy, whose condition-code and exception-status bytes are cleared
#         before dispatch; presumably the called routines update that copy.
#   Unlike the s/d variants the operand is copied to FP_SRC as three raw
#   longwords (no FPU load/convert), then classified by `tag` and dispatched
#   on the returned operand type.
#
853 | global _fcosx_ | ||
854 | _fcosx_: | ||
855 | link %a6,&-LOCAL_SIZE | ||
856 | |||
857 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
858 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
859 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
860 | |||
861 | fmov.l &0x0,%fpcr # zero FPCR | ||
862 | |||
863 | # | ||
864 | # copy, convert, and tag input argument | ||
865 | # | ||
866 | lea FP_SRC(%a6),%a0 | ||
867 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | ||
868 | mov.l 0x8+0x4(%a6),0x4(%a0) | ||
869 | mov.l 0x8+0x8(%a6),0x8(%a0) | ||
870 | bsr.l tag # fetch operand type | ||
871 | mov.b %d0,STAG(%a6) | ||
872 | mov.b %d0,%d1 | ||
873 | |||
# In the saved FPSR: clear the FPSR_CC and FPSR_EXCEPT bytes, keep the
# FPSR_QBYTE and FPSR_AEXCEPT bytes.
874 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
875 | |||
# %d0 = saved FPCR mode byte, zero-extended; %d1 = operand type from tag.
876 | clr.l %d0 | ||
877 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
878 | |||
# NORM is type 0x00, so a zero tag takes the fall-through path.
879 | tst.b %d1 | ||
880 | bne.b _L1_2x | ||
881 | bsr.l scos # operand is a NORM | ||
882 | bra.b _L1_6x | ||
883 | _L1_2x: | ||
884 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
885 | bne.b _L1_3x # no | ||
# ld_pone: presumably loads +1.0 as the result (cos of zero) -- confirm.
886 | bsr.l ld_pone # yes | ||
887 | bra.b _L1_6x | ||
888 | _L1_3x: | ||
889 | cmpi.b %d1,&INF # is operand an INF? | ||
890 | bne.b _L1_4x # no | ||
891 | bsr.l t_operr # yes | ||
892 | bra.b _L1_6x | ||
893 | _L1_4x: | ||
894 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
895 | bne.b _L1_5x # no | ||
896 | bsr.l src_qnan # yes | ||
897 | bra.b _L1_6x | ||
# Every remaining type code lands here; presumably tag only yields DENORM
# at this point -- verify against tag.
898 | _L1_5x: | ||
899 | bsr.l scosd # operand is a DENORM | ||
900 | _L1_6x: | ||
901 | |||
902 | # | ||
903 | # Result is now in FP0 | ||
904 | # | ||
905 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
906 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
907 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
908 | unlk %a6 | ||
909 | rts | ||
910 | |||
911 | |||
912 | ######################################################################### | ||
913 | # MONADIC TEMPLATE # | ||
914 | ######################################################################### | ||
#
# _fsinhs_: hyperbolic sine entry point, single-precision operand.
#   Reached through the branch table at the top of the file.
#   in:   single-precision operand read from 0x8(%a6) once the frame is linked
#   out:  result in %fp0
#   regs: d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 are saved in the frame;
#         d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded on exit (fp0 is not,
#         because it carries the result). FPSR is reloaded from the frame
#         copy, whose condition-code and exception-status bytes are cleared
#         before dispatch; presumably the called routines update that copy.
#   Same template as the sine entry points, except that an INF operand is
#   routed to src_inf instead of t_operr and the NORM/DENORM cases call
#   ssinh/ssinhd.
#
915 | global _fsinhs_ | ||
916 | _fsinhs_: | ||
917 | link %a6,&-LOCAL_SIZE | ||
918 | |||
919 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
920 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
921 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
922 | |||
923 | fmov.l &0x0,%fpcr # zero FPCR | ||
924 | |||
925 | # | ||
926 | # copy, convert, and tag input argument | ||
927 | # | ||
928 | fmov.s 0x8(%a6),%fp0 # load sgl input | ||
929 | fmov.x %fp0,FP_SRC(%a6) | ||
930 | lea FP_SRC(%a6),%a0 | ||
931 | bsr.l tag # fetch operand type | ||
932 | mov.b %d0,STAG(%a6) | ||
933 | mov.b %d0,%d1 | ||
934 | |||
# In the saved FPSR: clear the FPSR_CC and FPSR_EXCEPT bytes, keep the
# FPSR_QBYTE and FPSR_AEXCEPT bytes.
935 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
936 | |||
# %d0 = saved FPCR mode byte, zero-extended; %d1 = operand type from tag.
937 | clr.l %d0 | ||
938 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
939 | |||
# NORM is type 0x00, so a zero tag takes the fall-through path.
940 | tst.b %d1 | ||
941 | bne.b _L2_2s | ||
942 | bsr.l ssinh # operand is a NORM | ||
943 | bra.b _L2_6s | ||
944 | _L2_2s: | ||
945 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
946 | bne.b _L2_3s # no | ||
947 | bsr.l src_zero # yes | ||
948 | bra.b _L2_6s | ||
949 | _L2_3s: | ||
950 | cmpi.b %d1,&INF # is operand an INF? | ||
951 | bne.b _L2_4s # no | ||
952 | bsr.l src_inf # yes | ||
953 | bra.b _L2_6s | ||
954 | _L2_4s: | ||
955 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
956 | bne.b _L2_5s # no | ||
957 | bsr.l src_qnan # yes | ||
958 | bra.b _L2_6s | ||
# Every remaining type code lands here; presumably tag only yields DENORM
# at this point -- verify against tag.
959 | _L2_5s: | ||
960 | bsr.l ssinhd # operand is a DENORM | ||
961 | _L2_6s: | ||
962 | |||
963 | # | ||
964 | # Result is now in FP0 | ||
965 | # | ||
966 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
967 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
968 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
969 | unlk %a6 | ||
970 | rts | ||
971 | |||
#
# _fsinhd_: double-precision entry point for the ssinh handlers.
#
# Saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 in a link frame and zeroes
# FPCR. The argument at 0x8(%a6) is widened to extended precision in
# FP_SRC and classified by tag; the tag then selects the handler:
#   zero (NORM) -> ssinh, ZERO -> src_zero, INF -> src_inf,
#   QNAN -> src_qnan, any other tag -> ssinhd (DENORM).
# Handlers get d0 = zero-extended FPCR_MODE byte. The result is left
# in fp0; everything else saved above is reloaded from the frame.
#
972 | global _fsinhd_ | ||
973 | _fsinhd_: | ||
974 | link %a6,&-LOCAL_SIZE | ||
975 | |||
976 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
977 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
978 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
979 | |||
980 | fmov.l &0x0,%fpcr # zero FPCR | ||
981 | |||
982 | # | ||
983 | # copy, convert, and tag input argument | ||
984 | # | ||
# Widen through fp0 so FP_SRC holds an extended-precision operand.
985 | fmov.d 0x8(%a6),%fp0 # load dbl input | ||
986 | fmov.x %fp0,FP_SRC(%a6) | ||
987 | lea FP_SRC(%a6),%a0 | ||
988 | bsr.l tag # fetch operand type | ||
989 | mov.b %d0,STAG(%a6) | ||
990 | mov.b %d0,%d1 | ||
991 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image.
# NOTE(review): presumably the quotient and accrued-exception bytes -- confirm.
992 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
993 | |||
994 | clr.l %d0 | ||
995 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
996 | |||
# NOTE(review): STAG already received this tag right after the tag call;
# this store looks redundant unless the andi.l above can touch STAG -- confirm.
997 | mov.b %d1,STAG(%a6) | ||
# d1 still holds the tag from above; zero takes the NORM path.
998 | tst.b %d1 | ||
999 | bne.b _L2_2d | ||
1000 | bsr.l ssinh # operand is a NORM | ||
1001 | bra.b _L2_6d | ||
1002 | _L2_2d: | ||
1003 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
1004 | bne.b _L2_3d # no | ||
1005 | bsr.l src_zero # yes | ||
1006 | bra.b _L2_6d | ||
1007 | _L2_3d: | ||
1008 | cmpi.b %d1,&INF # is operand an INF? | ||
1009 | bne.b _L2_4d # no | ||
1010 | bsr.l src_inf # yes | ||
1011 | bra.b _L2_6d | ||
1012 | _L2_4d: | ||
1013 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
1014 | bne.b _L2_5d # no | ||
1015 | bsr.l src_qnan # yes | ||
1016 | bra.b _L2_6d | ||
1017 | _L2_5d: | ||
1018 | bsr.l ssinhd # operand is a DENORM | ||
1019 | _L2_6d: | ||
1020 | |||
1021 | # | ||
1022 | # Result is now in FP0 | ||
1023 | # | ||
# FPCR/FPSR are reloaded from the frame image (FPSR as masked above;
# handlers may also have changed it -- not visible here). Only fp1 is
# reloaded, so fp0 carries the result back to the caller.
1024 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
1025 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
1026 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
1027 | unlk %a6 | ||
1028 | rts | ||
1029 | |||
#
# _fsinhx_: extended-precision entry point for the ssinh handlers.
#
# Saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 in a link frame and zeroes
# FPCR. The 12-byte argument at 0x8(%a6) is copied unchanged into
# FP_SRC and classified by tag; the tag then selects the handler:
#   zero (NORM) -> ssinh, ZERO -> src_zero, INF -> src_inf,
#   QNAN -> src_qnan, any other tag -> ssinhd (DENORM).
# Handlers get d0 = zero-extended FPCR_MODE byte. The result is left
# in fp0; everything else saved above is reloaded from the frame.
#
1030 | global _fsinhx_ | ||
1031 | _fsinhx_: | ||
1032 | link %a6,&-LOCAL_SIZE | ||
1033 | |||
1034 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
1035 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
1036 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
1037 | |||
1038 | fmov.l &0x0,%fpcr # zero FPCR | ||
1039 | |||
1040 | # | ||
1041 | # copy, convert, and tag input argument | ||
1042 | # | ||
# a0 = &FP_SRC is both the copy destination and the pointer set up for tag.
# The three longwords are moved verbatim; no FPU load is involved.
1043 | lea FP_SRC(%a6),%a0 | ||
1044 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | ||
1045 | mov.l 0x8+0x4(%a6),0x4(%a0) | ||
1046 | mov.l 0x8+0x8(%a6),0x8(%a0) | ||
1047 | bsr.l tag # fetch operand type | ||
1048 | mov.b %d0,STAG(%a6) | ||
1049 | mov.b %d0,%d1 | ||
1050 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image.
# NOTE(review): presumably the quotient and accrued-exception bytes -- confirm.
1051 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
1052 | |||
1053 | clr.l %d0 | ||
1054 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
1055 | |||
# d1 still holds the tag from above; zero takes the NORM path.
1056 | tst.b %d1 | ||
1057 | bne.b _L2_2x | ||
1058 | bsr.l ssinh # operand is a NORM | ||
1059 | bra.b _L2_6x | ||
1060 | _L2_2x: | ||
1061 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
1062 | bne.b _L2_3x # no | ||
1063 | bsr.l src_zero # yes | ||
1064 | bra.b _L2_6x | ||
1065 | _L2_3x: | ||
1066 | cmpi.b %d1,&INF # is operand an INF? | ||
1067 | bne.b _L2_4x # no | ||
1068 | bsr.l src_inf # yes | ||
1069 | bra.b _L2_6x | ||
1070 | _L2_4x: | ||
1071 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
1072 | bne.b _L2_5x # no | ||
1073 | bsr.l src_qnan # yes | ||
1074 | bra.b _L2_6x | ||
1075 | _L2_5x: | ||
1076 | bsr.l ssinhd # operand is a DENORM | ||
1077 | _L2_6x: | ||
1078 | |||
1079 | # | ||
1080 | # Result is now in FP0 | ||
1081 | # | ||
# FPCR/FPSR are reloaded from the frame image (FPSR as masked above;
# handlers may also have changed it -- not visible here). Only fp1 is
# reloaded, so fp0 carries the result back to the caller.
1082 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
1083 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
1084 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
1085 | unlk %a6 | ||
1086 | rts | ||
1087 | |||
1088 | |||
1089 | ######################################################################### | ||
1090 | # MONADIC TEMPLATE # | ||
1091 | ######################################################################### | ||
#
# _flognp1s_: single-precision entry point for the slognp1 handlers.
#
# Saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 in a link frame and zeroes
# FPCR. The argument at 0x8(%a6) is widened to extended precision in
# FP_SRC and classified by tag; the tag then selects the handler:
#   zero (NORM) -> slognp1, ZERO -> src_zero, INF -> sopr_inf,
#   QNAN -> src_qnan, any other tag -> slognp1d (DENORM).
# Handlers get d0 = zero-extended FPCR_MODE byte. The result is left
# in fp0; everything else saved above is reloaded from the frame.
#
1092 | global _flognp1s_ | ||
1093 | _flognp1s_: | ||
1094 | link %a6,&-LOCAL_SIZE | ||
1095 | |||
1096 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
1097 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
1098 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
1099 | |||
1100 | fmov.l &0x0,%fpcr # zero FPCR | ||
1101 | |||
1102 | # | ||
1103 | # copy, convert, and tag input argument | ||
1104 | # | ||
# Widen through fp0 so FP_SRC holds an extended-precision operand.
1105 | fmov.s 0x8(%a6),%fp0 # load sgl input | ||
1106 | fmov.x %fp0,FP_SRC(%a6) | ||
1107 | lea FP_SRC(%a6),%a0 | ||
1108 | bsr.l tag # fetch operand type | ||
1109 | mov.b %d0,STAG(%a6) | ||
1110 | mov.b %d0,%d1 | ||
1111 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image.
# NOTE(review): presumably the quotient and accrued-exception bytes -- confirm.
1112 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
1113 | |||
1114 | clr.l %d0 | ||
1115 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
1116 | |||
# d1 still holds the tag from above; zero takes the NORM path.
1117 | tst.b %d1 | ||
1118 | bne.b _L3_2s | ||
1119 | bsr.l slognp1 # operand is a NORM | ||
1120 | bra.b _L3_6s | ||
1121 | _L3_2s: | ||
1122 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
1123 | bne.b _L3_3s # no | ||
1124 | bsr.l src_zero # yes | ||
1125 | bra.b _L3_6s | ||
1126 | _L3_3s: | ||
1127 | cmpi.b %d1,&INF # is operand an INF? | ||
1128 | bne.b _L3_4s # no | ||
1129 | bsr.l sopr_inf # yes | ||
1130 | bra.b _L3_6s | ||
1131 | _L3_4s: | ||
1132 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
1133 | bne.b _L3_5s # no | ||
1134 | bsr.l src_qnan # yes | ||
1135 | bra.b _L3_6s | ||
1136 | _L3_5s: | ||
1137 | bsr.l slognp1d # operand is a DENORM | ||
1138 | _L3_6s: | ||
1139 | |||
1140 | # | ||
1141 | # Result is now in FP0 | ||
1142 | # | ||
# FPCR/FPSR are reloaded from the frame image (FPSR as masked above;
# handlers may also have changed it -- not visible here). Only fp1 is
# reloaded, so fp0 carries the result back to the caller.
1143 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
1144 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
1145 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
1146 | unlk %a6 | ||
1147 | rts | ||
1148 | |||
#
# _flognp1d_: double-precision entry point for the slognp1 handlers.
#
# Saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 in a link frame and zeroes
# FPCR. The argument at 0x8(%a6) is widened to extended precision in
# FP_SRC and classified by tag; the tag then selects the handler:
#   zero (NORM) -> slognp1, ZERO -> src_zero, INF -> sopr_inf,
#   QNAN -> src_qnan, any other tag -> slognp1d (DENORM).
# Handlers get d0 = zero-extended FPCR_MODE byte. The result is left
# in fp0; everything else saved above is reloaded from the frame.
#
1149 | global _flognp1d_ | ||
1150 | _flognp1d_: | ||
1151 | link %a6,&-LOCAL_SIZE | ||
1152 | |||
1153 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
1154 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
1155 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
1156 | |||
1157 | fmov.l &0x0,%fpcr # zero FPCR | ||
1158 | |||
1159 | # | ||
1160 | # copy, convert, and tag input argument | ||
1161 | # | ||
# Widen through fp0 so FP_SRC holds an extended-precision operand.
1162 | fmov.d 0x8(%a6),%fp0 # load dbl input | ||
1163 | fmov.x %fp0,FP_SRC(%a6) | ||
1164 | lea FP_SRC(%a6),%a0 | ||
1165 | bsr.l tag # fetch operand type | ||
1166 | mov.b %d0,STAG(%a6) | ||
1167 | mov.b %d0,%d1 | ||
1168 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image.
# NOTE(review): presumably the quotient and accrued-exception bytes -- confirm.
1169 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
1170 | |||
1171 | clr.l %d0 | ||
1172 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
1173 | |||
# NOTE(review): STAG already received this tag right after the tag call;
# this store looks redundant unless the andi.l above can touch STAG -- confirm.
1174 | mov.b %d1,STAG(%a6) | ||
# d1 still holds the tag from above; zero takes the NORM path.
1175 | tst.b %d1 | ||
1176 | bne.b _L3_2d | ||
1177 | bsr.l slognp1 # operand is a NORM | ||
1178 | bra.b _L3_6d | ||
1179 | _L3_2d: | ||
1180 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
1181 | bne.b _L3_3d # no | ||
1182 | bsr.l src_zero # yes | ||
1183 | bra.b _L3_6d | ||
1184 | _L3_3d: | ||
1185 | cmpi.b %d1,&INF # is operand an INF? | ||
1186 | bne.b _L3_4d # no | ||
1187 | bsr.l sopr_inf # yes | ||
1188 | bra.b _L3_6d | ||
1189 | _L3_4d: | ||
1190 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
1191 | bne.b _L3_5d # no | ||
1192 | bsr.l src_qnan # yes | ||
1193 | bra.b _L3_6d | ||
1194 | _L3_5d: | ||
1195 | bsr.l slognp1d # operand is a DENORM | ||
1196 | _L3_6d: | ||
1197 | |||
1198 | # | ||
1199 | # Result is now in FP0 | ||
1200 | # | ||
# FPCR/FPSR are reloaded from the frame image (FPSR as masked above;
# handlers may also have changed it -- not visible here). Only fp1 is
# reloaded, so fp0 carries the result back to the caller.
1201 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
1202 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
1203 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
1204 | unlk %a6 | ||
1205 | rts | ||
1206 | |||
#
# _flognp1x_: extended-precision entry point for the slognp1 handlers.
#
# Saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 in a link frame and zeroes
# FPCR. The 12-byte argument at 0x8(%a6) is copied unchanged into
# FP_SRC and classified by tag; the tag then selects the handler:
#   zero (NORM) -> slognp1, ZERO -> src_zero, INF -> sopr_inf,
#   QNAN -> src_qnan, any other tag -> slognp1d (DENORM).
# Handlers get d0 = zero-extended FPCR_MODE byte. The result is left
# in fp0; everything else saved above is reloaded from the frame.
#
1207 | global _flognp1x_ | ||
1208 | _flognp1x_: | ||
1209 | link %a6,&-LOCAL_SIZE | ||
1210 | |||
1211 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
1212 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
1213 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
1214 | |||
1215 | fmov.l &0x0,%fpcr # zero FPCR | ||
1216 | |||
1217 | # | ||
1218 | # copy, convert, and tag input argument | ||
1219 | # | ||
# a0 = &FP_SRC is both the copy destination and the pointer set up for tag.
# The three longwords are moved verbatim; no FPU load is involved.
1220 | lea FP_SRC(%a6),%a0 | ||
1221 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | ||
1222 | mov.l 0x8+0x4(%a6),0x4(%a0) | ||
1223 | mov.l 0x8+0x8(%a6),0x8(%a0) | ||
1224 | bsr.l tag # fetch operand type | ||
1225 | mov.b %d0,STAG(%a6) | ||
1226 | mov.b %d0,%d1 | ||
1227 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image.
# NOTE(review): presumably the quotient and accrued-exception bytes -- confirm.
1228 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
1229 | |||
1230 | clr.l %d0 | ||
1231 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
1232 | |||
# d1 still holds the tag from above; zero takes the NORM path.
1233 | tst.b %d1 | ||
1234 | bne.b _L3_2x | ||
1235 | bsr.l slognp1 # operand is a NORM | ||
1236 | bra.b _L3_6x | ||
1237 | _L3_2x: | ||
1238 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
1239 | bne.b _L3_3x # no | ||
1240 | bsr.l src_zero # yes | ||
1241 | bra.b _L3_6x | ||
1242 | _L3_3x: | ||
1243 | cmpi.b %d1,&INF # is operand an INF? | ||
1244 | bne.b _L3_4x # no | ||
1245 | bsr.l sopr_inf # yes | ||
1246 | bra.b _L3_6x | ||
1247 | _L3_4x: | ||
1248 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
1249 | bne.b _L3_5x # no | ||
1250 | bsr.l src_qnan # yes | ||
1251 | bra.b _L3_6x | ||
1252 | _L3_5x: | ||
1253 | bsr.l slognp1d # operand is a DENORM | ||
1254 | _L3_6x: | ||
1255 | |||
1256 | # | ||
1257 | # Result is now in FP0 | ||
1258 | # | ||
# FPCR/FPSR are reloaded from the frame image (FPSR as masked above;
# handlers may also have changed it -- not visible here). Only fp1 is
# reloaded, so fp0 carries the result back to the caller.
1259 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
1260 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
1261 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
1262 | unlk %a6 | ||
1263 | rts | ||
1264 | |||
1265 | |||
1266 | ######################################################################### | ||
1267 | # MONADIC TEMPLATE # | ||
1268 | ######################################################################### | ||
#
# _fetoxm1s_: single-precision entry point for the setoxm1 handlers.
#
# Saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 in a link frame and zeroes
# FPCR. The argument at 0x8(%a6) is widened to extended precision in
# FP_SRC and classified by tag; the tag then selects the handler:
#   zero (NORM) -> setoxm1, ZERO -> src_zero, INF -> setoxm1i,
#   QNAN -> src_qnan, any other tag -> setoxm1d (DENORM).
# Handlers get d0 = zero-extended FPCR_MODE byte. The result is left
# in fp0; everything else saved above is reloaded from the frame.
#
1269 | global _fetoxm1s_ | ||
1270 | _fetoxm1s_: | ||
1271 | link %a6,&-LOCAL_SIZE | ||
1272 | |||
1273 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
1274 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
1275 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
1276 | |||
1277 | fmov.l &0x0,%fpcr # zero FPCR | ||
1278 | |||
1279 | # | ||
1280 | # copy, convert, and tag input argument | ||
1281 | # | ||
# Widen through fp0 so FP_SRC holds an extended-precision operand.
1282 | fmov.s 0x8(%a6),%fp0 # load sgl input | ||
1283 | fmov.x %fp0,FP_SRC(%a6) | ||
1284 | lea FP_SRC(%a6),%a0 | ||
1285 | bsr.l tag # fetch operand type | ||
1286 | mov.b %d0,STAG(%a6) | ||
1287 | mov.b %d0,%d1 | ||
1288 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image.
# NOTE(review): presumably the quotient and accrued-exception bytes -- confirm.
1289 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
1290 | |||
1291 | clr.l %d0 | ||
1292 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
1293 | |||
# d1 still holds the tag from above; zero takes the NORM path.
1294 | tst.b %d1 | ||
1295 | bne.b _L4_2s | ||
1296 | bsr.l setoxm1 # operand is a NORM | ||
1297 | bra.b _L4_6s | ||
1298 | _L4_2s: | ||
1299 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
1300 | bne.b _L4_3s # no | ||
1301 | bsr.l src_zero # yes | ||
1302 | bra.b _L4_6s | ||
1303 | _L4_3s: | ||
1304 | cmpi.b %d1,&INF # is operand an INF? | ||
1305 | bne.b _L4_4s # no | ||
1306 | bsr.l setoxm1i # yes | ||
1307 | bra.b _L4_6s | ||
1308 | _L4_4s: | ||
1309 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
1310 | bne.b _L4_5s # no | ||
1311 | bsr.l src_qnan # yes | ||
1312 | bra.b _L4_6s | ||
1313 | _L4_5s: | ||
1314 | bsr.l setoxm1d # operand is a DENORM | ||
1315 | _L4_6s: | ||
1316 | |||
1317 | # | ||
1318 | # Result is now in FP0 | ||
1319 | # | ||
# FPCR/FPSR are reloaded from the frame image (FPSR as masked above;
# handlers may also have changed it -- not visible here). Only fp1 is
# reloaded, so fp0 carries the result back to the caller.
1320 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
1321 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
1322 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
1323 | unlk %a6 | ||
1324 | rts | ||
1325 | |||
#
# _fetoxm1d_: double-precision entry point for the setoxm1 handlers.
#
# Saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 in a link frame and zeroes
# FPCR. The argument at 0x8(%a6) is widened to extended precision in
# FP_SRC and classified by tag; the tag then selects the handler:
#   zero (NORM) -> setoxm1, ZERO -> src_zero, INF -> setoxm1i,
#   QNAN -> src_qnan, any other tag -> setoxm1d (DENORM).
# Handlers get d0 = zero-extended FPCR_MODE byte. The result is left
# in fp0; everything else saved above is reloaded from the frame.
#
1326 | global _fetoxm1d_ | ||
1327 | _fetoxm1d_: | ||
1328 | link %a6,&-LOCAL_SIZE | ||
1329 | |||
1330 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
1331 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
1332 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
1333 | |||
1334 | fmov.l &0x0,%fpcr # zero FPCR | ||
1335 | |||
1336 | # | ||
1337 | # copy, convert, and tag input argument | ||
1338 | # | ||
# Widen through fp0 so FP_SRC holds an extended-precision operand.
1339 | fmov.d 0x8(%a6),%fp0 # load dbl input | ||
1340 | fmov.x %fp0,FP_SRC(%a6) | ||
1341 | lea FP_SRC(%a6),%a0 | ||
1342 | bsr.l tag # fetch operand type | ||
1343 | mov.b %d0,STAG(%a6) | ||
1344 | mov.b %d0,%d1 | ||
1345 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image.
# NOTE(review): presumably the quotient and accrued-exception bytes -- confirm.
1346 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
1347 | |||
1348 | clr.l %d0 | ||
1349 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
1350 | |||
# NOTE(review): STAG already received this tag right after the tag call;
# this store looks redundant unless the andi.l above can touch STAG -- confirm.
1351 | mov.b %d1,STAG(%a6) | ||
# d1 still holds the tag from above; zero takes the NORM path.
1352 | tst.b %d1 | ||
1353 | bne.b _L4_2d | ||
1354 | bsr.l setoxm1 # operand is a NORM | ||
1355 | bra.b _L4_6d | ||
1356 | _L4_2d: | ||
1357 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
1358 | bne.b _L4_3d # no | ||
1359 | bsr.l src_zero # yes | ||
1360 | bra.b _L4_6d | ||
1361 | _L4_3d: | ||
1362 | cmpi.b %d1,&INF # is operand an INF? | ||
1363 | bne.b _L4_4d # no | ||
1364 | bsr.l setoxm1i # yes | ||
1365 | bra.b _L4_6d | ||
1366 | _L4_4d: | ||
1367 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
1368 | bne.b _L4_5d # no | ||
1369 | bsr.l src_qnan # yes | ||
1370 | bra.b _L4_6d | ||
1371 | _L4_5d: | ||
1372 | bsr.l setoxm1d # operand is a DENORM | ||
1373 | _L4_6d: | ||
1374 | |||
1375 | # | ||
1376 | # Result is now in FP0 | ||
1377 | # | ||
# FPCR/FPSR are reloaded from the frame image (FPSR as masked above;
# handlers may also have changed it -- not visible here). Only fp1 is
# reloaded, so fp0 carries the result back to the caller.
1378 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
1379 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
1380 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
1381 | unlk %a6 | ||
1382 | rts | ||
1383 | |||
#
# _fetoxm1x_: extended-precision entry point for the setoxm1 handlers.
#
# Saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 in a link frame and zeroes
# FPCR. The 12-byte argument at 0x8(%a6) is copied unchanged into
# FP_SRC and classified by tag; the tag then selects the handler:
#   zero (NORM) -> setoxm1, ZERO -> src_zero, INF -> setoxm1i,
#   QNAN -> src_qnan, any other tag -> setoxm1d (DENORM).
# Handlers get d0 = zero-extended FPCR_MODE byte. The result is left
# in fp0; everything else saved above is reloaded from the frame.
#
1384 | global _fetoxm1x_ | ||
1385 | _fetoxm1x_: | ||
1386 | link %a6,&-LOCAL_SIZE | ||
1387 | |||
1388 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
1389 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
1390 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
1391 | |||
1392 | fmov.l &0x0,%fpcr # zero FPCR | ||
1393 | |||
1394 | # | ||
1395 | # copy, convert, and tag input argument | ||
1396 | # | ||
# a0 = &FP_SRC is both the copy destination and the pointer set up for tag.
# The three longwords are moved verbatim; no FPU load is involved.
1397 | lea FP_SRC(%a6),%a0 | ||
1398 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | ||
1399 | mov.l 0x8+0x4(%a6),0x4(%a0) | ||
1400 | mov.l 0x8+0x8(%a6),0x8(%a0) | ||
1401 | bsr.l tag # fetch operand type | ||
1402 | mov.b %d0,STAG(%a6) | ||
1403 | mov.b %d0,%d1 | ||
1404 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image.
# NOTE(review): presumably the quotient and accrued-exception bytes -- confirm.
1405 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
1406 | |||
1407 | clr.l %d0 | ||
1408 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
1409 | |||
# d1 still holds the tag from above; zero takes the NORM path.
1410 | tst.b %d1 | ||
1411 | bne.b _L4_2x | ||
1412 | bsr.l setoxm1 # operand is a NORM | ||
1413 | bra.b _L4_6x | ||
1414 | _L4_2x: | ||
1415 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
1416 | bne.b _L4_3x # no | ||
1417 | bsr.l src_zero # yes | ||
1418 | bra.b _L4_6x | ||
1419 | _L4_3x: | ||
1420 | cmpi.b %d1,&INF # is operand an INF? | ||
1421 | bne.b _L4_4x # no | ||
1422 | bsr.l setoxm1i # yes | ||
1423 | bra.b _L4_6x | ||
1424 | _L4_4x: | ||
1425 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
1426 | bne.b _L4_5x # no | ||
1427 | bsr.l src_qnan # yes | ||
1428 | bra.b _L4_6x | ||
1429 | _L4_5x: | ||
1430 | bsr.l setoxm1d # operand is a DENORM | ||
1431 | _L4_6x: | ||
1432 | |||
1433 | # | ||
1434 | # Result is now in FP0 | ||
1435 | # | ||
# FPCR/FPSR are reloaded from the frame image (FPSR as masked above;
# handlers may also have changed it -- not visible here). Only fp1 is
# reloaded, so fp0 carries the result back to the caller.
1436 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
1437 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
1438 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
1439 | unlk %a6 | ||
1440 | rts | ||
1441 | |||
1442 | |||
1443 | ######################################################################### | ||
1444 | # MONADIC TEMPLATE # | ||
1445 | ######################################################################### | ||
#
# _ftanhs_: single-precision entry point for the stanh handlers.
#
# Saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 in a link frame and zeroes
# FPCR. The argument at 0x8(%a6) is widened to extended precision in
# FP_SRC and classified by tag; the tag then selects the handler:
#   zero (NORM) -> stanh, ZERO -> src_zero, INF -> src_one,
#   QNAN -> src_qnan, any other tag -> stanhd (DENORM).
# Handlers get d0 = zero-extended FPCR_MODE byte. The result is left
# in fp0; everything else saved above is reloaded from the frame.
#
1446 | global _ftanhs_ | ||
1447 | _ftanhs_: | ||
1448 | link %a6,&-LOCAL_SIZE | ||
1449 | |||
1450 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
1451 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
1452 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
1453 | |||
1454 | fmov.l &0x0,%fpcr # zero FPCR | ||
1455 | |||
1456 | # | ||
1457 | # copy, convert, and tag input argument | ||
1458 | # | ||
# Widen through fp0 so FP_SRC holds an extended-precision operand.
1459 | fmov.s 0x8(%a6),%fp0 # load sgl input | ||
1460 | fmov.x %fp0,FP_SRC(%a6) | ||
1461 | lea FP_SRC(%a6),%a0 | ||
1462 | bsr.l tag # fetch operand type | ||
1463 | mov.b %d0,STAG(%a6) | ||
1464 | mov.b %d0,%d1 | ||
1465 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image.
# NOTE(review): presumably the quotient and accrued-exception bytes -- confirm.
1466 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
1467 | |||
1468 | clr.l %d0 | ||
1469 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
1470 | |||
# d1 still holds the tag from above; zero takes the NORM path.
1471 | tst.b %d1 | ||
1472 | bne.b _L5_2s | ||
1473 | bsr.l stanh # operand is a NORM | ||
1474 | bra.b _L5_6s | ||
1475 | _L5_2s: | ||
1476 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
1477 | bne.b _L5_3s # no | ||
1478 | bsr.l src_zero # yes | ||
1479 | bra.b _L5_6s | ||
1480 | _L5_3s: | ||
1481 | cmpi.b %d1,&INF # is operand an INF? | ||
1482 | bne.b _L5_4s # no | ||
1483 | bsr.l src_one # yes | ||
1484 | bra.b _L5_6s | ||
1485 | _L5_4s: | ||
1486 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
1487 | bne.b _L5_5s # no | ||
1488 | bsr.l src_qnan # yes | ||
1489 | bra.b _L5_6s | ||
1490 | _L5_5s: | ||
1491 | bsr.l stanhd # operand is a DENORM | ||
1492 | _L5_6s: | ||
1493 | |||
1494 | # | ||
1495 | # Result is now in FP0 | ||
1496 | # | ||
# FPCR/FPSR are reloaded from the frame image (FPSR as masked above;
# handlers may also have changed it -- not visible here). Only fp1 is
# reloaded, so fp0 carries the result back to the caller.
1497 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
1498 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
1499 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
1500 | unlk %a6 | ||
1501 | rts | ||
1502 | |||
#
# _ftanhd_: double-precision entry point for the stanh handlers.
#
# Saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 in a link frame and zeroes
# FPCR. The argument at 0x8(%a6) is widened to extended precision in
# FP_SRC and classified by tag; the tag then selects the handler:
#   zero (NORM) -> stanh, ZERO -> src_zero, INF -> src_one,
#   QNAN -> src_qnan, any other tag -> stanhd (DENORM).
# Handlers get d0 = zero-extended FPCR_MODE byte. The result is left
# in fp0; everything else saved above is reloaded from the frame.
#
1503 | global _ftanhd_ | ||
1504 | _ftanhd_: | ||
1505 | link %a6,&-LOCAL_SIZE | ||
1506 | |||
1507 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
1508 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
1509 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
1510 | |||
1511 | fmov.l &0x0,%fpcr # zero FPCR | ||
1512 | |||
1513 | # | ||
1514 | # copy, convert, and tag input argument | ||
1515 | # | ||
# Widen through fp0 so FP_SRC holds an extended-precision operand.
1516 | fmov.d 0x8(%a6),%fp0 # load dbl input | ||
1517 | fmov.x %fp0,FP_SRC(%a6) | ||
1518 | lea FP_SRC(%a6),%a0 | ||
1519 | bsr.l tag # fetch operand type | ||
1520 | mov.b %d0,STAG(%a6) | ||
1521 | mov.b %d0,%d1 | ||
1522 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image.
# NOTE(review): presumably the quotient and accrued-exception bytes -- confirm.
1523 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
1524 | |||
1525 | clr.l %d0 | ||
1526 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
1527 | |||
# NOTE(review): STAG already received this tag right after the tag call;
# this store looks redundant unless the andi.l above can touch STAG -- confirm.
1528 | mov.b %d1,STAG(%a6) | ||
# d1 still holds the tag from above; zero takes the NORM path.
1529 | tst.b %d1 | ||
1530 | bne.b _L5_2d | ||
1531 | bsr.l stanh # operand is a NORM | ||
1532 | bra.b _L5_6d | ||
1533 | _L5_2d: | ||
1534 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
1535 | bne.b _L5_3d # no | ||
1536 | bsr.l src_zero # yes | ||
1537 | bra.b _L5_6d | ||
1538 | _L5_3d: | ||
1539 | cmpi.b %d1,&INF # is operand an INF? | ||
1540 | bne.b _L5_4d # no | ||
1541 | bsr.l src_one # yes | ||
1542 | bra.b _L5_6d | ||
1543 | _L5_4d: | ||
1544 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
1545 | bne.b _L5_5d # no | ||
1546 | bsr.l src_qnan # yes | ||
1547 | bra.b _L5_6d | ||
1548 | _L5_5d: | ||
1549 | bsr.l stanhd # operand is a DENORM | ||
1550 | _L5_6d: | ||
1551 | |||
1552 | # | ||
1553 | # Result is now in FP0 | ||
1554 | # | ||
# FPCR/FPSR are reloaded from the frame image (FPSR as masked above;
# handlers may also have changed it -- not visible here). Only fp1 is
# reloaded, so fp0 carries the result back to the caller.
1555 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
1556 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
1557 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
1558 | unlk %a6 | ||
1559 | rts | ||
1560 | |||
#
# _ftanhx_: extended-precision entry point for the stanh handlers.
#
# Saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 in a link frame and zeroes
# FPCR. The 12-byte argument at 0x8(%a6) is copied unchanged into
# FP_SRC and classified by tag; the tag then selects the handler:
#   zero (NORM) -> stanh, ZERO -> src_zero, INF -> src_one,
#   QNAN -> src_qnan, any other tag -> stanhd (DENORM).
# Handlers get d0 = zero-extended FPCR_MODE byte. The result is left
# in fp0; everything else saved above is reloaded from the frame.
#
1561 | global _ftanhx_ | ||
1562 | _ftanhx_: | ||
1563 | link %a6,&-LOCAL_SIZE | ||
1564 | |||
1565 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
1566 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
1567 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
1568 | |||
1569 | fmov.l &0x0,%fpcr # zero FPCR | ||
1570 | |||
1571 | # | ||
1572 | # copy, convert, and tag input argument | ||
1573 | # | ||
# a0 = &FP_SRC is both the copy destination and the pointer set up for tag.
# The three longwords are moved verbatim; no FPU load is involved.
1574 | lea FP_SRC(%a6),%a0 | ||
1575 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | ||
1576 | mov.l 0x8+0x4(%a6),0x4(%a0) | ||
1577 | mov.l 0x8+0x8(%a6),0x8(%a0) | ||
1578 | bsr.l tag # fetch operand type | ||
1579 | mov.b %d0,STAG(%a6) | ||
1580 | mov.b %d0,%d1 | ||
1581 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image.
# NOTE(review): presumably the quotient and accrued-exception bytes -- confirm.
1582 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
1583 | |||
1584 | clr.l %d0 | ||
1585 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
1586 | |||
# d1 still holds the tag from above; zero takes the NORM path.
1587 | tst.b %d1 | ||
1588 | bne.b _L5_2x | ||
1589 | bsr.l stanh # operand is a NORM | ||
1590 | bra.b _L5_6x | ||
1591 | _L5_2x: | ||
1592 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
1593 | bne.b _L5_3x # no | ||
1594 | bsr.l src_zero # yes | ||
1595 | bra.b _L5_6x | ||
1596 | _L5_3x: | ||
1597 | cmpi.b %d1,&INF # is operand an INF? | ||
1598 | bne.b _L5_4x # no | ||
1599 | bsr.l src_one # yes | ||
1600 | bra.b _L5_6x | ||
1601 | _L5_4x: | ||
1602 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
1603 | bne.b _L5_5x # no | ||
1604 | bsr.l src_qnan # yes | ||
1605 | bra.b _L5_6x | ||
1606 | _L5_5x: | ||
1607 | bsr.l stanhd # operand is a DENORM | ||
1608 | _L5_6x: | ||
1609 | |||
1610 | # | ||
1611 | # Result is now in FP0 | ||
1612 | # | ||
# FPCR/FPSR are reloaded from the frame image (FPSR as masked above;
# handlers may also have changed it -- not visible here). Only fp1 is
# reloaded, so fp0 carries the result back to the caller.
1613 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
1614 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
1615 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
1616 | unlk %a6 | ||
1617 | rts | ||
1618 | |||
1619 | |||
1620 | ######################################################################### | ||
1621 | # MONADIC TEMPLATE # | ||
1622 | ######################################################################### | ||
#
# _fatans_: single-precision entry point for the satan handlers.
#
# Saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 in a link frame and zeroes
# FPCR. The argument at 0x8(%a6) is widened to extended precision in
# FP_SRC and classified by tag; the tag then selects the handler:
#   zero (NORM) -> satan, ZERO -> src_zero, INF -> spi_2,
#   QNAN -> src_qnan, any other tag -> satand (DENORM).
# Handlers get d0 = zero-extended FPCR_MODE byte. The result is left
# in fp0; everything else saved above is reloaded from the frame.
#
1623 | global _fatans_ | ||
1624 | _fatans_: | ||
1625 | link %a6,&-LOCAL_SIZE | ||
1626 | |||
1627 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
1628 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
1629 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
1630 | |||
1631 | fmov.l &0x0,%fpcr # zero FPCR | ||
1632 | |||
1633 | # | ||
1634 | # copy, convert, and tag input argument | ||
1635 | # | ||
# Widen through fp0 so FP_SRC holds an extended-precision operand.
1636 | fmov.s 0x8(%a6),%fp0 # load sgl input | ||
1637 | fmov.x %fp0,FP_SRC(%a6) | ||
1638 | lea FP_SRC(%a6),%a0 | ||
1639 | bsr.l tag # fetch operand type | ||
1640 | mov.b %d0,STAG(%a6) | ||
1641 | mov.b %d0,%d1 | ||
1642 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image.
# NOTE(review): presumably the quotient and accrued-exception bytes -- confirm.
1643 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
1644 | |||
1645 | clr.l %d0 | ||
1646 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
1647 | |||
# d1 still holds the tag from above; zero takes the NORM path.
1648 | tst.b %d1 | ||
1649 | bne.b _L6_2s | ||
1650 | bsr.l satan # operand is a NORM | ||
1651 | bra.b _L6_6s | ||
1652 | _L6_2s: | ||
1653 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
1654 | bne.b _L6_3s # no | ||
1655 | bsr.l src_zero # yes | ||
1656 | bra.b _L6_6s | ||
1657 | _L6_3s: | ||
1658 | cmpi.b %d1,&INF # is operand an INF? | ||
1659 | bne.b _L6_4s # no | ||
1660 | bsr.l spi_2 # yes | ||
1661 | bra.b _L6_6s | ||
1662 | _L6_4s: | ||
1663 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
1664 | bne.b _L6_5s # no | ||
1665 | bsr.l src_qnan # yes | ||
1666 | bra.b _L6_6s | ||
1667 | _L6_5s: | ||
1668 | bsr.l satand # operand is a DENORM | ||
1669 | _L6_6s: | ||
1670 | |||
1671 | # | ||
1672 | # Result is now in FP0 | ||
1673 | # | ||
# FPCR/FPSR are reloaded from the frame image (FPSR as masked above;
# handlers may also have changed it -- not visible here). Only fp1 is
# reloaded, so fp0 carries the result back to the caller.
1674 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
1675 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
1676 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
1677 | unlk %a6 | ||
1678 | rts | ||
1679 | |||
#
# _fatand_: double-precision entry point for the satan handlers.
#
# Saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 in a link frame and zeroes
# FPCR. The argument at 0x8(%a6) is widened to extended precision in
# FP_SRC and classified by tag; the tag then selects the handler:
#   zero (NORM) -> satan, ZERO -> src_zero, INF -> spi_2,
#   QNAN -> src_qnan, any other tag -> satand (DENORM).
# Handlers get d0 = zero-extended FPCR_MODE byte. The result is left
# in fp0; everything else saved above is reloaded from the frame.
#
1680 | global _fatand_ | ||
1681 | _fatand_: | ||
1682 | link %a6,&-LOCAL_SIZE | ||
1683 | |||
1684 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
1685 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
1686 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
1687 | |||
1688 | fmov.l &0x0,%fpcr # zero FPCR | ||
1689 | |||
1690 | # | ||
1691 | # copy, convert, and tag input argument | ||
1692 | # | ||
# Widen through fp0 so FP_SRC holds an extended-precision operand.
1693 | fmov.d 0x8(%a6),%fp0 # load dbl input | ||
1694 | fmov.x %fp0,FP_SRC(%a6) | ||
1695 | lea FP_SRC(%a6),%a0 | ||
1696 | bsr.l tag # fetch operand type | ||
1697 | mov.b %d0,STAG(%a6) | ||
1698 | mov.b %d0,%d1 | ||
1699 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image.
# NOTE(review): presumably the quotient and accrued-exception bytes -- confirm.
1700 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
1701 | |||
1702 | clr.l %d0 | ||
1703 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
1704 | |||
# NOTE(review): STAG already received this tag right after the tag call;
# this store looks redundant unless the andi.l above can touch STAG -- confirm.
1705 | mov.b %d1,STAG(%a6) | ||
# d1 still holds the tag from above; zero takes the NORM path.
1706 | tst.b %d1 | ||
1707 | bne.b _L6_2d | ||
1708 | bsr.l satan # operand is a NORM | ||
1709 | bra.b _L6_6d | ||
1710 | _L6_2d: | ||
1711 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
1712 | bne.b _L6_3d # no | ||
1713 | bsr.l src_zero # yes | ||
1714 | bra.b _L6_6d | ||
1715 | _L6_3d: | ||
1716 | cmpi.b %d1,&INF # is operand an INF? | ||
1717 | bne.b _L6_4d # no | ||
1718 | bsr.l spi_2 # yes | ||
1719 | bra.b _L6_6d | ||
1720 | _L6_4d: | ||
1721 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
1722 | bne.b _L6_5d # no | ||
1723 | bsr.l src_qnan # yes | ||
1724 | bra.b _L6_6d | ||
1725 | _L6_5d: | ||
1726 | bsr.l satand # operand is a DENORM | ||
1727 | _L6_6d: | ||
1728 | |||
1729 | # | ||
1730 | # Result is now in FP0 | ||
1731 | # | ||
# FPCR/FPSR are reloaded from the frame image (FPSR as masked above;
# handlers may also have changed it -- not visible here). Only fp1 is
# reloaded, so fp0 carries the result back to the caller.
1732 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
1733 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
1734 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
1735 | unlk %a6 | ||
1736 | rts | ||
1737 | |||
#
# _fatanx_: extended-precision entry point for the satan handlers.
#
# Saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 in a link frame and zeroes
# FPCR. The 12-byte argument at 0x8(%a6) is copied unchanged into
# FP_SRC and classified by tag; the tag then selects the handler:
#   zero (NORM) -> satan, ZERO -> src_zero, INF -> spi_2,
#   QNAN -> src_qnan, any other tag -> satand (DENORM).
# Handlers get d0 = zero-extended FPCR_MODE byte. The result is left
# in fp0; everything else saved above is reloaded from the frame.
#
1738 | global _fatanx_ | ||
1739 | _fatanx_: | ||
1740 | link %a6,&-LOCAL_SIZE | ||
1741 | |||
1742 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
1743 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
1744 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
1745 | |||
1746 | fmov.l &0x0,%fpcr # zero FPCR | ||
1747 | |||
1748 | # | ||
1749 | # copy, convert, and tag input argument | ||
1750 | # | ||
# a0 = &FP_SRC is both the copy destination and the pointer set up for tag.
# The three longwords are moved verbatim; no FPU load is involved.
1751 | lea FP_SRC(%a6),%a0 | ||
1752 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | ||
1753 | mov.l 0x8+0x4(%a6),0x4(%a0) | ||
1754 | mov.l 0x8+0x8(%a6),0x8(%a0) | ||
1755 | bsr.l tag # fetch operand type | ||
1756 | mov.b %d0,STAG(%a6) | ||
1757 | mov.b %d0,%d1 | ||
1758 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image.
# NOTE(review): presumably the quotient and accrued-exception bytes -- confirm.
1759 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
1760 | |||
1761 | clr.l %d0 | ||
1762 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
1763 | |||
# d1 still holds the tag from above; zero takes the NORM path.
1764 | tst.b %d1 | ||
1765 | bne.b _L6_2x | ||
1766 | bsr.l satan # operand is a NORM | ||
1767 | bra.b _L6_6x | ||
1768 | _L6_2x: | ||
1769 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
1770 | bne.b _L6_3x # no | ||
1771 | bsr.l src_zero # yes | ||
1772 | bra.b _L6_6x | ||
1773 | _L6_3x: | ||
1774 | cmpi.b %d1,&INF # is operand an INF? | ||
1775 | bne.b _L6_4x # no | ||
1776 | bsr.l spi_2 # yes | ||
1777 | bra.b _L6_6x | ||
1778 | _L6_4x: | ||
1779 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
1780 | bne.b _L6_5x # no | ||
1781 | bsr.l src_qnan # yes | ||
1782 | bra.b _L6_6x | ||
1783 | _L6_5x: | ||
1784 | bsr.l satand # operand is a DENORM | ||
1785 | _L6_6x: | ||
1786 | |||
1787 | # | ||
1788 | # Result is now in FP0 | ||
1789 | # | ||
# FPCR/FPSR are reloaded from the frame image (FPSR as masked above;
# handlers may also have changed it -- not visible here). Only fp1 is
# reloaded, so fp0 carries the result back to the caller.
1790 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
1791 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
1792 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
1793 | unlk %a6 | ||
1794 | rts | ||
1795 | |||
1796 | |||
1797 | ######################################################################### | ||
1798 | # MONADIC TEMPLATE # | ||
1799 | ######################################################################### | ||
#
# _fasins_: single-precision entry point for the sasin handlers.
#
# Saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 in a link frame and zeroes
# FPCR. The argument at 0x8(%a6) is widened to extended precision in
# FP_SRC and classified by tag; the tag then selects the handler:
#   zero (NORM) -> sasin, ZERO -> src_zero, INF -> t_operr,
#   QNAN -> src_qnan, any other tag -> sasind (DENORM).
# Handlers get d0 = zero-extended FPCR_MODE byte. The result is left
# in fp0; everything else saved above is reloaded from the frame.
#
1800 | global _fasins_ | ||
1801 | _fasins_: | ||
1802 | link %a6,&-LOCAL_SIZE | ||
1803 | |||
1804 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
1805 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
1806 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
1807 | |||
1808 | fmov.l &0x0,%fpcr # zero FPCR | ||
1809 | |||
1810 | # | ||
1811 | # copy, convert, and tag input argument | ||
1812 | # | ||
# Widen through fp0 so FP_SRC holds an extended-precision operand.
1813 | fmov.s 0x8(%a6),%fp0 # load sgl input | ||
1814 | fmov.x %fp0,FP_SRC(%a6) | ||
1815 | lea FP_SRC(%a6),%a0 | ||
1816 | bsr.l tag # fetch operand type | ||
1817 | mov.b %d0,STAG(%a6) | ||
1818 | mov.b %d0,%d1 | ||
1819 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image.
# NOTE(review): presumably the quotient and accrued-exception bytes -- confirm.
1820 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
1821 | |||
1822 | clr.l %d0 | ||
1823 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
1824 | |||
# d1 still holds the tag from above; zero takes the NORM path.
1825 | tst.b %d1 | ||
1826 | bne.b _L7_2s | ||
1827 | bsr.l sasin # operand is a NORM | ||
1828 | bra.b _L7_6s | ||
1829 | _L7_2s: | ||
1830 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
1831 | bne.b _L7_3s # no | ||
1832 | bsr.l src_zero # yes | ||
1833 | bra.b _L7_6s | ||
1834 | _L7_3s: | ||
1835 | cmpi.b %d1,&INF # is operand an INF? | ||
1836 | bne.b _L7_4s # no | ||
1837 | bsr.l t_operr # yes | ||
1838 | bra.b _L7_6s | ||
1839 | _L7_4s: | ||
1840 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
1841 | bne.b _L7_5s # no | ||
1842 | bsr.l src_qnan # yes | ||
1843 | bra.b _L7_6s | ||
1844 | _L7_5s: | ||
1845 | bsr.l sasind # operand is a DENORM | ||
1846 | _L7_6s: | ||
1847 | |||
1848 | # | ||
1849 | # Result is now in FP0 | ||
1850 | # | ||
# FPCR/FPSR are reloaded from the frame image (FPSR as masked above;
# handlers may also have changed it -- not visible here). Only fp1 is
# reloaded, so fp0 carries the result back to the caller.
1851 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
1852 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
1853 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
1854 | unlk %a6 | ||
1855 | rts | ||
1856 | |||
#
# _fasind_: double-precision entry point for the sasin handlers.
#
# Saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 in a link frame and zeroes
# FPCR. The argument at 0x8(%a6) is widened to extended precision in
# FP_SRC and classified by tag; the tag then selects the handler:
#   zero (NORM) -> sasin, ZERO -> src_zero, INF -> t_operr,
#   QNAN -> src_qnan, any other tag -> sasind (DENORM).
# Handlers get d0 = zero-extended FPCR_MODE byte. The result is left
# in fp0; everything else saved above is reloaded from the frame.
#
1857 | global _fasind_ | ||
1858 | _fasind_: | ||
1859 | link %a6,&-LOCAL_SIZE | ||
1860 | |||
1861 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
1862 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
1863 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
1864 | |||
1865 | fmov.l &0x0,%fpcr # zero FPCR | ||
1866 | |||
1867 | # | ||
1868 | # copy, convert, and tag input argument | ||
1869 | # | ||
# Widen through fp0 so FP_SRC holds an extended-precision operand.
1870 | fmov.d 0x8(%a6),%fp0 # load dbl input | ||
1871 | fmov.x %fp0,FP_SRC(%a6) | ||
1872 | lea FP_SRC(%a6),%a0 | ||
1873 | bsr.l tag # fetch operand type | ||
1874 | mov.b %d0,STAG(%a6) | ||
1875 | mov.b %d0,%d1 | ||
1876 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image.
# NOTE(review): presumably the quotient and accrued-exception bytes -- confirm.
1877 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
1878 | |||
1879 | clr.l %d0 | ||
1880 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
1881 | |||
# NOTE(review): STAG already received this tag right after the tag call;
# this store looks redundant unless the andi.l above can touch STAG -- confirm.
1882 | mov.b %d1,STAG(%a6) | ||
# d1 still holds the tag from above; zero takes the NORM path.
1883 | tst.b %d1 | ||
1884 | bne.b _L7_2d | ||
1885 | bsr.l sasin # operand is a NORM | ||
1886 | bra.b _L7_6d | ||
1887 | _L7_2d: | ||
1888 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
1889 | bne.b _L7_3d # no | ||
1890 | bsr.l src_zero # yes | ||
1891 | bra.b _L7_6d | ||
1892 | _L7_3d: | ||
1893 | cmpi.b %d1,&INF # is operand an INF? | ||
1894 | bne.b _L7_4d # no | ||
1895 | bsr.l t_operr # yes | ||
1896 | bra.b _L7_6d | ||
1897 | _L7_4d: | ||
1898 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
1899 | bne.b _L7_5d # no | ||
1900 | bsr.l src_qnan # yes | ||
1901 | bra.b _L7_6d | ||
1902 | _L7_5d: | ||
1903 | bsr.l sasind # operand is a DENORM | ||
1904 | _L7_6d: | ||
1905 | |||
1906 | # | ||
1907 | # Result is now in FP0 | ||
1908 | # | ||
# FPCR/FPSR are reloaded from the frame image (FPSR as masked above;
# handlers may also have changed it -- not visible here). Only fp1 is
# reloaded, so fp0 carries the result back to the caller.
1909 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
1910 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
1911 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
1912 | unlk %a6 | ||
1913 | rts | ||
1914 | |||
#
# _fasinx_: extended-precision entry point for the sasin handlers.
#
# Saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 in a link frame and zeroes
# FPCR. The 12-byte argument at 0x8(%a6) is copied unchanged into
# FP_SRC and classified by tag; the tag then selects the handler:
#   zero (NORM) -> sasin, ZERO -> src_zero, INF -> t_operr,
#   QNAN -> src_qnan, any other tag -> sasind (DENORM).
# Handlers get d0 = zero-extended FPCR_MODE byte. The result is left
# in fp0; everything else saved above is reloaded from the frame.
#
1915 | global _fasinx_ | ||
1916 | _fasinx_: | ||
1917 | link %a6,&-LOCAL_SIZE | ||
1918 | |||
1919 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
1920 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
1921 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
1922 | |||
1923 | fmov.l &0x0,%fpcr # zero FPCR | ||
1924 | |||
1925 | # | ||
1926 | # copy, convert, and tag input argument | ||
1927 | # | ||
# a0 = &FP_SRC is both the copy destination and the pointer set up for tag.
# The three longwords are moved verbatim; no FPU load is involved.
1928 | lea FP_SRC(%a6),%a0 | ||
1929 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | ||
1930 | mov.l 0x8+0x4(%a6),0x4(%a0) | ||
1931 | mov.l 0x8+0x8(%a6),0x8(%a0) | ||
1932 | bsr.l tag # fetch operand type | ||
1933 | mov.b %d0,STAG(%a6) | ||
1934 | mov.b %d0,%d1 | ||
1935 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image.
# NOTE(review): presumably the quotient and accrued-exception bytes -- confirm.
1936 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
1937 | |||
1938 | clr.l %d0 | ||
1939 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
1940 | |||
# d1 still holds the tag from above; zero takes the NORM path.
1941 | tst.b %d1 | ||
1942 | bne.b _L7_2x | ||
1943 | bsr.l sasin # operand is a NORM | ||
1944 | bra.b _L7_6x | ||
1945 | _L7_2x: | ||
1946 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
1947 | bne.b _L7_3x # no | ||
1948 | bsr.l src_zero # yes | ||
1949 | bra.b _L7_6x | ||
1950 | _L7_3x: | ||
1951 | cmpi.b %d1,&INF # is operand an INF? | ||
1952 | bne.b _L7_4x # no | ||
1953 | bsr.l t_operr # yes | ||
1954 | bra.b _L7_6x | ||
1955 | _L7_4x: | ||
1956 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
1957 | bne.b _L7_5x # no | ||
1958 | bsr.l src_qnan # yes | ||
1959 | bra.b _L7_6x | ||
1960 | _L7_5x: | ||
1961 | bsr.l sasind # operand is a DENORM | ||
1962 | _L7_6x: | ||
1963 | |||
1964 | # | ||
1965 | # Result is now in FP0 | ||
1966 | # | ||
# FPCR/FPSR are reloaded from the frame image (FPSR as masked above;
# handlers may also have changed it -- not visible here). Only fp1 is
# reloaded, so fp0 carries the result back to the caller.
1967 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
1968 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
1969 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
1970 | unlk %a6 | ||
1971 | rts | ||
1972 | |||
1973 | |||
1974 | ######################################################################### | ||
1975 | # MONADIC TEMPLATE # | ||
1976 | ######################################################################### | ||
#
# _fatanhs_: stack-argument entry point for a single-precision operand.
# The operand at 0x8(%a6) (first argument after the link) is loaded into
# fp0, stored in extended format to FP_SRC(%a6), classified by tag, and
# passed to one of: satanh (NORM), src_zero (ZERO), t_operr (INF),
# src_qnan (QNAN) or satanhd (any other tag; labelled DENORM). All of
# these routines are defined outside this chunk; the names suggest a
# hyperbolic arctangent -- confirm. On return fp0 holds the result, while
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the local frame.
#
1977 | global _fatanhs_ | ||
1978 | _fatanhs_: | ||
1979 | link %a6,&-LOCAL_SIZE | ||
1980 | |||
1981 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
1982 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
1983 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
1984 | |||
1985 | fmov.l &0x0,%fpcr # zero FPCR | ||
1986 | |||
1987 | # | ||
1988 | # copy, convert, and tag input argument | ||
1989 | # | ||
1990 | fmov.s 0x8(%a6),%fp0 # load sgl input | ||
1991 | fmov.x %fp0,FP_SRC(%a6) | ||
1992 | lea FP_SRC(%a6),%a0 | ||
1993 | bsr.l tag # fetch operand type | ||
1994 | mov.b %d0,STAG(%a6) | ||
1995 | mov.b %d0,%d1 | ||
1996 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image; bits 31-24 and
# 15-8 are cleared (the condition-code and exception-status bytes in the
# 68k FPSR layout).
1997 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
1998 | |||
# d0 is reused for the zero-extended FPCR_MODE byte; the tag survives in d1.
1999 | clr.l %d0 | ||
2000 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
2001 | |||
# Tag dispatch: a zero tag is NORM; ZERO, INF and QNAN are then compared
# in turn; no further test is made, so every remaining tag value reaches
# the DENORM call.
2002 | tst.b %d1 | ||
2003 | bne.b _L8_2s | ||
2004 | bsr.l satanh # operand is a NORM | ||
2005 | bra.b _L8_6s | ||
2006 | _L8_2s: | ||
2007 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
2008 | bne.b _L8_3s # no | ||
2009 | bsr.l src_zero # yes | ||
2010 | bra.b _L8_6s | ||
2011 | _L8_3s: | ||
2012 | cmpi.b %d1,&INF # is operand an INF? | ||
2013 | bne.b _L8_4s # no | ||
2014 | bsr.l t_operr # yes | ||
2015 | bra.b _L8_6s | ||
2016 | _L8_4s: | ||
2017 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
2018 | bne.b _L8_5s # no | ||
2019 | bsr.l src_qnan # yes | ||
2020 | bra.b _L8_6s | ||
2021 | _L8_5s: | ||
2022 | bsr.l satanhd # operand is a DENORM | ||
2023 | _L8_6s: | ||
2024 | |||
2025 | # | ||
2026 | # Result is now in FP0 | ||
2027 | # | ||
# Only fp1 is reloaded (mask 0x40), so the result in fp0 is preserved for
# the caller. FPCR and FPSR are reloaded from the frame starting at
# USER_FPCR(%a6); presumably that covers the USER_FPSR image masked
# above -- confirm against the frame layout.
2028 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
2029 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
2030 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
2031 | unlk %a6 | ||
2032 | rts | ||
2033 | |||
#
# _fatanhd_: stack-argument entry point for a double-precision operand.
# The operand at 0x8(%a6) (first argument after the link) is loaded into
# fp0, stored in extended format to FP_SRC(%a6), classified by tag, and
# passed to one of: satanh (NORM), src_zero (ZERO), t_operr (INF),
# src_qnan (QNAN) or satanhd (any other tag; labelled DENORM). All of
# these routines are defined outside this chunk; the names suggest a
# hyperbolic arctangent -- confirm. On return fp0 holds the result, while
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the local frame.
#
2034 | global _fatanhd_ | ||
2035 | _fatanhd_: | ||
2036 | link %a6,&-LOCAL_SIZE | ||
2037 | |||
2038 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
2039 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
2040 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
2041 | |||
2042 | fmov.l &0x0,%fpcr # zero FPCR | ||
2043 | |||
2044 | # | ||
2045 | # copy, convert, and tag input argument | ||
2046 | # | ||
2047 | fmov.d 0x8(%a6),%fp0 # load dbl input | ||
2048 | fmov.x %fp0,FP_SRC(%a6) | ||
2049 | lea FP_SRC(%a6),%a0 | ||
2050 | bsr.l tag # fetch operand type | ||
2051 | mov.b %d0,STAG(%a6) | ||
2052 | mov.b %d0,%d1 | ||
2053 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image; bits 31-24 and
# 15-8 are cleared (the condition-code and exception-status bytes in the
# 68k FPSR layout).
2054 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
2055 | |||
# d0 is reused for the zero-extended FPCR_MODE byte; the tag survives in d1.
2056 | clr.l %d0 | ||
2057 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
2058 | |||
# NOTE(review): STAG(%a6) was already written from d0 above and d1 is an
# unmodified copy of that value, so this store looks redundant (the
# single and extended entry points omit it) -- confirm before removing.
2059 | mov.b %d1,STAG(%a6) | ||
# Tag dispatch: a zero tag is NORM; ZERO, INF and QNAN are then compared
# in turn; no further test is made, so every remaining tag value reaches
# the DENORM call.
2060 | tst.b %d1 | ||
2061 | bne.b _L8_2d | ||
2062 | bsr.l satanh # operand is a NORM | ||
2063 | bra.b _L8_6d | ||
2064 | _L8_2d: | ||
2065 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
2066 | bne.b _L8_3d # no | ||
2067 | bsr.l src_zero # yes | ||
2068 | bra.b _L8_6d | ||
2069 | _L8_3d: | ||
2070 | cmpi.b %d1,&INF # is operand an INF? | ||
2071 | bne.b _L8_4d # no | ||
2072 | bsr.l t_operr # yes | ||
2073 | bra.b _L8_6d | ||
2074 | _L8_4d: | ||
2075 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
2076 | bne.b _L8_5d # no | ||
2077 | bsr.l src_qnan # yes | ||
2078 | bra.b _L8_6d | ||
2079 | _L8_5d: | ||
2080 | bsr.l satanhd # operand is a DENORM | ||
2081 | _L8_6d: | ||
2082 | |||
2083 | # | ||
2084 | # Result is now in FP0 | ||
2085 | # | ||
# Only fp1 is reloaded (mask 0x40), so the result in fp0 is preserved for
# the caller. FPCR and FPSR are reloaded from the frame starting at
# USER_FPCR(%a6); presumably that covers the USER_FPSR image masked
# above -- confirm against the frame layout.
2086 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
2087 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
2088 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
2089 | unlk %a6 | ||
2090 | rts | ||
2091 | |||
#
# _fatanhx_: stack-argument entry point for an extended-precision operand.
# The 12-byte operand at 0x8(%a6) (first argument after the link) is
# copied as three longwords into FP_SRC(%a6), classified by tag, and
# passed to one of: satanh (NORM), src_zero (ZERO), t_operr (INF),
# src_qnan (QNAN) or satanhd (any other tag; labelled DENORM). All of
# these routines are defined outside this chunk; the names suggest a
# hyperbolic arctangent -- confirm. On return fp0 holds the result, while
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the local frame.
#
2092 | global _fatanhx_ | ||
2093 | _fatanhx_: | ||
2094 | link %a6,&-LOCAL_SIZE | ||
2095 | |||
2096 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
2097 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
2098 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
2099 | |||
2100 | fmov.l &0x0,%fpcr # zero FPCR | ||
2101 | |||
2102 | # | ||
2103 | # copy, convert, and tag input argument | ||
2104 | # | ||
2105 | lea FP_SRC(%a6),%a0 | ||
2106 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | ||
2107 | mov.l 0x8+0x4(%a6),0x4(%a0) | ||
2108 | mov.l 0x8+0x8(%a6),0x8(%a0) | ||
2109 | bsr.l tag # fetch operand type | ||
2110 | mov.b %d0,STAG(%a6) | ||
2111 | mov.b %d0,%d1 | ||
2112 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image; bits 31-24 and
# 15-8 are cleared (the condition-code and exception-status bytes in the
# 68k FPSR layout).
2113 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
2114 | |||
# d0 is reused for the zero-extended FPCR_MODE byte; the tag survives in d1.
2115 | clr.l %d0 | ||
2116 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
2117 | |||
# Tag dispatch: a zero tag is NORM; ZERO, INF and QNAN are then compared
# in turn; no further test is made, so every remaining tag value reaches
# the DENORM call.
2118 | tst.b %d1 | ||
2119 | bne.b _L8_2x | ||
2120 | bsr.l satanh # operand is a NORM | ||
2121 | bra.b _L8_6x | ||
2122 | _L8_2x: | ||
2123 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
2124 | bne.b _L8_3x # no | ||
2125 | bsr.l src_zero # yes | ||
2126 | bra.b _L8_6x | ||
2127 | _L8_3x: | ||
2128 | cmpi.b %d1,&INF # is operand an INF? | ||
2129 | bne.b _L8_4x # no | ||
2130 | bsr.l t_operr # yes | ||
2131 | bra.b _L8_6x | ||
2132 | _L8_4x: | ||
2133 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
2134 | bne.b _L8_5x # no | ||
2135 | bsr.l src_qnan # yes | ||
2136 | bra.b _L8_6x | ||
2137 | _L8_5x: | ||
2138 | bsr.l satanhd # operand is a DENORM | ||
2139 | _L8_6x: | ||
2140 | |||
2141 | # | ||
2142 | # Result is now in FP0 | ||
2143 | # | ||
# Only fp1 is reloaded (mask 0x40), so the result in fp0 is preserved for
# the caller. FPCR and FPSR are reloaded from the frame starting at
# USER_FPCR(%a6); presumably that covers the USER_FPSR image masked
# above -- confirm against the frame layout.
2144 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
2145 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
2146 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
2147 | unlk %a6 | ||
2148 | rts | ||
2149 | |||
2150 | |||
2151 | ######################################################################### | ||
2152 | # MONADIC TEMPLATE # | ||
2153 | ######################################################################### | ||
#
# _ftans_: stack-argument entry point for a single-precision operand.
# The operand at 0x8(%a6) (first argument after the link) is loaded into
# fp0, stored in extended format to FP_SRC(%a6), classified by tag, and
# passed to one of: stan (NORM), src_zero (ZERO), t_operr (INF),
# src_qnan (QNAN) or stand (any other tag; labelled DENORM). All of
# these routines are defined outside this chunk; the names suggest a
# tangent -- confirm. On return fp0 holds the result, while d0-d1/a0-a1,
# FPCR/FPSR and fp1 are reloaded from the local frame.
#
2154 | global _ftans_ | ||
2155 | _ftans_: | ||
2156 | link %a6,&-LOCAL_SIZE | ||
2157 | |||
2158 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
2159 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
2160 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
2161 | |||
2162 | fmov.l &0x0,%fpcr # zero FPCR | ||
2163 | |||
2164 | # | ||
2165 | # copy, convert, and tag input argument | ||
2166 | # | ||
2167 | fmov.s 0x8(%a6),%fp0 # load sgl input | ||
2168 | fmov.x %fp0,FP_SRC(%a6) | ||
2169 | lea FP_SRC(%a6),%a0 | ||
2170 | bsr.l tag # fetch operand type | ||
2171 | mov.b %d0,STAG(%a6) | ||
2172 | mov.b %d0,%d1 | ||
2173 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image; bits 31-24 and
# 15-8 are cleared (the condition-code and exception-status bytes in the
# 68k FPSR layout).
2174 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
2175 | |||
# d0 is reused for the zero-extended FPCR_MODE byte; the tag survives in d1.
2176 | clr.l %d0 | ||
2177 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
2178 | |||
# Tag dispatch: a zero tag is NORM; ZERO, INF and QNAN are then compared
# in turn; no further test is made, so every remaining tag value reaches
# the DENORM call.
2179 | tst.b %d1 | ||
2180 | bne.b _L9_2s | ||
2181 | bsr.l stan # operand is a NORM | ||
2182 | bra.b _L9_6s | ||
2183 | _L9_2s: | ||
2184 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
2185 | bne.b _L9_3s # no | ||
2186 | bsr.l src_zero # yes | ||
2187 | bra.b _L9_6s | ||
2188 | _L9_3s: | ||
2189 | cmpi.b %d1,&INF # is operand an INF? | ||
2190 | bne.b _L9_4s # no | ||
2191 | bsr.l t_operr # yes | ||
2192 | bra.b _L9_6s | ||
2193 | _L9_4s: | ||
2194 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
2195 | bne.b _L9_5s # no | ||
2196 | bsr.l src_qnan # yes | ||
2197 | bra.b _L9_6s | ||
2198 | _L9_5s: | ||
2199 | bsr.l stand # operand is a DENORM | ||
2200 | _L9_6s: | ||
2201 | |||
2202 | # | ||
2203 | # Result is now in FP0 | ||
2204 | # | ||
# Only fp1 is reloaded (mask 0x40), so the result in fp0 is preserved for
# the caller. FPCR and FPSR are reloaded from the frame starting at
# USER_FPCR(%a6); presumably that covers the USER_FPSR image masked
# above -- confirm against the frame layout.
2205 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
2206 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
2207 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
2208 | unlk %a6 | ||
2209 | rts | ||
2210 | |||
#
# _ftand_: stack-argument entry point for a double-precision operand.
# The operand at 0x8(%a6) (first argument after the link) is loaded into
# fp0, stored in extended format to FP_SRC(%a6), classified by tag, and
# passed to one of: stan (NORM), src_zero (ZERO), t_operr (INF),
# src_qnan (QNAN) or stand (any other tag; labelled DENORM). All of
# these routines are defined outside this chunk; the names suggest a
# tangent -- confirm. On return fp0 holds the result, while d0-d1/a0-a1,
# FPCR/FPSR and fp1 are reloaded from the local frame.
#
2211 | global _ftand_ | ||
2212 | _ftand_: | ||
2213 | link %a6,&-LOCAL_SIZE | ||
2214 | |||
2215 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
2216 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
2217 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
2218 | |||
2219 | fmov.l &0x0,%fpcr # zero FPCR | ||
2220 | |||
2221 | # | ||
2222 | # copy, convert, and tag input argument | ||
2223 | # | ||
2224 | fmov.d 0x8(%a6),%fp0 # load dbl input | ||
2225 | fmov.x %fp0,FP_SRC(%a6) | ||
2226 | lea FP_SRC(%a6),%a0 | ||
2227 | bsr.l tag # fetch operand type | ||
2228 | mov.b %d0,STAG(%a6) | ||
2229 | mov.b %d0,%d1 | ||
2230 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image; bits 31-24 and
# 15-8 are cleared (the condition-code and exception-status bytes in the
# 68k FPSR layout).
2231 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
2232 | |||
# d0 is reused for the zero-extended FPCR_MODE byte; the tag survives in d1.
2233 | clr.l %d0 | ||
2234 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
2235 | |||
# NOTE(review): STAG(%a6) was already written from d0 above and d1 is an
# unmodified copy of that value, so this store looks redundant (the
# single and extended entry points omit it) -- confirm before removing.
2236 | mov.b %d1,STAG(%a6) | ||
# Tag dispatch: a zero tag is NORM; ZERO, INF and QNAN are then compared
# in turn; no further test is made, so every remaining tag value reaches
# the DENORM call.
2237 | tst.b %d1 | ||
2238 | bne.b _L9_2d | ||
2239 | bsr.l stan # operand is a NORM | ||
2240 | bra.b _L9_6d | ||
2241 | _L9_2d: | ||
2242 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
2243 | bne.b _L9_3d # no | ||
2244 | bsr.l src_zero # yes | ||
2245 | bra.b _L9_6d | ||
2246 | _L9_3d: | ||
2247 | cmpi.b %d1,&INF # is operand an INF? | ||
2248 | bne.b _L9_4d # no | ||
2249 | bsr.l t_operr # yes | ||
2250 | bra.b _L9_6d | ||
2251 | _L9_4d: | ||
2252 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
2253 | bne.b _L9_5d # no | ||
2254 | bsr.l src_qnan # yes | ||
2255 | bra.b _L9_6d | ||
2256 | _L9_5d: | ||
2257 | bsr.l stand # operand is a DENORM | ||
2258 | _L9_6d: | ||
2259 | |||
2260 | # | ||
2261 | # Result is now in FP0 | ||
2262 | # | ||
# Only fp1 is reloaded (mask 0x40), so the result in fp0 is preserved for
# the caller. FPCR and FPSR are reloaded from the frame starting at
# USER_FPCR(%a6); presumably that covers the USER_FPSR image masked
# above -- confirm against the frame layout.
2263 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
2264 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
2265 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
2266 | unlk %a6 | ||
2267 | rts | ||
2268 | |||
#
# _ftanx_: stack-argument entry point for an extended-precision operand.
# The 12-byte operand at 0x8(%a6) (first argument after the link) is
# copied as three longwords into FP_SRC(%a6), classified by tag, and
# passed to one of: stan (NORM), src_zero (ZERO), t_operr (INF),
# src_qnan (QNAN) or stand (any other tag; labelled DENORM). All of
# these routines are defined outside this chunk; the names suggest a
# tangent -- confirm. On return fp0 holds the result, while d0-d1/a0-a1,
# FPCR/FPSR and fp1 are reloaded from the local frame.
#
2269 | global _ftanx_ | ||
2270 | _ftanx_: | ||
2271 | link %a6,&-LOCAL_SIZE | ||
2272 | |||
2273 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
2274 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
2275 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
2276 | |||
2277 | fmov.l &0x0,%fpcr # zero FPCR | ||
2278 | |||
2279 | # | ||
2280 | # copy, convert, and tag input argument | ||
2281 | # | ||
2282 | lea FP_SRC(%a6),%a0 | ||
2283 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | ||
2284 | mov.l 0x8+0x4(%a6),0x4(%a0) | ||
2285 | mov.l 0x8+0x8(%a6),0x8(%a0) | ||
2286 | bsr.l tag # fetch operand type | ||
2287 | mov.b %d0,STAG(%a6) | ||
2288 | mov.b %d0,%d1 | ||
2289 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image; bits 31-24 and
# 15-8 are cleared (the condition-code and exception-status bytes in the
# 68k FPSR layout).
2290 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
2291 | |||
# d0 is reused for the zero-extended FPCR_MODE byte; the tag survives in d1.
2292 | clr.l %d0 | ||
2293 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
2294 | |||
# Tag dispatch: a zero tag is NORM; ZERO, INF and QNAN are then compared
# in turn; no further test is made, so every remaining tag value reaches
# the DENORM call.
2295 | tst.b %d1 | ||
2296 | bne.b _L9_2x | ||
2297 | bsr.l stan # operand is a NORM | ||
2298 | bra.b _L9_6x | ||
2299 | _L9_2x: | ||
2300 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
2301 | bne.b _L9_3x # no | ||
2302 | bsr.l src_zero # yes | ||
2303 | bra.b _L9_6x | ||
2304 | _L9_3x: | ||
2305 | cmpi.b %d1,&INF # is operand an INF? | ||
2306 | bne.b _L9_4x # no | ||
2307 | bsr.l t_operr # yes | ||
2308 | bra.b _L9_6x | ||
2309 | _L9_4x: | ||
2310 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
2311 | bne.b _L9_5x # no | ||
2312 | bsr.l src_qnan # yes | ||
2313 | bra.b _L9_6x | ||
2314 | _L9_5x: | ||
2315 | bsr.l stand # operand is a DENORM | ||
2316 | _L9_6x: | ||
2317 | |||
2318 | # | ||
2319 | # Result is now in FP0 | ||
2320 | # | ||
# Only fp1 is reloaded (mask 0x40), so the result in fp0 is preserved for
# the caller. FPCR and FPSR are reloaded from the frame starting at
# USER_FPCR(%a6); presumably that covers the USER_FPSR image masked
# above -- confirm against the frame layout.
2321 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
2322 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
2323 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
2324 | unlk %a6 | ||
2325 | rts | ||
2326 | |||
2327 | |||
2328 | ######################################################################### | ||
2329 | # MONADIC TEMPLATE # | ||
2330 | ######################################################################### | ||
#
# _fetoxs_: stack-argument entry point for a single-precision operand.
# The operand at 0x8(%a6) (first argument after the link) is loaded into
# fp0, stored in extended format to FP_SRC(%a6), classified by tag, and
# passed to one of: setox (NORM), ld_pone (ZERO), szr_inf (INF),
# src_qnan (QNAN) or setoxd (any other tag; labelled DENORM). All of
# these routines are defined outside this chunk; the names suggest
# e**x -- confirm. On return fp0 holds the result, while d0-d1/a0-a1,
# FPCR/FPSR and fp1 are reloaded from the local frame.
#
2331 | global _fetoxs_ | ||
2332 | _fetoxs_: | ||
2333 | link %a6,&-LOCAL_SIZE | ||
2334 | |||
2335 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
2336 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
2337 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
2338 | |||
2339 | fmov.l &0x0,%fpcr # zero FPCR | ||
2340 | |||
2341 | # | ||
2342 | # copy, convert, and tag input argument | ||
2343 | # | ||
2344 | fmov.s 0x8(%a6),%fp0 # load sgl input | ||
2345 | fmov.x %fp0,FP_SRC(%a6) | ||
2346 | lea FP_SRC(%a6),%a0 | ||
2347 | bsr.l tag # fetch operand type | ||
2348 | mov.b %d0,STAG(%a6) | ||
2349 | mov.b %d0,%d1 | ||
2350 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image; bits 31-24 and
# 15-8 are cleared (the condition-code and exception-status bytes in the
# 68k FPSR layout).
2351 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
2352 | |||
# d0 is reused for the zero-extended FPCR_MODE byte; the tag survives in d1.
2353 | clr.l %d0 | ||
2354 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
2355 | |||
# Tag dispatch: a zero tag is NORM; ZERO, INF and QNAN are then compared
# in turn; no further test is made, so every remaining tag value reaches
# the DENORM call.
2356 | tst.b %d1 | ||
2357 | bne.b _L10_2s | ||
2358 | bsr.l setox # operand is a NORM | ||
2359 | bra.b _L10_6s | ||
2360 | _L10_2s: | ||
2361 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
2362 | bne.b _L10_3s # no | ||
2363 | bsr.l ld_pone # yes | ||
2364 | bra.b _L10_6s | ||
2365 | _L10_3s: | ||
2366 | cmpi.b %d1,&INF # is operand an INF? | ||
2367 | bne.b _L10_4s # no | ||
2368 | bsr.l szr_inf # yes | ||
2369 | bra.b _L10_6s | ||
2370 | _L10_4s: | ||
2371 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
2372 | bne.b _L10_5s # no | ||
2373 | bsr.l src_qnan # yes | ||
2374 | bra.b _L10_6s | ||
2375 | _L10_5s: | ||
2376 | bsr.l setoxd # operand is a DENORM | ||
2377 | _L10_6s: | ||
2378 | |||
2379 | # | ||
2380 | # Result is now in FP0 | ||
2381 | # | ||
# Only fp1 is reloaded (mask 0x40), so the result in fp0 is preserved for
# the caller. FPCR and FPSR are reloaded from the frame starting at
# USER_FPCR(%a6); presumably that covers the USER_FPSR image masked
# above -- confirm against the frame layout.
2382 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
2383 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
2384 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
2385 | unlk %a6 | ||
2386 | rts | ||
2387 | |||
#
# _fetoxd_: stack-argument entry point for a double-precision operand.
# The operand at 0x8(%a6) (first argument after the link) is loaded into
# fp0, stored in extended format to FP_SRC(%a6), classified by tag, and
# passed to one of: setox (NORM), ld_pone (ZERO), szr_inf (INF),
# src_qnan (QNAN) or setoxd (any other tag; labelled DENORM). All of
# these routines are defined outside this chunk; the names suggest
# e**x -- confirm. On return fp0 holds the result, while d0-d1/a0-a1,
# FPCR/FPSR and fp1 are reloaded from the local frame.
#
2388 | global _fetoxd_ | ||
2389 | _fetoxd_: | ||
2390 | link %a6,&-LOCAL_SIZE | ||
2391 | |||
2392 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
2393 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
2394 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
2395 | |||
2396 | fmov.l &0x0,%fpcr # zero FPCR | ||
2397 | |||
2398 | # | ||
2399 | # copy, convert, and tag input argument | ||
2400 | # | ||
2401 | fmov.d 0x8(%a6),%fp0 # load dbl input | ||
2402 | fmov.x %fp0,FP_SRC(%a6) | ||
2403 | lea FP_SRC(%a6),%a0 | ||
2404 | bsr.l tag # fetch operand type | ||
2405 | mov.b %d0,STAG(%a6) | ||
2406 | mov.b %d0,%d1 | ||
2407 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image; bits 31-24 and
# 15-8 are cleared (the condition-code and exception-status bytes in the
# 68k FPSR layout).
2408 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
2409 | |||
# d0 is reused for the zero-extended FPCR_MODE byte; the tag survives in d1.
2410 | clr.l %d0 | ||
2411 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
2412 | |||
# NOTE(review): STAG(%a6) was already written from d0 above and d1 is an
# unmodified copy of that value, so this store looks redundant (the
# single and extended entry points omit it) -- confirm before removing.
2413 | mov.b %d1,STAG(%a6) | ||
# Tag dispatch: a zero tag is NORM; ZERO, INF and QNAN are then compared
# in turn; no further test is made, so every remaining tag value reaches
# the DENORM call.
2414 | tst.b %d1 | ||
2415 | bne.b _L10_2d | ||
2416 | bsr.l setox # operand is a NORM | ||
2417 | bra.b _L10_6d | ||
2418 | _L10_2d: | ||
2419 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
2420 | bne.b _L10_3d # no | ||
2421 | bsr.l ld_pone # yes | ||
2422 | bra.b _L10_6d | ||
2423 | _L10_3d: | ||
2424 | cmpi.b %d1,&INF # is operand an INF? | ||
2425 | bne.b _L10_4d # no | ||
2426 | bsr.l szr_inf # yes | ||
2427 | bra.b _L10_6d | ||
2428 | _L10_4d: | ||
2429 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
2430 | bne.b _L10_5d # no | ||
2431 | bsr.l src_qnan # yes | ||
2432 | bra.b _L10_6d | ||
2433 | _L10_5d: | ||
2434 | bsr.l setoxd # operand is a DENORM | ||
2435 | _L10_6d: | ||
2436 | |||
2437 | # | ||
2438 | # Result is now in FP0 | ||
2439 | # | ||
# Only fp1 is reloaded (mask 0x40), so the result in fp0 is preserved for
# the caller. FPCR and FPSR are reloaded from the frame starting at
# USER_FPCR(%a6); presumably that covers the USER_FPSR image masked
# above -- confirm against the frame layout.
2440 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
2441 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
2442 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
2443 | unlk %a6 | ||
2444 | rts | ||
2445 | |||
#
# _fetoxx_: stack-argument entry point for an extended-precision operand.
# The 12-byte operand at 0x8(%a6) (first argument after the link) is
# copied as three longwords into FP_SRC(%a6), classified by tag, and
# passed to one of: setox (NORM), ld_pone (ZERO), szr_inf (INF),
# src_qnan (QNAN) or setoxd (any other tag; labelled DENORM). All of
# these routines are defined outside this chunk; the names suggest
# e**x -- confirm. On return fp0 holds the result, while d0-d1/a0-a1,
# FPCR/FPSR and fp1 are reloaded from the local frame.
#
2446 | global _fetoxx_ | ||
2447 | _fetoxx_: | ||
2448 | link %a6,&-LOCAL_SIZE | ||
2449 | |||
2450 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
2451 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
2452 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
2453 | |||
2454 | fmov.l &0x0,%fpcr # zero FPCR | ||
2455 | |||
2456 | # | ||
2457 | # copy, convert, and tag input argument | ||
2458 | # | ||
2459 | lea FP_SRC(%a6),%a0 | ||
2460 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | ||
2461 | mov.l 0x8+0x4(%a6),0x4(%a0) | ||
2462 | mov.l 0x8+0x8(%a6),0x8(%a0) | ||
2463 | bsr.l tag # fetch operand type | ||
2464 | mov.b %d0,STAG(%a6) | ||
2465 | mov.b %d0,%d1 | ||
2466 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image; bits 31-24 and
# 15-8 are cleared (the condition-code and exception-status bytes in the
# 68k FPSR layout).
2467 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
2468 | |||
# d0 is reused for the zero-extended FPCR_MODE byte; the tag survives in d1.
2469 | clr.l %d0 | ||
2470 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
2471 | |||
# Tag dispatch: a zero tag is NORM; ZERO, INF and QNAN are then compared
# in turn; no further test is made, so every remaining tag value reaches
# the DENORM call.
2472 | tst.b %d1 | ||
2473 | bne.b _L10_2x | ||
2474 | bsr.l setox # operand is a NORM | ||
2475 | bra.b _L10_6x | ||
2476 | _L10_2x: | ||
2477 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
2478 | bne.b _L10_3x # no | ||
2479 | bsr.l ld_pone # yes | ||
2480 | bra.b _L10_6x | ||
2481 | _L10_3x: | ||
2482 | cmpi.b %d1,&INF # is operand an INF? | ||
2483 | bne.b _L10_4x # no | ||
2484 | bsr.l szr_inf # yes | ||
2485 | bra.b _L10_6x | ||
2486 | _L10_4x: | ||
2487 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
2488 | bne.b _L10_5x # no | ||
2489 | bsr.l src_qnan # yes | ||
2490 | bra.b _L10_6x | ||
2491 | _L10_5x: | ||
2492 | bsr.l setoxd # operand is a DENORM | ||
2493 | _L10_6x: | ||
2494 | |||
2495 | # | ||
2496 | # Result is now in FP0 | ||
2497 | # | ||
# Only fp1 is reloaded (mask 0x40), so the result in fp0 is preserved for
# the caller. FPCR and FPSR are reloaded from the frame starting at
# USER_FPCR(%a6); presumably that covers the USER_FPSR image masked
# above -- confirm against the frame layout.
2498 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
2499 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
2500 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
2501 | unlk %a6 | ||
2502 | rts | ||
2503 | |||
2504 | |||
2505 | ######################################################################### | ||
2506 | # MONADIC TEMPLATE # | ||
2507 | ######################################################################### | ||
#
# _ftwotoxs_: stack-argument entry point for a single-precision operand.
# The operand at 0x8(%a6) (first argument after the link) is loaded into
# fp0, stored in extended format to FP_SRC(%a6), classified by tag, and
# passed to one of: stwotox (NORM), ld_pone (ZERO), szr_inf (INF),
# src_qnan (QNAN) or stwotoxd (any other tag; labelled DENORM). All of
# these routines are defined outside this chunk; the names suggest
# 2**x -- confirm. On return fp0 holds the result, while d0-d1/a0-a1,
# FPCR/FPSR and fp1 are reloaded from the local frame.
#
2508 | global _ftwotoxs_ | ||
2509 | _ftwotoxs_: | ||
2510 | link %a6,&-LOCAL_SIZE | ||
2511 | |||
2512 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
2513 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
2514 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
2515 | |||
2516 | fmov.l &0x0,%fpcr # zero FPCR | ||
2517 | |||
2518 | # | ||
2519 | # copy, convert, and tag input argument | ||
2520 | # | ||
2521 | fmov.s 0x8(%a6),%fp0 # load sgl input | ||
2522 | fmov.x %fp0,FP_SRC(%a6) | ||
2523 | lea FP_SRC(%a6),%a0 | ||
2524 | bsr.l tag # fetch operand type | ||
2525 | mov.b %d0,STAG(%a6) | ||
2526 | mov.b %d0,%d1 | ||
2527 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image; bits 31-24 and
# 15-8 are cleared (the condition-code and exception-status bytes in the
# 68k FPSR layout).
2528 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
2529 | |||
# d0 is reused for the zero-extended FPCR_MODE byte; the tag survives in d1.
2530 | clr.l %d0 | ||
2531 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
2532 | |||
# Tag dispatch: a zero tag is NORM; ZERO, INF and QNAN are then compared
# in turn; no further test is made, so every remaining tag value reaches
# the DENORM call.
2533 | tst.b %d1 | ||
2534 | bne.b _L11_2s | ||
2535 | bsr.l stwotox # operand is a NORM | ||
2536 | bra.b _L11_6s | ||
2537 | _L11_2s: | ||
2538 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
2539 | bne.b _L11_3s # no | ||
2540 | bsr.l ld_pone # yes | ||
2541 | bra.b _L11_6s | ||
2542 | _L11_3s: | ||
2543 | cmpi.b %d1,&INF # is operand an INF? | ||
2544 | bne.b _L11_4s # no | ||
2545 | bsr.l szr_inf # yes | ||
2546 | bra.b _L11_6s | ||
2547 | _L11_4s: | ||
2548 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
2549 | bne.b _L11_5s # no | ||
2550 | bsr.l src_qnan # yes | ||
2551 | bra.b _L11_6s | ||
2552 | _L11_5s: | ||
2553 | bsr.l stwotoxd # operand is a DENORM | ||
2554 | _L11_6s: | ||
2555 | |||
2556 | # | ||
2557 | # Result is now in FP0 | ||
2558 | # | ||
# Only fp1 is reloaded (mask 0x40), so the result in fp0 is preserved for
# the caller. FPCR and FPSR are reloaded from the frame starting at
# USER_FPCR(%a6); presumably that covers the USER_FPSR image masked
# above -- confirm against the frame layout.
2559 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
2560 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
2561 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
2562 | unlk %a6 | ||
2563 | rts | ||
2564 | |||
#
# _ftwotoxd_: stack-argument entry point for a double-precision operand.
# The operand at 0x8(%a6) (first argument after the link) is loaded into
# fp0, stored in extended format to FP_SRC(%a6), classified by tag, and
# passed to one of: stwotox (NORM), ld_pone (ZERO), szr_inf (INF),
# src_qnan (QNAN) or stwotoxd (any other tag; labelled DENORM). All of
# these routines are defined outside this chunk; the names suggest
# 2**x -- confirm. On return fp0 holds the result, while d0-d1/a0-a1,
# FPCR/FPSR and fp1 are reloaded from the local frame.
#
2565 | global _ftwotoxd_ | ||
2566 | _ftwotoxd_: | ||
2567 | link %a6,&-LOCAL_SIZE | ||
2568 | |||
2569 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
2570 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
2571 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
2572 | |||
2573 | fmov.l &0x0,%fpcr # zero FPCR | ||
2574 | |||
2575 | # | ||
2576 | # copy, convert, and tag input argument | ||
2577 | # | ||
2578 | fmov.d 0x8(%a6),%fp0 # load dbl input | ||
2579 | fmov.x %fp0,FP_SRC(%a6) | ||
2580 | lea FP_SRC(%a6),%a0 | ||
2581 | bsr.l tag # fetch operand type | ||
2582 | mov.b %d0,STAG(%a6) | ||
2583 | mov.b %d0,%d1 | ||
2584 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image; bits 31-24 and
# 15-8 are cleared (the condition-code and exception-status bytes in the
# 68k FPSR layout).
2585 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
2586 | |||
# d0 is reused for the zero-extended FPCR_MODE byte; the tag survives in d1.
2587 | clr.l %d0 | ||
2588 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
2589 | |||
# NOTE(review): STAG(%a6) was already written from d0 above and d1 is an
# unmodified copy of that value, so this store looks redundant (the
# single and extended entry points omit it) -- confirm before removing.
2590 | mov.b %d1,STAG(%a6) | ||
# Tag dispatch: a zero tag is NORM; ZERO, INF and QNAN are then compared
# in turn; no further test is made, so every remaining tag value reaches
# the DENORM call.
2591 | tst.b %d1 | ||
2592 | bne.b _L11_2d | ||
2593 | bsr.l stwotox # operand is a NORM | ||
2594 | bra.b _L11_6d | ||
2595 | _L11_2d: | ||
2596 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
2597 | bne.b _L11_3d # no | ||
2598 | bsr.l ld_pone # yes | ||
2599 | bra.b _L11_6d | ||
2600 | _L11_3d: | ||
2601 | cmpi.b %d1,&INF # is operand an INF? | ||
2602 | bne.b _L11_4d # no | ||
2603 | bsr.l szr_inf # yes | ||
2604 | bra.b _L11_6d | ||
2605 | _L11_4d: | ||
2606 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
2607 | bne.b _L11_5d # no | ||
2608 | bsr.l src_qnan # yes | ||
2609 | bra.b _L11_6d | ||
2610 | _L11_5d: | ||
2611 | bsr.l stwotoxd # operand is a DENORM | ||
2612 | _L11_6d: | ||
2613 | |||
2614 | # | ||
2615 | # Result is now in FP0 | ||
2616 | # | ||
# Only fp1 is reloaded (mask 0x40), so the result in fp0 is preserved for
# the caller. FPCR and FPSR are reloaded from the frame starting at
# USER_FPCR(%a6); presumably that covers the USER_FPSR image masked
# above -- confirm against the frame layout.
2617 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
2618 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
2619 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
2620 | unlk %a6 | ||
2621 | rts | ||
2622 | |||
#
# _ftwotoxx_: stack-argument entry point for an extended-precision operand.
# The 12-byte operand at 0x8(%a6) (first argument after the link) is
# copied as three longwords into FP_SRC(%a6), classified by tag, and
# passed to one of: stwotox (NORM), ld_pone (ZERO), szr_inf (INF),
# src_qnan (QNAN) or stwotoxd (any other tag; labelled DENORM). All of
# these routines are defined outside this chunk; the names suggest
# 2**x -- confirm. On return fp0 holds the result, while d0-d1/a0-a1,
# FPCR/FPSR and fp1 are reloaded from the local frame.
#
2623 | global _ftwotoxx_ | ||
2624 | _ftwotoxx_: | ||
2625 | link %a6,&-LOCAL_SIZE | ||
2626 | |||
2627 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
2628 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
2629 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
2630 | |||
2631 | fmov.l &0x0,%fpcr # zero FPCR | ||
2632 | |||
2633 | # | ||
2634 | # copy, convert, and tag input argument | ||
2635 | # | ||
2636 | lea FP_SRC(%a6),%a0 | ||
2637 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | ||
2638 | mov.l 0x8+0x4(%a6),0x4(%a0) | ||
2639 | mov.l 0x8+0x8(%a6),0x8(%a0) | ||
2640 | bsr.l tag # fetch operand type | ||
2641 | mov.b %d0,STAG(%a6) | ||
2642 | mov.b %d0,%d1 | ||
2643 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image; bits 31-24 and
# 15-8 are cleared (the condition-code and exception-status bytes in the
# 68k FPSR layout).
2644 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
2645 | |||
# d0 is reused for the zero-extended FPCR_MODE byte; the tag survives in d1.
2646 | clr.l %d0 | ||
2647 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
2648 | |||
# Tag dispatch: a zero tag is NORM; ZERO, INF and QNAN are then compared
# in turn; no further test is made, so every remaining tag value reaches
# the DENORM call.
2649 | tst.b %d1 | ||
2650 | bne.b _L11_2x | ||
2651 | bsr.l stwotox # operand is a NORM | ||
2652 | bra.b _L11_6x | ||
2653 | _L11_2x: | ||
2654 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
2655 | bne.b _L11_3x # no | ||
2656 | bsr.l ld_pone # yes | ||
2657 | bra.b _L11_6x | ||
2658 | _L11_3x: | ||
2659 | cmpi.b %d1,&INF # is operand an INF? | ||
2660 | bne.b _L11_4x # no | ||
2661 | bsr.l szr_inf # yes | ||
2662 | bra.b _L11_6x | ||
2663 | _L11_4x: | ||
2664 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
2665 | bne.b _L11_5x # no | ||
2666 | bsr.l src_qnan # yes | ||
2667 | bra.b _L11_6x | ||
2668 | _L11_5x: | ||
2669 | bsr.l stwotoxd # operand is a DENORM | ||
2670 | _L11_6x: | ||
2671 | |||
2672 | # | ||
2673 | # Result is now in FP0 | ||
2674 | # | ||
# Only fp1 is reloaded (mask 0x40), so the result in fp0 is preserved for
# the caller. FPCR and FPSR are reloaded from the frame starting at
# USER_FPCR(%a6); presumably that covers the USER_FPSR image masked
# above -- confirm against the frame layout.
2675 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
2676 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
2677 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
2678 | unlk %a6 | ||
2679 | rts | ||
2680 | |||
2681 | |||
2682 | ######################################################################### | ||
2683 | # MONADIC TEMPLATE # | ||
2684 | ######################################################################### | ||
#
# _ftentoxs_: stack-argument entry point for a single-precision operand.
# The operand at 0x8(%a6) (first argument after the link) is loaded into
# fp0, stored in extended format to FP_SRC(%a6), classified by tag, and
# passed to one of: stentox (NORM), ld_pone (ZERO), szr_inf (INF),
# src_qnan (QNAN) or stentoxd (any other tag; labelled DENORM). All of
# these routines are defined outside this chunk; the names suggest
# 10**x -- confirm. On return fp0 holds the result, while d0-d1/a0-a1,
# FPCR/FPSR and fp1 are reloaded from the local frame.
#
2685 | global _ftentoxs_ | ||
2686 | _ftentoxs_: | ||
2687 | link %a6,&-LOCAL_SIZE | ||
2688 | |||
2689 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
2690 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
2691 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
2692 | |||
2693 | fmov.l &0x0,%fpcr # zero FPCR | ||
2694 | |||
2695 | # | ||
2696 | # copy, convert, and tag input argument | ||
2697 | # | ||
2698 | fmov.s 0x8(%a6),%fp0 # load sgl input | ||
2699 | fmov.x %fp0,FP_SRC(%a6) | ||
2700 | lea FP_SRC(%a6),%a0 | ||
2701 | bsr.l tag # fetch operand type | ||
2702 | mov.b %d0,STAG(%a6) | ||
2703 | mov.b %d0,%d1 | ||
2704 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image; bits 31-24 and
# 15-8 are cleared (the condition-code and exception-status bytes in the
# 68k FPSR layout).
2705 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
2706 | |||
# d0 is reused for the zero-extended FPCR_MODE byte; the tag survives in d1.
2707 | clr.l %d0 | ||
2708 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
2709 | |||
# Tag dispatch: a zero tag is NORM; ZERO, INF and QNAN are then compared
# in turn; no further test is made, so every remaining tag value reaches
# the DENORM call.
2710 | tst.b %d1 | ||
2711 | bne.b _L12_2s | ||
2712 | bsr.l stentox # operand is a NORM | ||
2713 | bra.b _L12_6s | ||
2714 | _L12_2s: | ||
2715 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
2716 | bne.b _L12_3s # no | ||
2717 | bsr.l ld_pone # yes | ||
2718 | bra.b _L12_6s | ||
2719 | _L12_3s: | ||
2720 | cmpi.b %d1,&INF # is operand an INF? | ||
2721 | bne.b _L12_4s # no | ||
2722 | bsr.l szr_inf # yes | ||
2723 | bra.b _L12_6s | ||
2724 | _L12_4s: | ||
2725 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
2726 | bne.b _L12_5s # no | ||
2727 | bsr.l src_qnan # yes | ||
2728 | bra.b _L12_6s | ||
2729 | _L12_5s: | ||
2730 | bsr.l stentoxd # operand is a DENORM | ||
2731 | _L12_6s: | ||
2732 | |||
2733 | # | ||
2734 | # Result is now in FP0 | ||
2735 | # | ||
# Only fp1 is reloaded (mask 0x40), so the result in fp0 is preserved for
# the caller. FPCR and FPSR are reloaded from the frame starting at
# USER_FPCR(%a6); presumably that covers the USER_FPSR image masked
# above -- confirm against the frame layout.
2736 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
2737 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
2738 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
2739 | unlk %a6 | ||
2740 | rts | ||
2741 | |||
#
# _ftentoxd_: stack-argument entry point for a double-precision operand.
# The operand at 0x8(%a6) (first argument after the link) is loaded into
# fp0, stored in extended format to FP_SRC(%a6), classified by tag, and
# passed to one of: stentox (NORM), ld_pone (ZERO), szr_inf (INF),
# src_qnan (QNAN) or stentoxd (any other tag; labelled DENORM). All of
# these routines are defined outside this chunk; the names suggest
# 10**x -- confirm. On return fp0 holds the result, while d0-d1/a0-a1,
# FPCR/FPSR and fp1 are reloaded from the local frame.
#
2742 | global _ftentoxd_ | ||
2743 | _ftentoxd_: | ||
2744 | link %a6,&-LOCAL_SIZE | ||
2745 | |||
2746 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
2747 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
2748 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
2749 | |||
2750 | fmov.l &0x0,%fpcr # zero FPCR | ||
2751 | |||
2752 | # | ||
2753 | # copy, convert, and tag input argument | ||
2754 | # | ||
2755 | fmov.d 0x8(%a6),%fp0 # load dbl input | ||
2756 | fmov.x %fp0,FP_SRC(%a6) | ||
2757 | lea FP_SRC(%a6),%a0 | ||
2758 | bsr.l tag # fetch operand type | ||
2759 | mov.b %d0,STAG(%a6) | ||
2760 | mov.b %d0,%d1 | ||
2761 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image; bits 31-24 and
# 15-8 are cleared (the condition-code and exception-status bytes in the
# 68k FPSR layout).
2762 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
2763 | |||
# d0 is reused for the zero-extended FPCR_MODE byte; the tag survives in d1.
2764 | clr.l %d0 | ||
2765 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
2766 | |||
# NOTE(review): STAG(%a6) was already written from d0 above and d1 is an
# unmodified copy of that value, so this store looks redundant (the
# single and extended entry points omit it) -- confirm before removing.
2767 | mov.b %d1,STAG(%a6) | ||
# Tag dispatch: a zero tag is NORM; ZERO, INF and QNAN are then compared
# in turn; no further test is made, so every remaining tag value reaches
# the DENORM call.
2768 | tst.b %d1 | ||
2769 | bne.b _L12_2d | ||
2770 | bsr.l stentox # operand is a NORM | ||
2771 | bra.b _L12_6d | ||
2772 | _L12_2d: | ||
2773 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
2774 | bne.b _L12_3d # no | ||
2775 | bsr.l ld_pone # yes | ||
2776 | bra.b _L12_6d | ||
2777 | _L12_3d: | ||
2778 | cmpi.b %d1,&INF # is operand an INF? | ||
2779 | bne.b _L12_4d # no | ||
2780 | bsr.l szr_inf # yes | ||
2781 | bra.b _L12_6d | ||
2782 | _L12_4d: | ||
2783 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
2784 | bne.b _L12_5d # no | ||
2785 | bsr.l src_qnan # yes | ||
2786 | bra.b _L12_6d | ||
2787 | _L12_5d: | ||
2788 | bsr.l stentoxd # operand is a DENORM | ||
2789 | _L12_6d: | ||
2790 | |||
2791 | # | ||
2792 | # Result is now in FP0 | ||
2793 | # | ||
# Only fp1 is reloaded (mask 0x40), so the result in fp0 is preserved for
# the caller. FPCR and FPSR are reloaded from the frame starting at
# USER_FPCR(%a6); presumably that covers the USER_FPSR image masked
# above -- confirm against the frame layout.
2794 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
2795 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
2796 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
2797 | unlk %a6 | ||
2798 | rts | ||
2799 | |||
#
# _ftentoxx_: stack-argument entry point for an extended-precision operand.
# The 12-byte operand at 0x8(%a6) (first argument after the link) is
# copied as three longwords into FP_SRC(%a6), classified by tag, and
# passed to one of: stentox (NORM), ld_pone (ZERO), szr_inf (INF),
# src_qnan (QNAN) or stentoxd (any other tag; labelled DENORM). All of
# these routines are defined outside this chunk; the names suggest
# 10**x -- confirm. On return fp0 holds the result, while d0-d1/a0-a1,
# FPCR/FPSR and fp1 are reloaded from the local frame.
#
2800 | global _ftentoxx_ | ||
2801 | _ftentoxx_: | ||
2802 | link %a6,&-LOCAL_SIZE | ||
2803 | |||
2804 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
2805 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
2806 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
2807 | |||
2808 | fmov.l &0x0,%fpcr # zero FPCR | ||
2809 | |||
2810 | # | ||
2811 | # copy, convert, and tag input argument | ||
2812 | # | ||
2813 | lea FP_SRC(%a6),%a0 | ||
2814 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | ||
2815 | mov.l 0x8+0x4(%a6),0x4(%a0) | ||
2816 | mov.l 0x8+0x8(%a6),0x8(%a0) | ||
2817 | bsr.l tag # fetch operand type | ||
2818 | mov.b %d0,STAG(%a6) | ||
2819 | mov.b %d0,%d1 | ||
2820 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image; bits 31-24 and
# 15-8 are cleared (the condition-code and exception-status bytes in the
# 68k FPSR layout).
2821 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
2822 | |||
# d0 is reused for the zero-extended FPCR_MODE byte; the tag survives in d1.
2823 | clr.l %d0 | ||
2824 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
2825 | |||
# Tag dispatch: a zero tag is NORM; ZERO, INF and QNAN are then compared
# in turn; no further test is made, so every remaining tag value reaches
# the DENORM call.
2826 | tst.b %d1 | ||
2827 | bne.b _L12_2x | ||
2828 | bsr.l stentox # operand is a NORM | ||
2829 | bra.b _L12_6x | ||
2830 | _L12_2x: | ||
2831 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
2832 | bne.b _L12_3x # no | ||
2833 | bsr.l ld_pone # yes | ||
2834 | bra.b _L12_6x | ||
2835 | _L12_3x: | ||
2836 | cmpi.b %d1,&INF # is operand an INF? | ||
2837 | bne.b _L12_4x # no | ||
2838 | bsr.l szr_inf # yes | ||
2839 | bra.b _L12_6x | ||
2840 | _L12_4x: | ||
2841 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
2842 | bne.b _L12_5x # no | ||
2843 | bsr.l src_qnan # yes | ||
2844 | bra.b _L12_6x | ||
2845 | _L12_5x: | ||
2846 | bsr.l stentoxd # operand is a DENORM | ||
2847 | _L12_6x: | ||
2848 | |||
2849 | # | ||
2850 | # Result is now in FP0 | ||
2851 | # | ||
# Only fp1 is reloaded (mask 0x40), so the result in fp0 is preserved for
# the caller. FPCR and FPSR are reloaded from the frame starting at
# USER_FPCR(%a6); presumably that covers the USER_FPSR image masked
# above -- confirm against the frame layout.
2852 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
2853 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
2854 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
2855 | unlk %a6 | ||
2856 | rts | ||
2857 | |||
2858 | |||
2859 | ######################################################################### | ||
2860 | # MONADIC TEMPLATE # | ||
2861 | ######################################################################### | ||
#
# _flogns_: stack-argument entry point for a single-precision operand.
# The operand at 0x8(%a6) (first argument after the link) is loaded into
# fp0, stored in extended format to FP_SRC(%a6), classified by tag, and
# passed to one of: slogn (NORM), t_dz2 (ZERO), sopr_inf (INF),
# src_qnan (QNAN) or slognd (any other tag; labelled DENORM). All of
# these routines are defined outside this chunk; the names suggest a
# natural logarithm -- confirm. On return fp0 holds the result, while
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the local frame.
#
2862 | global _flogns_ | ||
2863 | _flogns_: | ||
2864 | link %a6,&-LOCAL_SIZE | ||
2865 | |||
2866 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
2867 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
2868 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
2869 | |||
2870 | fmov.l &0x0,%fpcr # zero FPCR | ||
2871 | |||
2872 | # | ||
2873 | # copy, convert, and tag input argument | ||
2874 | # | ||
2875 | fmov.s 0x8(%a6),%fp0 # load sgl input | ||
2876 | fmov.x %fp0,FP_SRC(%a6) | ||
2877 | lea FP_SRC(%a6),%a0 | ||
2878 | bsr.l tag # fetch operand type | ||
2879 | mov.b %d0,STAG(%a6) | ||
2880 | mov.b %d0,%d1 | ||
2881 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image; bits 31-24 and
# 15-8 are cleared (the condition-code and exception-status bytes in the
# 68k FPSR layout).
2882 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
2883 | |||
# d0 is reused for the zero-extended FPCR_MODE byte; the tag survives in d1.
2884 | clr.l %d0 | ||
2885 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
2886 | |||
# Tag dispatch: a zero tag is NORM; ZERO, INF and QNAN are then compared
# in turn; no further test is made, so every remaining tag value reaches
# the DENORM call.
2887 | tst.b %d1 | ||
2888 | bne.b _L13_2s | ||
2889 | bsr.l slogn # operand is a NORM | ||
2890 | bra.b _L13_6s | ||
2891 | _L13_2s: | ||
2892 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
2893 | bne.b _L13_3s # no | ||
2894 | bsr.l t_dz2 # yes | ||
2895 | bra.b _L13_6s | ||
2896 | _L13_3s: | ||
2897 | cmpi.b %d1,&INF # is operand an INF? | ||
2898 | bne.b _L13_4s # no | ||
2899 | bsr.l sopr_inf # yes | ||
2900 | bra.b _L13_6s | ||
2901 | _L13_4s: | ||
2902 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
2903 | bne.b _L13_5s # no | ||
2904 | bsr.l src_qnan # yes | ||
2905 | bra.b _L13_6s | ||
2906 | _L13_5s: | ||
2907 | bsr.l slognd # operand is a DENORM | ||
2908 | _L13_6s: | ||
2909 | |||
2910 | # | ||
2911 | # Result is now in FP0 | ||
2912 | # | ||
# Only fp1 is reloaded (mask 0x40), so the result in fp0 is preserved for
# the caller. FPCR and FPSR are reloaded from the frame starting at
# USER_FPCR(%a6); presumably that covers the USER_FPSR image masked
# above -- confirm against the frame layout.
2913 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
2914 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
2915 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
2916 | unlk %a6 | ||
2917 | rts | ||
2918 | |||
#
# _flognd_: stack-argument entry point for a double-precision operand.
# The operand at 0x8(%a6) (first argument after the link) is loaded into
# fp0, stored in extended format to FP_SRC(%a6), classified by tag, and
# passed to one of: slogn (NORM), t_dz2 (ZERO), sopr_inf (INF),
# src_qnan (QNAN) or slognd (any other tag; labelled DENORM). All of
# these routines are defined outside this chunk; the names suggest a
# natural logarithm -- confirm. On return fp0 holds the result, while
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the local frame.
#
2919 | global _flognd_ | ||
2920 | _flognd_: | ||
2921 | link %a6,&-LOCAL_SIZE | ||
2922 | |||
2923 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | ||
2924 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | ||
2925 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | ||
2926 | |||
2927 | fmov.l &0x0,%fpcr # zero FPCR | ||
2928 | |||
2929 | # | ||
2930 | # copy, convert, and tag input argument | ||
2931 | # | ||
2932 | fmov.d 0x8(%a6),%fp0 # load dbl input | ||
2933 | fmov.x %fp0,FP_SRC(%a6) | ||
2934 | lea FP_SRC(%a6),%a0 | ||
2935 | bsr.l tag # fetch operand type | ||
2936 | mov.b %d0,STAG(%a6) | ||
2937 | mov.b %d0,%d1 | ||
2938 | |||
# Keep only bits 23-16 and 7-0 of the saved FPSR image; bits 31-24 and
# 15-8 are cleared (the condition-code and exception-status bytes in the
# 68k FPSR layout).
2939 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
2940 | |||
# d0 is reused for the zero-extended FPCR_MODE byte; the tag survives in d1.
2941 | clr.l %d0 | ||
2942 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | ||
2943 | |||
# NOTE(review): STAG(%a6) was already written from d0 above and d1 is an
# unmodified copy of that value, so this store looks redundant (the
# single and extended entry points omit it) -- confirm before removing.
2944 | mov.b %d1,STAG(%a6) | ||
# Tag dispatch: a zero tag is NORM; ZERO, INF and QNAN are then compared
# in turn; no further test is made, so every remaining tag value reaches
# the DENORM call.
2945 | tst.b %d1 | ||
2946 | bne.b _L13_2d | ||
2947 | bsr.l slogn # operand is a NORM | ||
2948 | bra.b _L13_6d | ||
2949 | _L13_2d: | ||
2950 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
2951 | bne.b _L13_3d # no | ||
2952 | bsr.l t_dz2 # yes | ||
2953 | bra.b _L13_6d | ||
2954 | _L13_3d: | ||
2955 | cmpi.b %d1,&INF # is operand an INF? | ||
2956 | bne.b _L13_4d # no | ||
2957 | bsr.l sopr_inf # yes | ||
2958 | bra.b _L13_6d | ||
2959 | _L13_4d: | ||
2960 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
2961 | bne.b _L13_5d # no | ||
2962 | bsr.l src_qnan # yes | ||
2963 | bra.b _L13_6d | ||
2964 | _L13_5d: | ||
2965 | bsr.l slognd # operand is a DENORM | ||
2966 | _L13_6d: | ||
2967 | |||
2968 | # | ||
2969 | # Result is now in FP0 | ||
2970 | # | ||
# Only fp1 is reloaded (mask 0x40), so the result in fp0 is preserved for
# the caller. FPCR and FPSR are reloaded from the frame starting at
# USER_FPCR(%a6); presumably that covers the USER_FPSR image masked
# above -- confirm against the frame layout.
2971 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
2972 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | ||
2973 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | ||
2974 | unlk %a6 | ||
2975 | rts | ||
2976 | |||
#
# _flognx_: extended-precision entry point of the flogn family.
#
# The 12-byte ext operand at 0x8(%a6) (first stack argument once the frame
# is linked) is copied into FP_SRC and classified by tag().  The tag, kept
# in d1 and recorded in STAG, selects:
#	NORM -> slogn		ZERO -> t_dz2		INF -> sopr_inf
#	QNAN -> src_qnan	any other tag (DENORM) -> slognd
# At dispatch d0 holds the zero-extended FPCR_MODE byte.
#
# On return fp0 holds the result; d0-d1/a0-a1, fp1 and FPCR/FPSR are
# reloaded from the frame.
#
	global		_flognx_
_flognx_:
	link		%a6,&-LOCAL_SIZE	# build the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# stash d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # stash caller's FPCR/FPSR
	fmovm.x		&0xc0,EXC_FP0(%a6)	# stash fp0/fp1

	fmov.l		&0x0,%fpcr		# run with an all-zero FPCR

# Copy the extended operand into FP_SRC and classify it.
	lea		FP_SRC(%a6),%a0		# a0 -> operand copy
	mov.l		0x8+0x0(%a6),0x0(%a0)	# three longwords from the stack
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# d0 = operand type
	mov.b		%d0,STAG(%a6)		# record the source tag
	mov.b		%d0,%d1			# d1 = tag used for dispatch

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# d0 = rounding mode/precision byte

# Dispatch on the tag; every path rejoins at _L13_6x.
	tst.b		%d1			# tag 0 means NORM
	bne.b		_L13_2x
	bsr.l		slogn			# NORM
	bra.b		_L13_6x
_L13_2x:
	cmpi.b		%d1,&ZERO
	bne.b		_L13_3x
	bsr.l		t_dz2			# ZERO
	bra.b		_L13_6x
_L13_3x:
	cmpi.b		%d1,&INF
	bne.b		_L13_4x
	bsr.l		sopr_inf		# INF
	bra.b		_L13_6x
_L13_4x:
	cmpi.b		%d1,&QNAN
	bne.b		_L13_5x
	bsr.l		src_qnan		# QNAN
	bra.b		_L13_6x
_L13_5x:
	bsr.l		slognd			# everything else: DENORM
_L13_6x:

# fp0 now carries the result, so only fp1 is reloaded below.
	movm.l		EXC_DREGS(%a6),&0x0303	# reload d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # reload FPCR/FPSR from the frame
	fmovm.x		EXC_FP1(%a6),&0x40	# reload fp1 only
	unlk		%a6
	rts
3034 | |||
3035 | |||
3036 | ######################################################################### | ||
3037 | # MONADIC TEMPLATE # | ||
3038 | ######################################################################### | ||
#
# _flog10s_: single-precision entry point of the flog10 family.
#
# The sgl operand is read from 0x8(%a6) (first stack argument once the
# frame is linked), widened to extended precision in FP_SRC and classified
# by tag().  The tag, kept in d1 and recorded in STAG, selects:
#	NORM -> slog10		ZERO -> t_dz2		INF -> sopr_inf
#	QNAN -> src_qnan	any other tag (DENORM) -> slog10d
# At dispatch d0 holds the zero-extended FPCR_MODE byte.
#
# On return fp0 holds the result; d0-d1/a0-a1, fp1 and FPCR/FPSR are
# reloaded from the frame.
#
	global		_flog10s_
_flog10s_:
	link		%a6,&-LOCAL_SIZE	# build the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# stash d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # stash caller's FPCR/FPSR
	fmovm.x		&0xc0,EXC_FP0(%a6)	# stash fp0/fp1

	fmov.l		&0x0,%fpcr		# run with an all-zero FPCR

# Widen the operand to extended precision and classify it.
	fmov.s		0x8(%a6),%fp0		# sgl argument from the stack
	fmov.x		%fp0,FP_SRC(%a6)	# extended copy for the routines
	lea		FP_SRC(%a6),%a0		# a0 -> extended operand
	bsr.l		tag			# d0 = operand type
	mov.b		%d0,STAG(%a6)		# record the source tag
	mov.b		%d0,%d1			# d1 = tag used for dispatch

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# d0 = rounding mode/precision byte

# Dispatch on the tag; every path rejoins at _L14_6s.
	tst.b		%d1			# tag 0 means NORM
	bne.b		_L14_2s
	bsr.l		slog10			# NORM
	bra.b		_L14_6s
_L14_2s:
	cmpi.b		%d1,&ZERO
	bne.b		_L14_3s
	bsr.l		t_dz2			# ZERO
	bra.b		_L14_6s
_L14_3s:
	cmpi.b		%d1,&INF
	bne.b		_L14_4s
	bsr.l		sopr_inf		# INF
	bra.b		_L14_6s
_L14_4s:
	cmpi.b		%d1,&QNAN
	bne.b		_L14_5s
	bsr.l		src_qnan		# QNAN
	bra.b		_L14_6s
_L14_5s:
	bsr.l		slog10d			# everything else: DENORM
_L14_6s:

# fp0 now carries the result, so only fp1 is reloaded below.
	movm.l		EXC_DREGS(%a6),&0x0303	# reload d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # reload FPCR/FPSR from the frame
	fmovm.x		EXC_FP1(%a6),&0x40	# reload fp1 only
	unlk		%a6
	rts
3095 | |||
#
# _flog10d_: double-precision entry point of the flog10 family.
#
# The dbl operand is read from 0x8(%a6) (first stack argument once the
# frame is linked), widened to extended precision in FP_SRC and classified
# by tag().  The tag, kept in d1 and recorded once in STAG, selects:
#	NORM -> slog10		ZERO -> t_dz2		INF -> sopr_inf
#	QNAN -> src_qnan	any other tag (DENORM) -> slog10d
# At dispatch d0 holds the zero-extended FPCR_MODE byte.
#
# On return fp0 holds the result; d0-d1/a0-a1, fp1 and FPCR/FPSR are
# reloaded from the frame.
#
	global		_flog10d_
_flog10d_:
	link		%a6,&-LOCAL_SIZE	# build the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# stash d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # stash caller's FPCR/FPSR
	fmovm.x		&0xc0,EXC_FP0(%a6)	# stash fp0/fp1

	fmov.l		&0x0,%fpcr		# run with an all-zero FPCR

# Widen the operand to extended precision and classify it.
	fmov.d		0x8(%a6),%fp0		# dbl argument from the stack
	fmov.x		%fp0,FP_SRC(%a6)	# extended copy for the routines
	lea		FP_SRC(%a6),%a0		# a0 -> extended operand
	bsr.l		tag			# d0 = operand type
	mov.b		%d0,STAG(%a6)		# record the source tag (only store needed)
	mov.b		%d0,%d1			# d1 = tag used for dispatch

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# d0 = rounding mode/precision byte

# Dispatch on the tag; every path rejoins at _L14_6d.
	tst.b		%d1			# tag 0 means NORM
	bne.b		_L14_2d
	bsr.l		slog10			# NORM
	bra.b		_L14_6d
_L14_2d:
	cmpi.b		%d1,&ZERO
	bne.b		_L14_3d
	bsr.l		t_dz2			# ZERO
	bra.b		_L14_6d
_L14_3d:
	cmpi.b		%d1,&INF
	bne.b		_L14_4d
	bsr.l		sopr_inf		# INF
	bra.b		_L14_6d
_L14_4d:
	cmpi.b		%d1,&QNAN
	bne.b		_L14_5d
	bsr.l		src_qnan		# QNAN
	bra.b		_L14_6d
_L14_5d:
	bsr.l		slog10d			# everything else: DENORM
_L14_6d:

# fp0 now carries the result, so only fp1 is reloaded below.
	movm.l		EXC_DREGS(%a6),&0x0303	# reload d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # reload FPCR/FPSR from the frame
	fmovm.x		EXC_FP1(%a6),&0x40	# reload fp1 only
	unlk		%a6
	rts
3153 | |||
#
# _flog10x_: extended-precision entry point of the flog10 family.
#
# The 12-byte ext operand at 0x8(%a6) (first stack argument once the frame
# is linked) is copied into FP_SRC and classified by tag().  The tag, kept
# in d1 and recorded in STAG, selects:
#	NORM -> slog10		ZERO -> t_dz2		INF -> sopr_inf
#	QNAN -> src_qnan	any other tag (DENORM) -> slog10d
# At dispatch d0 holds the zero-extended FPCR_MODE byte.
#
# On return fp0 holds the result; d0-d1/a0-a1, fp1 and FPCR/FPSR are
# reloaded from the frame.
#
	global		_flog10x_
_flog10x_:
	link		%a6,&-LOCAL_SIZE	# build the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# stash d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # stash caller's FPCR/FPSR
	fmovm.x		&0xc0,EXC_FP0(%a6)	# stash fp0/fp1

	fmov.l		&0x0,%fpcr		# run with an all-zero FPCR

# Copy the extended operand into FP_SRC and classify it.
	lea		FP_SRC(%a6),%a0		# a0 -> operand copy
	mov.l		0x8+0x0(%a6),0x0(%a0)	# three longwords from the stack
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# d0 = operand type
	mov.b		%d0,STAG(%a6)		# record the source tag
	mov.b		%d0,%d1			# d1 = tag used for dispatch

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# d0 = rounding mode/precision byte

# Dispatch on the tag; every path rejoins at _L14_6x.
	tst.b		%d1			# tag 0 means NORM
	bne.b		_L14_2x
	bsr.l		slog10			# NORM
	bra.b		_L14_6x
_L14_2x:
	cmpi.b		%d1,&ZERO
	bne.b		_L14_3x
	bsr.l		t_dz2			# ZERO
	bra.b		_L14_6x
_L14_3x:
	cmpi.b		%d1,&INF
	bne.b		_L14_4x
	bsr.l		sopr_inf		# INF
	bra.b		_L14_6x
_L14_4x:
	cmpi.b		%d1,&QNAN
	bne.b		_L14_5x
	bsr.l		src_qnan		# QNAN
	bra.b		_L14_6x
_L14_5x:
	bsr.l		slog10d			# everything else: DENORM
_L14_6x:

# fp0 now carries the result, so only fp1 is reloaded below.
	movm.l		EXC_DREGS(%a6),&0x0303	# reload d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # reload FPCR/FPSR from the frame
	fmovm.x		EXC_FP1(%a6),&0x40	# reload fp1 only
	unlk		%a6
	rts
3211 | |||
3212 | |||
3213 | ######################################################################### | ||
3214 | # MONADIC TEMPLATE # | ||
3215 | ######################################################################### | ||
#
# _flog2s_: single-precision entry point of the flog2 family.
#
# The sgl operand is read from 0x8(%a6) (first stack argument once the
# frame is linked), widened to extended precision in FP_SRC and classified
# by tag().  The tag, kept in d1 and recorded in STAG, selects:
#	NORM -> slog2		ZERO -> t_dz2		INF -> sopr_inf
#	QNAN -> src_qnan	any other tag (DENORM) -> slog2d
# At dispatch d0 holds the zero-extended FPCR_MODE byte.
#
# On return fp0 holds the result; d0-d1/a0-a1, fp1 and FPCR/FPSR are
# reloaded from the frame.
#
	global		_flog2s_
_flog2s_:
	link		%a6,&-LOCAL_SIZE	# build the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# stash d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # stash caller's FPCR/FPSR
	fmovm.x		&0xc0,EXC_FP0(%a6)	# stash fp0/fp1

	fmov.l		&0x0,%fpcr		# run with an all-zero FPCR

# Widen the operand to extended precision and classify it.
	fmov.s		0x8(%a6),%fp0		# sgl argument from the stack
	fmov.x		%fp0,FP_SRC(%a6)	# extended copy for the routines
	lea		FP_SRC(%a6),%a0		# a0 -> extended operand
	bsr.l		tag			# d0 = operand type
	mov.b		%d0,STAG(%a6)		# record the source tag
	mov.b		%d0,%d1			# d1 = tag used for dispatch

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# d0 = rounding mode/precision byte

# Dispatch on the tag; every path rejoins at _L15_6s.
	tst.b		%d1			# tag 0 means NORM
	bne.b		_L15_2s
	bsr.l		slog2			# NORM
	bra.b		_L15_6s
_L15_2s:
	cmpi.b		%d1,&ZERO
	bne.b		_L15_3s
	bsr.l		t_dz2			# ZERO
	bra.b		_L15_6s
_L15_3s:
	cmpi.b		%d1,&INF
	bne.b		_L15_4s
	bsr.l		sopr_inf		# INF
	bra.b		_L15_6s
_L15_4s:
	cmpi.b		%d1,&QNAN
	bne.b		_L15_5s
	bsr.l		src_qnan		# QNAN
	bra.b		_L15_6s
_L15_5s:
	bsr.l		slog2d			# everything else: DENORM
_L15_6s:

# fp0 now carries the result, so only fp1 is reloaded below.
	movm.l		EXC_DREGS(%a6),&0x0303	# reload d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # reload FPCR/FPSR from the frame
	fmovm.x		EXC_FP1(%a6),&0x40	# reload fp1 only
	unlk		%a6
	rts
3272 | |||
#
# _flog2d_: double-precision entry point of the flog2 family.
#
# The dbl operand is read from 0x8(%a6) (first stack argument once the
# frame is linked), widened to extended precision in FP_SRC and classified
# by tag().  The tag, kept in d1 and recorded once in STAG, selects:
#	NORM -> slog2		ZERO -> t_dz2		INF -> sopr_inf
#	QNAN -> src_qnan	any other tag (DENORM) -> slog2d
# At dispatch d0 holds the zero-extended FPCR_MODE byte.
#
# On return fp0 holds the result; d0-d1/a0-a1, fp1 and FPCR/FPSR are
# reloaded from the frame.
#
	global		_flog2d_
_flog2d_:
	link		%a6,&-LOCAL_SIZE	# build the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# stash d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # stash caller's FPCR/FPSR
	fmovm.x		&0xc0,EXC_FP0(%a6)	# stash fp0/fp1

	fmov.l		&0x0,%fpcr		# run with an all-zero FPCR

# Widen the operand to extended precision and classify it.
	fmov.d		0x8(%a6),%fp0		# dbl argument from the stack
	fmov.x		%fp0,FP_SRC(%a6)	# extended copy for the routines
	lea		FP_SRC(%a6),%a0		# a0 -> extended operand
	bsr.l		tag			# d0 = operand type
	mov.b		%d0,STAG(%a6)		# record the source tag (only store needed)
	mov.b		%d0,%d1			# d1 = tag used for dispatch

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# d0 = rounding mode/precision byte

# Dispatch on the tag; every path rejoins at _L15_6d.
	tst.b		%d1			# tag 0 means NORM
	bne.b		_L15_2d
	bsr.l		slog2			# NORM
	bra.b		_L15_6d
_L15_2d:
	cmpi.b		%d1,&ZERO
	bne.b		_L15_3d
	bsr.l		t_dz2			# ZERO
	bra.b		_L15_6d
_L15_3d:
	cmpi.b		%d1,&INF
	bne.b		_L15_4d
	bsr.l		sopr_inf		# INF
	bra.b		_L15_6d
_L15_4d:
	cmpi.b		%d1,&QNAN
	bne.b		_L15_5d
	bsr.l		src_qnan		# QNAN
	bra.b		_L15_6d
_L15_5d:
	bsr.l		slog2d			# everything else: DENORM
_L15_6d:

# fp0 now carries the result, so only fp1 is reloaded below.
	movm.l		EXC_DREGS(%a6),&0x0303	# reload d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # reload FPCR/FPSR from the frame
	fmovm.x		EXC_FP1(%a6),&0x40	# reload fp1 only
	unlk		%a6
	rts
3330 | |||
#
# _flog2x_: extended-precision entry point of the flog2 family.
#
# The 12-byte ext operand at 0x8(%a6) (first stack argument once the frame
# is linked) is copied into FP_SRC and classified by tag().  The tag, kept
# in d1 and recorded in STAG, selects:
#	NORM -> slog2		ZERO -> t_dz2		INF -> sopr_inf
#	QNAN -> src_qnan	any other tag (DENORM) -> slog2d
# At dispatch d0 holds the zero-extended FPCR_MODE byte.
#
# On return fp0 holds the result; d0-d1/a0-a1, fp1 and FPCR/FPSR are
# reloaded from the frame.
#
	global		_flog2x_
_flog2x_:
	link		%a6,&-LOCAL_SIZE	# build the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# stash d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # stash caller's FPCR/FPSR
	fmovm.x		&0xc0,EXC_FP0(%a6)	# stash fp0/fp1

	fmov.l		&0x0,%fpcr		# run with an all-zero FPCR

# Copy the extended operand into FP_SRC and classify it.
	lea		FP_SRC(%a6),%a0		# a0 -> operand copy
	mov.l		0x8+0x0(%a6),0x0(%a0)	# three longwords from the stack
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# d0 = operand type
	mov.b		%d0,STAG(%a6)		# record the source tag
	mov.b		%d0,%d1			# d1 = tag used for dispatch

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# d0 = rounding mode/precision byte

# Dispatch on the tag; every path rejoins at _L15_6x.
	tst.b		%d1			# tag 0 means NORM
	bne.b		_L15_2x
	bsr.l		slog2			# NORM
	bra.b		_L15_6x
_L15_2x:
	cmpi.b		%d1,&ZERO
	bne.b		_L15_3x
	bsr.l		t_dz2			# ZERO
	bra.b		_L15_6x
_L15_3x:
	cmpi.b		%d1,&INF
	bne.b		_L15_4x
	bsr.l		sopr_inf		# INF
	bra.b		_L15_6x
_L15_4x:
	cmpi.b		%d1,&QNAN
	bne.b		_L15_5x
	bsr.l		src_qnan		# QNAN
	bra.b		_L15_6x
_L15_5x:
	bsr.l		slog2d			# everything else: DENORM
_L15_6x:

# fp0 now carries the result, so only fp1 is reloaded below.
	movm.l		EXC_DREGS(%a6),&0x0303	# reload d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # reload FPCR/FPSR from the frame
	fmovm.x		EXC_FP1(%a6),&0x40	# reload fp1 only
	unlk		%a6
	rts
3388 | |||
3389 | |||
3390 | ######################################################################### | ||
3391 | # MONADIC TEMPLATE # | ||
3392 | ######################################################################### | ||
#
# _fcoshs_: single-precision entry point of the fcosh family.
#
# The sgl operand is read from 0x8(%a6) (first stack argument once the
# frame is linked), widened to extended precision in FP_SRC and classified
# by tag().  The tag, kept in d1 and recorded in STAG, selects:
#	NORM -> scosh		ZERO -> ld_pone		INF -> ld_pinf
#	QNAN -> src_qnan	any other tag (DENORM) -> scoshd
# At dispatch d0 holds the zero-extended FPCR_MODE byte.
#
# On return fp0 holds the result; d0-d1/a0-a1, fp1 and FPCR/FPSR are
# reloaded from the frame.
#
	global		_fcoshs_
_fcoshs_:
	link		%a6,&-LOCAL_SIZE	# build the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# stash d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # stash caller's FPCR/FPSR
	fmovm.x		&0xc0,EXC_FP0(%a6)	# stash fp0/fp1

	fmov.l		&0x0,%fpcr		# run with an all-zero FPCR

# Widen the operand to extended precision and classify it.
	fmov.s		0x8(%a6),%fp0		# sgl argument from the stack
	fmov.x		%fp0,FP_SRC(%a6)	# extended copy for the routines
	lea		FP_SRC(%a6),%a0		# a0 -> extended operand
	bsr.l		tag			# d0 = operand type
	mov.b		%d0,STAG(%a6)		# record the source tag
	mov.b		%d0,%d1			# d1 = tag used for dispatch

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# d0 = rounding mode/precision byte

# Dispatch on the tag; every path rejoins at _L16_6s.
	tst.b		%d1			# tag 0 means NORM
	bne.b		_L16_2s
	bsr.l		scosh			# NORM
	bra.b		_L16_6s
_L16_2s:
	cmpi.b		%d1,&ZERO
	bne.b		_L16_3s
	bsr.l		ld_pone			# ZERO
	bra.b		_L16_6s
_L16_3s:
	cmpi.b		%d1,&INF
	bne.b		_L16_4s
	bsr.l		ld_pinf			# INF
	bra.b		_L16_6s
_L16_4s:
	cmpi.b		%d1,&QNAN
	bne.b		_L16_5s
	bsr.l		src_qnan		# QNAN
	bra.b		_L16_6s
_L16_5s:
	bsr.l		scoshd			# everything else: DENORM
_L16_6s:

# fp0 now carries the result, so only fp1 is reloaded below.
	movm.l		EXC_DREGS(%a6),&0x0303	# reload d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # reload FPCR/FPSR from the frame
	fmovm.x		EXC_FP1(%a6),&0x40	# reload fp1 only
	unlk		%a6
	rts
3449 | |||
#
# _fcoshd_: double-precision entry point of the fcosh family.
#
# The dbl operand is read from 0x8(%a6) (first stack argument once the
# frame is linked), widened to extended precision in FP_SRC and classified
# by tag().  The tag, kept in d1 and recorded once in STAG, selects:
#	NORM -> scosh		ZERO -> ld_pone		INF -> ld_pinf
#	QNAN -> src_qnan	any other tag (DENORM) -> scoshd
# At dispatch d0 holds the zero-extended FPCR_MODE byte.
#
# On return fp0 holds the result; d0-d1/a0-a1, fp1 and FPCR/FPSR are
# reloaded from the frame.
#
	global		_fcoshd_
_fcoshd_:
	link		%a6,&-LOCAL_SIZE	# build the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# stash d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # stash caller's FPCR/FPSR
	fmovm.x		&0xc0,EXC_FP0(%a6)	# stash fp0/fp1

	fmov.l		&0x0,%fpcr		# run with an all-zero FPCR

# Widen the operand to extended precision and classify it.
	fmov.d		0x8(%a6),%fp0		# dbl argument from the stack
	fmov.x		%fp0,FP_SRC(%a6)	# extended copy for the routines
	lea		FP_SRC(%a6),%a0		# a0 -> extended operand
	bsr.l		tag			# d0 = operand type
	mov.b		%d0,STAG(%a6)		# record the source tag (only store needed)
	mov.b		%d0,%d1			# d1 = tag used for dispatch

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# d0 = rounding mode/precision byte

# Dispatch on the tag; every path rejoins at _L16_6d.
	tst.b		%d1			# tag 0 means NORM
	bne.b		_L16_2d
	bsr.l		scosh			# NORM
	bra.b		_L16_6d
_L16_2d:
	cmpi.b		%d1,&ZERO
	bne.b		_L16_3d
	bsr.l		ld_pone			# ZERO
	bra.b		_L16_6d
_L16_3d:
	cmpi.b		%d1,&INF
	bne.b		_L16_4d
	bsr.l		ld_pinf			# INF
	bra.b		_L16_6d
_L16_4d:
	cmpi.b		%d1,&QNAN
	bne.b		_L16_5d
	bsr.l		src_qnan		# QNAN
	bra.b		_L16_6d
_L16_5d:
	bsr.l		scoshd			# everything else: DENORM
_L16_6d:

# fp0 now carries the result, so only fp1 is reloaded below.
	movm.l		EXC_DREGS(%a6),&0x0303	# reload d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # reload FPCR/FPSR from the frame
	fmovm.x		EXC_FP1(%a6),&0x40	# reload fp1 only
	unlk		%a6
	rts
3507 | |||
#
# _fcoshx_: extended-precision entry point of the fcosh family.
#
# The 12-byte ext operand at 0x8(%a6) (first stack argument once the frame
# is linked) is copied into FP_SRC and classified by tag().  The tag, kept
# in d1 and recorded in STAG, selects:
#	NORM -> scosh		ZERO -> ld_pone		INF -> ld_pinf
#	QNAN -> src_qnan	any other tag (DENORM) -> scoshd
# At dispatch d0 holds the zero-extended FPCR_MODE byte.
#
# On return fp0 holds the result; d0-d1/a0-a1, fp1 and FPCR/FPSR are
# reloaded from the frame.
#
	global		_fcoshx_
_fcoshx_:
	link		%a6,&-LOCAL_SIZE	# build the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# stash d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # stash caller's FPCR/FPSR
	fmovm.x		&0xc0,EXC_FP0(%a6)	# stash fp0/fp1

	fmov.l		&0x0,%fpcr		# run with an all-zero FPCR

# Copy the extended operand into FP_SRC and classify it.
	lea		FP_SRC(%a6),%a0		# a0 -> operand copy
	mov.l		0x8+0x0(%a6),0x0(%a0)	# three longwords from the stack
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# d0 = operand type
	mov.b		%d0,STAG(%a6)		# record the source tag
	mov.b		%d0,%d1			# d1 = tag used for dispatch

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# d0 = rounding mode/precision byte

# Dispatch on the tag; every path rejoins at _L16_6x.
	tst.b		%d1			# tag 0 means NORM
	bne.b		_L16_2x
	bsr.l		scosh			# NORM
	bra.b		_L16_6x
_L16_2x:
	cmpi.b		%d1,&ZERO
	bne.b		_L16_3x
	bsr.l		ld_pone			# ZERO
	bra.b		_L16_6x
_L16_3x:
	cmpi.b		%d1,&INF
	bne.b		_L16_4x
	bsr.l		ld_pinf			# INF
	bra.b		_L16_6x
_L16_4x:
	cmpi.b		%d1,&QNAN
	bne.b		_L16_5x
	bsr.l		src_qnan		# QNAN
	bra.b		_L16_6x
_L16_5x:
	bsr.l		scoshd			# everything else: DENORM
_L16_6x:

# fp0 now carries the result, so only fp1 is reloaded below.
	movm.l		EXC_DREGS(%a6),&0x0303	# reload d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # reload FPCR/FPSR from the frame
	fmovm.x		EXC_FP1(%a6),&0x40	# reload fp1 only
	unlk		%a6
	rts
3565 | |||
3566 | |||
3567 | ######################################################################### | ||
3568 | # MONADIC TEMPLATE # | ||
3569 | ######################################################################### | ||
#
# _facoss_: single-precision entry point of the facos family.
#
# The sgl operand is read from 0x8(%a6) (first stack argument once the
# frame is linked), widened to extended precision in FP_SRC and classified
# by tag().  The tag, kept in d1 and recorded in STAG, selects:
#	NORM -> sacos		ZERO -> ld_ppi2		INF -> t_operr
#	QNAN -> src_qnan	any other tag (DENORM) -> sacosd
# At dispatch d0 holds the zero-extended FPCR_MODE byte.
#
# On return fp0 holds the result; d0-d1/a0-a1, fp1 and FPCR/FPSR are
# reloaded from the frame.
#
	global		_facoss_
_facoss_:
	link		%a6,&-LOCAL_SIZE	# build the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# stash d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # stash caller's FPCR/FPSR
	fmovm.x		&0xc0,EXC_FP0(%a6)	# stash fp0/fp1

	fmov.l		&0x0,%fpcr		# run with an all-zero FPCR

# Widen the operand to extended precision and classify it.
	fmov.s		0x8(%a6),%fp0		# sgl argument from the stack
	fmov.x		%fp0,FP_SRC(%a6)	# extended copy for the routines
	lea		FP_SRC(%a6),%a0		# a0 -> extended operand
	bsr.l		tag			# d0 = operand type
	mov.b		%d0,STAG(%a6)		# record the source tag
	mov.b		%d0,%d1			# d1 = tag used for dispatch

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# d0 = rounding mode/precision byte

# Dispatch on the tag; every path rejoins at _L17_6s.
	tst.b		%d1			# tag 0 means NORM
	bne.b		_L17_2s
	bsr.l		sacos			# NORM
	bra.b		_L17_6s
_L17_2s:
	cmpi.b		%d1,&ZERO
	bne.b		_L17_3s
	bsr.l		ld_ppi2			# ZERO
	bra.b		_L17_6s
_L17_3s:
	cmpi.b		%d1,&INF
	bne.b		_L17_4s
	bsr.l		t_operr			# INF
	bra.b		_L17_6s
_L17_4s:
	cmpi.b		%d1,&QNAN
	bne.b		_L17_5s
	bsr.l		src_qnan		# QNAN
	bra.b		_L17_6s
_L17_5s:
	bsr.l		sacosd			# everything else: DENORM
_L17_6s:

# fp0 now carries the result, so only fp1 is reloaded below.
	movm.l		EXC_DREGS(%a6),&0x0303	# reload d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # reload FPCR/FPSR from the frame
	fmovm.x		EXC_FP1(%a6),&0x40	# reload fp1 only
	unlk		%a6
	rts
3626 | |||
#
# _facosd_: double-precision entry point of the facos family.
#
# The dbl operand is read from 0x8(%a6) (first stack argument once the
# frame is linked), widened to extended precision in FP_SRC and classified
# by tag().  The tag, kept in d1 and recorded once in STAG, selects:
#	NORM -> sacos		ZERO -> ld_ppi2		INF -> t_operr
#	QNAN -> src_qnan	any other tag (DENORM) -> sacosd
# At dispatch d0 holds the zero-extended FPCR_MODE byte.
#
# On return fp0 holds the result; d0-d1/a0-a1, fp1 and FPCR/FPSR are
# reloaded from the frame.
#
	global		_facosd_
_facosd_:
	link		%a6,&-LOCAL_SIZE	# build the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# stash d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # stash caller's FPCR/FPSR
	fmovm.x		&0xc0,EXC_FP0(%a6)	# stash fp0/fp1

	fmov.l		&0x0,%fpcr		# run with an all-zero FPCR

# Widen the operand to extended precision and classify it.
	fmov.d		0x8(%a6),%fp0		# dbl argument from the stack
	fmov.x		%fp0,FP_SRC(%a6)	# extended copy for the routines
	lea		FP_SRC(%a6),%a0		# a0 -> extended operand
	bsr.l		tag			# d0 = operand type
	mov.b		%d0,STAG(%a6)		# record the source tag (only store needed)
	mov.b		%d0,%d1			# d1 = tag used for dispatch

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# d0 = rounding mode/precision byte

# Dispatch on the tag; every path rejoins at _L17_6d.
	tst.b		%d1			# tag 0 means NORM
	bne.b		_L17_2d
	bsr.l		sacos			# NORM
	bra.b		_L17_6d
_L17_2d:
	cmpi.b		%d1,&ZERO
	bne.b		_L17_3d
	bsr.l		ld_ppi2			# ZERO
	bra.b		_L17_6d
_L17_3d:
	cmpi.b		%d1,&INF
	bne.b		_L17_4d
	bsr.l		t_operr			# INF
	bra.b		_L17_6d
_L17_4d:
	cmpi.b		%d1,&QNAN
	bne.b		_L17_5d
	bsr.l		src_qnan		# QNAN
	bra.b		_L17_6d
_L17_5d:
	bsr.l		sacosd			# everything else: DENORM
_L17_6d:

# fp0 now carries the result, so only fp1 is reloaded below.
	movm.l		EXC_DREGS(%a6),&0x0303	# reload d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # reload FPCR/FPSR from the frame
	fmovm.x		EXC_FP1(%a6),&0x40	# reload fp1 only
	unlk		%a6
	rts
3684 | |||
#
# _facosx_: extended-precision entry point of the facos family.
#
# The 12-byte ext operand at 0x8(%a6) (first stack argument once the frame
# is linked) is copied into FP_SRC and classified by tag().  The tag, kept
# in d1 and recorded in STAG, selects:
#	NORM -> sacos		ZERO -> ld_ppi2		INF -> t_operr
#	QNAN -> src_qnan	any other tag (DENORM) -> sacosd
# At dispatch d0 holds the zero-extended FPCR_MODE byte.
#
# On return fp0 holds the result; d0-d1/a0-a1, fp1 and FPCR/FPSR are
# reloaded from the frame.
#
	global		_facosx_
_facosx_:
	link		%a6,&-LOCAL_SIZE	# build the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# stash d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # stash caller's FPCR/FPSR
	fmovm.x		&0xc0,EXC_FP0(%a6)	# stash fp0/fp1

	fmov.l		&0x0,%fpcr		# run with an all-zero FPCR

# Copy the extended operand into FP_SRC and classify it.
	lea		FP_SRC(%a6),%a0		# a0 -> operand copy
	mov.l		0x8+0x0(%a6),0x0(%a0)	# three longwords from the stack
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# d0 = operand type
	mov.b		%d0,STAG(%a6)		# record the source tag
	mov.b		%d0,%d1			# d1 = tag used for dispatch

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# d0 = rounding mode/precision byte

# Dispatch on the tag; every path rejoins at _L17_6x.
	tst.b		%d1			# tag 0 means NORM
	bne.b		_L17_2x
	bsr.l		sacos			# NORM
	bra.b		_L17_6x
_L17_2x:
	cmpi.b		%d1,&ZERO
	bne.b		_L17_3x
	bsr.l		ld_ppi2			# ZERO
	bra.b		_L17_6x
_L17_3x:
	cmpi.b		%d1,&INF
	bne.b		_L17_4x
	bsr.l		t_operr			# INF
	bra.b		_L17_6x
_L17_4x:
	cmpi.b		%d1,&QNAN
	bne.b		_L17_5x
	bsr.l		src_qnan		# QNAN
	bra.b		_L17_6x
_L17_5x:
	bsr.l		sacosd			# everything else: DENORM
_L17_6x:

# fp0 now carries the result, so only fp1 is reloaded below.
	movm.l		EXC_DREGS(%a6),&0x0303	# reload d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # reload FPCR/FPSR from the frame
	fmovm.x		EXC_FP1(%a6),&0x40	# reload fp1 only
	unlk		%a6
	rts
3742 | |||
3743 | |||
3744 | ######################################################################### | ||
3745 | # MONADIC TEMPLATE # | ||
3746 | ######################################################################### | ||
#
# _fgetexps_: single-precision entry point of the fgetexp family.
#
# The sgl operand is read from 0x8(%a6) (first stack argument once the
# frame is linked), widened to extended precision in FP_SRC and classified
# by tag().  The tag, kept in d1 and recorded in STAG, selects:
#	NORM -> sgetexp		ZERO -> src_zero	INF -> t_operr
#	QNAN -> src_qnan	any other tag (DENORM) -> sgetexpd
# At dispatch d0 holds the zero-extended FPCR_MODE byte.
#
# On return fp0 holds the result; d0-d1/a0-a1, fp1 and FPCR/FPSR are
# reloaded from the frame.
#
	global		_fgetexps_
_fgetexps_:
	link		%a6,&-LOCAL_SIZE	# build the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# stash d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # stash caller's FPCR/FPSR
	fmovm.x		&0xc0,EXC_FP0(%a6)	# stash fp0/fp1

	fmov.l		&0x0,%fpcr		# run with an all-zero FPCR

# Widen the operand to extended precision and classify it.
	fmov.s		0x8(%a6),%fp0		# sgl argument from the stack
	fmov.x		%fp0,FP_SRC(%a6)	# extended copy for the routines
	lea		FP_SRC(%a6),%a0		# a0 -> extended operand
	bsr.l		tag			# d0 = operand type
	mov.b		%d0,STAG(%a6)		# record the source tag
	mov.b		%d0,%d1			# d1 = tag used for dispatch

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# d0 = rounding mode/precision byte

# Dispatch on the tag; every path rejoins at _L18_6s.
	tst.b		%d1			# tag 0 means NORM
	bne.b		_L18_2s
	bsr.l		sgetexp			# NORM
	bra.b		_L18_6s
_L18_2s:
	cmpi.b		%d1,&ZERO
	bne.b		_L18_3s
	bsr.l		src_zero		# ZERO
	bra.b		_L18_6s
_L18_3s:
	cmpi.b		%d1,&INF
	bne.b		_L18_4s
	bsr.l		t_operr			# INF
	bra.b		_L18_6s
_L18_4s:
	cmpi.b		%d1,&QNAN
	bne.b		_L18_5s
	bsr.l		src_qnan		# QNAN
	bra.b		_L18_6s
_L18_5s:
	bsr.l		sgetexpd		# everything else: DENORM
_L18_6s:

# fp0 now carries the result, so only fp1 is reloaded below.
	movm.l		EXC_DREGS(%a6),&0x0303	# reload d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # reload FPCR/FPSR from the frame
	fmovm.x		EXC_FP1(%a6),&0x40	# reload fp1 only
	unlk		%a6
	rts
3803 | |||
#
# _fgetexpd_: double-precision entry point of the fgetexp family.
#
# The dbl operand is read from 0x8(%a6) (first stack argument once the
# frame is linked), widened to extended precision in FP_SRC and classified
# by tag().  The tag, kept in d1 and recorded once in STAG, selects:
#	NORM -> sgetexp		ZERO -> src_zero	INF -> t_operr
#	QNAN -> src_qnan	any other tag (DENORM) -> sgetexpd
# At dispatch d0 holds the zero-extended FPCR_MODE byte.
#
# On return fp0 holds the result; d0-d1/a0-a1, fp1 and FPCR/FPSR are
# reloaded from the frame.
#
	global		_fgetexpd_
_fgetexpd_:
	link		%a6,&-LOCAL_SIZE	# build the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# stash d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # stash caller's FPCR/FPSR
	fmovm.x		&0xc0,EXC_FP0(%a6)	# stash fp0/fp1

	fmov.l		&0x0,%fpcr		# run with an all-zero FPCR

# Widen the operand to extended precision and classify it.
	fmov.d		0x8(%a6),%fp0		# dbl argument from the stack
	fmov.x		%fp0,FP_SRC(%a6)	# extended copy for the routines
	lea		FP_SRC(%a6),%a0		# a0 -> extended operand
	bsr.l		tag			# d0 = operand type
	mov.b		%d0,STAG(%a6)		# record the source tag (only store needed)
	mov.b		%d0,%d1			# d1 = tag used for dispatch

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# d0 = rounding mode/precision byte

# Dispatch on the tag; every path rejoins at _L18_6d.
	tst.b		%d1			# tag 0 means NORM
	bne.b		_L18_2d
	bsr.l		sgetexp			# NORM
	bra.b		_L18_6d
_L18_2d:
	cmpi.b		%d1,&ZERO
	bne.b		_L18_3d
	bsr.l		src_zero		# ZERO
	bra.b		_L18_6d
_L18_3d:
	cmpi.b		%d1,&INF
	bne.b		_L18_4d
	bsr.l		t_operr			# INF
	bra.b		_L18_6d
_L18_4d:
	cmpi.b		%d1,&QNAN
	bne.b		_L18_5d
	bsr.l		src_qnan		# QNAN
	bra.b		_L18_6d
_L18_5d:
	bsr.l		sgetexpd		# everything else: DENORM
_L18_6d:

# fp0 now carries the result, so only fp1 is reloaded below.
	movm.l		EXC_DREGS(%a6),&0x0303	# reload d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # reload FPCR/FPSR from the frame
	fmovm.x		EXC_FP1(%a6),&0x40	# reload fp1 only
	unlk		%a6
	rts
3861 | |||
#
# _fgetexpx_: extended-precision entry point of the fgetexp family.
#
# The 12-byte ext operand at 0x8(%a6) (first stack argument once the frame
# is linked) is copied into FP_SRC and classified by tag().  The tag, kept
# in d1 and recorded in STAG, selects:
#	NORM -> sgetexp		ZERO -> src_zero	INF -> t_operr
#	QNAN -> src_qnan	any other tag (DENORM) -> sgetexpd
# At dispatch d0 holds the zero-extended FPCR_MODE byte.
#
# On return fp0 holds the result; d0-d1/a0-a1, fp1 and FPCR/FPSR are
# reloaded from the frame.
#
	global		_fgetexpx_
_fgetexpx_:
	link		%a6,&-LOCAL_SIZE	# build the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# stash d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # stash caller's FPCR/FPSR
	fmovm.x		&0xc0,EXC_FP0(%a6)	# stash fp0/fp1

	fmov.l		&0x0,%fpcr		# run with an all-zero FPCR

# Copy the extended operand into FP_SRC and classify it.
	lea		FP_SRC(%a6),%a0		# a0 -> operand copy
	mov.l		0x8+0x0(%a6),0x0(%a0)	# three longwords from the stack
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# d0 = operand type
	mov.b		%d0,STAG(%a6)		# record the source tag
	mov.b		%d0,%d1			# d1 = tag used for dispatch

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# d0 = rounding mode/precision byte

# Dispatch on the tag; every path rejoins at _L18_6x.
	tst.b		%d1			# tag 0 means NORM
	bne.b		_L18_2x
	bsr.l		sgetexp			# NORM
	bra.b		_L18_6x
_L18_2x:
	cmpi.b		%d1,&ZERO
	bne.b		_L18_3x
	bsr.l		src_zero		# ZERO
	bra.b		_L18_6x
_L18_3x:
	cmpi.b		%d1,&INF
	bne.b		_L18_4x
	bsr.l		t_operr			# INF
	bra.b		_L18_6x
_L18_4x:
	cmpi.b		%d1,&QNAN
	bne.b		_L18_5x
	bsr.l		src_qnan		# QNAN
	bra.b		_L18_6x
_L18_5x:
	bsr.l		sgetexpd		# everything else: DENORM
_L18_6x:

# fp0 now carries the result, so only fp1 is reloaded below.
	movm.l		EXC_DREGS(%a6),&0x0303	# reload d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # reload FPCR/FPSR from the frame
	fmovm.x		EXC_FP1(%a6),&0x40	# reload fp1 only
	unlk		%a6
	rts
3919 | |||
3920 | |||
3921 | ######################################################################### | ||
3922 | # MONADIC TEMPLATE # | ||
3923 | ######################################################################### | ||
#
# _fgetmans_: single-precision entry point of the fgetman family.
#
# The sgl operand is read from 0x8(%a6) (first stack argument once the
# frame is linked), widened to extended precision in FP_SRC and classified
# by tag().  The tag, kept in d1 and recorded in STAG, selects:
#	NORM -> sgetman		ZERO -> src_zero	INF -> t_operr
#	QNAN -> src_qnan	any other tag (DENORM) -> sgetmand
# At dispatch d0 holds the zero-extended FPCR_MODE byte.
#
# On return fp0 holds the result; d0-d1/a0-a1, fp1 and FPCR/FPSR are
# reloaded from the frame.
#
	global		_fgetmans_
_fgetmans_:
	link		%a6,&-LOCAL_SIZE	# build the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# stash d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # stash caller's FPCR/FPSR
	fmovm.x		&0xc0,EXC_FP0(%a6)	# stash fp0/fp1

	fmov.l		&0x0,%fpcr		# run with an all-zero FPCR

# Widen the operand to extended precision and classify it.
	fmov.s		0x8(%a6),%fp0		# sgl argument from the stack
	fmov.x		%fp0,FP_SRC(%a6)	# extended copy for the routines
	lea		FP_SRC(%a6),%a0		# a0 -> extended operand
	bsr.l		tag			# d0 = operand type
	mov.b		%d0,STAG(%a6)		# record the source tag
	mov.b		%d0,%d1			# d1 = tag used for dispatch

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# d0 = rounding mode/precision byte

# Dispatch on the tag; every path rejoins at _L19_6s.
	tst.b		%d1			# tag 0 means NORM
	bne.b		_L19_2s
	bsr.l		sgetman			# NORM
	bra.b		_L19_6s
_L19_2s:
	cmpi.b		%d1,&ZERO
	bne.b		_L19_3s
	bsr.l		src_zero		# ZERO
	bra.b		_L19_6s
_L19_3s:
	cmpi.b		%d1,&INF
	bne.b		_L19_4s
	bsr.l		t_operr			# INF
	bra.b		_L19_6s
_L19_4s:
	cmpi.b		%d1,&QNAN
	bne.b		_L19_5s
	bsr.l		src_qnan		# QNAN
	bra.b		_L19_6s
_L19_5s:
	bsr.l		sgetmand		# everything else: DENORM
_L19_6s:

# fp0 now carries the result, so only fp1 is reloaded below.
	movm.l		EXC_DREGS(%a6),&0x0303	# reload d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # reload FPCR/FPSR from the frame
	fmovm.x		EXC_FP1(%a6),&0x40	# reload fp1 only
	unlk		%a6
	rts
3980 | |||
3981 | global _fgetmand_ | ||
3982 | _fgetmand_: | ||
3983 | link %a6,&-LOCAL_SIZE | ||
3984 | |||
3985 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the frame | ||
3986 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs (FPCR, FPSR) in the frame | ||
3987 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1; only fp1 is reloaded at exit | ||
3988 | |||
3989 | fmov.l &0x0,%fpcr # zero FPCR before the argument is converted | ||
3990 | |||
3991 | # _fgetmand_: double-precision entry point for the sgetman handlers; argument at 0x8(%a6). | ||
3992 | # The argument is stored to FP_SRC in extended precision and classified by tag; the tag | ||
3993 | # goes to STAG and d1. NOTE(review): a0 is not reloaded after tag - confirm tag preserves it. | ||
3994 | fmov.d 0x8(%a6),%fp0 # load dbl input into fp0 | ||
3995 | fmov.x %fp0,FP_SRC(%a6) | ||
3996 | lea FP_SRC(%a6),%a0 | ||
3997 | bsr.l tag # fetch operand type into d0 (a0 = ptr to FP_SRC) | ||
3998 | mov.b %d0,STAG(%a6) | ||
3999 | mov.b %d0,%d1 | ||
4000 | |||
4001 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
4002 | |||
4003 | clr.l %d0 | ||
4004 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec in d0; the STAG store that follows repeats the one above | ||
4005 | |||
4006 | mov.b %d1,STAG(%a6) | ||
4007 | tst.b %d1 | ||
4008 | bne.b _L19_2d | ||
4009 | bsr.l sgetman # tag 0: operand is a NORM | ||
4010 | bra.b _L19_6d | ||
4011 | _L19_2d: | ||
4012 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
4013 | bne.b _L19_3d # no, try INF | ||
4014 | bsr.l src_zero # yes: ZERO goes to src_zero | ||
4015 | bra.b _L19_6d | ||
4016 | _L19_3d: | ||
4017 | cmpi.b %d1,&INF # is operand an INF? | ||
4018 | bne.b _L19_4d # no, try QNAN | ||
4019 | bsr.l t_operr # yes: INF goes to t_operr | ||
4020 | bra.b _L19_6d | ||
4021 | _L19_4d: | ||
4022 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
4023 | bne.b _L19_5d # no, fall back to the DENORM handler | ||
4024 | bsr.l src_qnan # yes: QNAN goes to src_qnan | ||
4025 | bra.b _L19_6d | ||
4026 | _L19_5d: | ||
4027 | bsr.l sgetmand # any other tag: operand is taken to be a DENORM | ||
4028 | _L19_6d: | ||
4029 | |||
4030 | # Common exit: the handler called above has left the result in fp0. | ||
4031 | # d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame; fp0 is returned untouched. | ||
4032 | # NOTE(review): USER_FPSR(%a6) - presumably the FPSR image reloaded here - was masked with 0x00ff00ff above; confirm. | ||
4033 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
4034 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs from the frame | ||
4035 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 carries the result | ||
4036 | unlk %a6 | ||
4037 | rts | ||
4038 | |||
4039 | global _fgetmanx_ | ||
4040 | _fgetmanx_: | ||
4041 | link %a6,&-LOCAL_SIZE | ||
4042 | |||
4043 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the frame | ||
4044 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs (FPCR, FPSR) in the frame | ||
4045 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1; only fp1 is reloaded at exit | ||
4046 | |||
4047 | fmov.l &0x0,%fpcr # zero FPCR; the saved value is reloaded at exit | ||
4048 | |||
4049 | # _fgetmanx_: extended-precision entry point for the sgetman handlers; argument at 0x8(%a6). | ||
4050 | # The 12-byte argument is copied to FP_SRC as three longwords and classified by tag; the tag | ||
4051 | # goes to STAG and d1. NOTE(review): a0 is not reloaded after tag - confirm tag preserves it. | ||
4052 | lea FP_SRC(%a6),%a0 | ||
4053 | mov.l 0x8+0x0(%a6),0x0(%a0) # copy ext input, first of three longwords | ||
4054 | mov.l 0x8+0x4(%a6),0x4(%a0) | ||
4055 | mov.l 0x8+0x8(%a6),0x8(%a0) | ||
4056 | bsr.l tag # fetch operand type into d0 (a0 = ptr to FP_SRC) | ||
4057 | mov.b %d0,STAG(%a6) | ||
4058 | mov.b %d0,%d1 | ||
4059 | |||
4060 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
4061 | |||
4062 | clr.l %d0 | ||
4063 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec in d0 (upper 24 bits cleared above) | ||
4064 | |||
4065 | tst.b %d1 | ||
4066 | bne.b _L19_2x | ||
4067 | bsr.l sgetman # tag 0: operand is a NORM | ||
4068 | bra.b _L19_6x | ||
4069 | _L19_2x: | ||
4070 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
4071 | bne.b _L19_3x # no, try INF | ||
4072 | bsr.l src_zero # yes: ZERO goes to src_zero | ||
4073 | bra.b _L19_6x | ||
4074 | _L19_3x: | ||
4075 | cmpi.b %d1,&INF # is operand an INF? | ||
4076 | bne.b _L19_4x # no, try QNAN | ||
4077 | bsr.l t_operr # yes: INF goes to t_operr | ||
4078 | bra.b _L19_6x | ||
4079 | _L19_4x: | ||
4080 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
4081 | bne.b _L19_5x # no, fall back to the DENORM handler | ||
4082 | bsr.l src_qnan # yes: QNAN goes to src_qnan | ||
4083 | bra.b _L19_6x | ||
4084 | _L19_5x: | ||
4085 | bsr.l sgetmand # any other tag: operand is taken to be a DENORM | ||
4086 | _L19_6x: | ||
4087 | |||
4088 | # Common exit: the handler called above has left the result in fp0. | ||
4089 | # d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame; fp0 is returned untouched. | ||
4090 | # NOTE(review): USER_FPSR(%a6) - presumably the FPSR image reloaded here - was masked with 0x00ff00ff above; confirm. | ||
4091 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
4092 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs from the frame | ||
4093 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 carries the result | ||
4094 | unlk %a6 | ||
4095 | rts | ||
4096 | |||
4097 | |||
4098 | ######################################################################### | ||
4099 | # MONADIC TEMPLATE # | ||
4100 | ######################################################################### | ||
4101 | global _fsincoss_ | ||
4102 | _fsincoss_: | ||
4103 | link %a6,&-LOCAL_SIZE | ||
4104 | |||
4105 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the frame | ||
4106 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs (FPCR, FPSR) in the frame | ||
4107 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 (neither is reloaded at exit) | ||
4108 | |||
4109 | fmov.l &0x0,%fpcr # zero FPCR before the argument is converted | ||
4110 | |||
4111 | # _fsincoss_: single-precision entry point for the ssincos handlers; argument at 0x8(%a6). | ||
4112 | # The argument is stored to FP_SRC in extended precision and classified by tag; the tag | ||
4113 | # goes to STAG and d1. NOTE(review): a0 is not reloaded after tag - confirm tag preserves it. | ||
4114 | fmov.s 0x8(%a6),%fp0 # load sgl input into fp0 | ||
4115 | fmov.x %fp0,FP_SRC(%a6) | ||
4116 | lea FP_SRC(%a6),%a0 | ||
4117 | bsr.l tag # fetch operand type into d0 (a0 = ptr to FP_SRC) | ||
4118 | mov.b %d0,STAG(%a6) | ||
4119 | mov.b %d0,%d1 | ||
4120 | |||
4121 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
4122 | |||
4123 | clr.l %d0 | ||
4124 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec in d0 (upper 24 bits cleared above) | ||
4125 | |||
4126 | tst.b %d1 | ||
4127 | bne.b _L20_2s | ||
4128 | bsr.l ssincos # tag 0: operand is a NORM | ||
4129 | bra.b _L20_6s | ||
4130 | _L20_2s: | ||
4131 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
4132 | bne.b _L20_3s # no, try INF | ||
4133 | bsr.l ssincosz # yes: ZERO goes to ssincosz | ||
4134 | bra.b _L20_6s | ||
4135 | _L20_3s: | ||
4136 | cmpi.b %d1,&INF # is operand an INF? | ||
4137 | bne.b _L20_4s # no, try QNAN | ||
4138 | bsr.l ssincosi # yes: INF goes to ssincosi | ||
4139 | bra.b _L20_6s | ||
4140 | _L20_4s: | ||
4141 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
4142 | bne.b _L20_5s # no, fall back to the DENORM handler | ||
4143 | bsr.l ssincosqnan # yes: QNAN goes to ssincosqnan | ||
4144 | bra.b _L20_6s | ||
4145 | _L20_5s: | ||
4146 | bsr.l ssincosd # any other tag: operand is taken to be a DENORM | ||
4147 | _L20_6s: | ||
4148 | |||
4149 | # Common exit: ssincos is documented in this file as returning fp0 = sin(X) and fp1 = cos(X). | ||
4150 | # fp0 and fp1 are exchanged through the stack below, so the two values reach the caller swapped; | ||
4151 | # neither is reloaded from the frame. NOTE(review): confirm the z, i, qnan and d handlers use the same fp0/fp1 layout. | ||
4152 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
4153 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs from the frame | ||
4154 | fmovm.x &0x03,-(%sp) # store off fp0/fp1 on the stack | ||
4155 | fmovm.x (%sp)+,&0x40 # old fp0 is popped into fp1 | ||
4156 | fmovm.x (%sp)+,&0x80 # old fp1 is popped into fp0 | ||
4157 | unlk %a6 | ||
4158 | rts | ||
4159 | |||
4160 | global _fsincosd_ | ||
4161 | _fsincosd_: | ||
4162 | link %a6,&-LOCAL_SIZE | ||
4163 | |||
4164 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the frame | ||
4165 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs (FPCR, FPSR) in the frame | ||
4166 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 (neither is reloaded at exit) | ||
4167 | |||
4168 | fmov.l &0x0,%fpcr # zero FPCR before the argument is converted | ||
4169 | |||
4170 | # _fsincosd_: double-precision entry point for the ssincos handlers; argument at 0x8(%a6). | ||
4171 | # The argument is stored to FP_SRC in extended precision and classified by tag; the tag | ||
4172 | # goes to STAG and d1. NOTE(review): a0 is not reloaded after tag - confirm tag preserves it. | ||
4173 | fmov.d 0x8(%a6),%fp0 # load dbl input into fp0 | ||
4174 | fmov.x %fp0,FP_SRC(%a6) | ||
4175 | lea FP_SRC(%a6),%a0 | ||
4176 | bsr.l tag # fetch operand type into d0 (a0 = ptr to FP_SRC) | ||
4177 | mov.b %d0,STAG(%a6) | ||
4178 | mov.b %d0,%d1 | ||
4179 | |||
4180 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
4181 | |||
4182 | clr.l %d0 | ||
4183 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec in d0; the STAG store that follows repeats the one above | ||
4184 | |||
4185 | mov.b %d1,STAG(%a6) | ||
4186 | tst.b %d1 | ||
4187 | bne.b _L20_2d | ||
4188 | bsr.l ssincos # tag 0: operand is a NORM | ||
4189 | bra.b _L20_6d | ||
4190 | _L20_2d: | ||
4191 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
4192 | bne.b _L20_3d # no, try INF | ||
4193 | bsr.l ssincosz # yes: ZERO goes to ssincosz | ||
4194 | bra.b _L20_6d | ||
4195 | _L20_3d: | ||
4196 | cmpi.b %d1,&INF # is operand an INF? | ||
4197 | bne.b _L20_4d # no, try QNAN | ||
4198 | bsr.l ssincosi # yes: INF goes to ssincosi | ||
4199 | bra.b _L20_6d | ||
4200 | _L20_4d: | ||
4201 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
4202 | bne.b _L20_5d # no, fall back to the DENORM handler | ||
4203 | bsr.l ssincosqnan # yes: QNAN goes to ssincosqnan | ||
4204 | bra.b _L20_6d | ||
4205 | _L20_5d: | ||
4206 | bsr.l ssincosd # any other tag: operand is taken to be a DENORM | ||
4207 | _L20_6d: | ||
4208 | |||
4209 | # Common exit: ssincos is documented in this file as returning fp0 = sin(X) and fp1 = cos(X). | ||
4210 | # fp0 and fp1 are exchanged through the stack below, so the two values reach the caller swapped; | ||
4211 | # neither is reloaded from the frame. NOTE(review): confirm the z, i, qnan and d handlers use the same fp0/fp1 layout. | ||
4212 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
4213 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs from the frame | ||
4214 | fmovm.x &0x03,-(%sp) # store off fp0/fp1 on the stack | ||
4215 | fmovm.x (%sp)+,&0x40 # old fp0 is popped into fp1 | ||
4216 | fmovm.x (%sp)+,&0x80 # old fp1 is popped into fp0 | ||
4217 | unlk %a6 | ||
4218 | rts | ||
4219 | |||
4220 | global _fsincosx_ | ||
4221 | _fsincosx_: | ||
4222 | link %a6,&-LOCAL_SIZE | ||
4223 | |||
4224 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the frame | ||
4225 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs (FPCR, FPSR) in the frame | ||
4226 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 (neither is reloaded at exit) | ||
4227 | |||
4228 | fmov.l &0x0,%fpcr # zero FPCR; the saved value is reloaded at exit | ||
4229 | |||
4230 | # _fsincosx_: extended-precision entry point for the ssincos handlers; argument at 0x8(%a6). | ||
4231 | # The 12-byte argument is copied to FP_SRC as three longwords and classified by tag; the tag | ||
4232 | # goes to STAG and d1. NOTE(review): a0 is not reloaded after tag - confirm tag preserves it. | ||
4233 | lea FP_SRC(%a6),%a0 | ||
4234 | mov.l 0x8+0x0(%a6),0x0(%a0) # copy ext input, first of three longwords | ||
4235 | mov.l 0x8+0x4(%a6),0x4(%a0) | ||
4236 | mov.l 0x8+0x8(%a6),0x8(%a0) | ||
4237 | bsr.l tag # fetch operand type into d0 (a0 = ptr to FP_SRC) | ||
4238 | mov.b %d0,STAG(%a6) | ||
4239 | mov.b %d0,%d1 | ||
4240 | |||
4241 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
4242 | |||
4243 | clr.l %d0 | ||
4244 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec in d0 (upper 24 bits cleared above) | ||
4245 | |||
4246 | tst.b %d1 | ||
4247 | bne.b _L20_2x | ||
4248 | bsr.l ssincos # tag 0: operand is a NORM | ||
4249 | bra.b _L20_6x | ||
4250 | _L20_2x: | ||
4251 | cmpi.b %d1,&ZERO # is operand a ZERO? | ||
4252 | bne.b _L20_3x # no, try INF | ||
4253 | bsr.l ssincosz # yes: ZERO goes to ssincosz | ||
4254 | bra.b _L20_6x | ||
4255 | _L20_3x: | ||
4256 | cmpi.b %d1,&INF # is operand an INF? | ||
4257 | bne.b _L20_4x # no, try QNAN | ||
4258 | bsr.l ssincosi # yes: INF goes to ssincosi | ||
4259 | bra.b _L20_6x | ||
4260 | _L20_4x: | ||
4261 | cmpi.b %d1,&QNAN # is operand a QNAN? | ||
4262 | bne.b _L20_5x # no, fall back to the DENORM handler | ||
4263 | bsr.l ssincosqnan # yes: QNAN goes to ssincosqnan | ||
4264 | bra.b _L20_6x | ||
4265 | _L20_5x: | ||
4266 | bsr.l ssincosd # any other tag: operand is taken to be a DENORM | ||
4267 | _L20_6x: | ||
4268 | |||
4269 | # Common exit: ssincos is documented in this file as returning fp0 = sin(X) and fp1 = cos(X). | ||
4270 | # fp0 and fp1 are exchanged through the stack below, so the two values reach the caller swapped; | ||
4271 | # neither is reloaded from the frame. NOTE(review): confirm the z, i, qnan and d handlers use the same fp0/fp1 layout. | ||
4272 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
4273 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs from the frame | ||
4274 | fmovm.x &0x03,-(%sp) # store off fp0/fp1 on the stack | ||
4275 | fmovm.x (%sp)+,&0x40 # old fp0 is popped into fp1 | ||
4276 | fmovm.x (%sp)+,&0x80 # old fp1 is popped into fp0 | ||
4277 | unlk %a6 | ||
4278 | rts | ||
4279 | |||
4280 | |||
4281 | ######################################################################### | ||
4282 | # DYADIC TEMPLATE # | ||
4283 | ######################################################################### | ||
4284 | global _frems_ | ||
4285 | _frems_: | ||
4286 | link %a6,&-LOCAL_SIZE | ||
4287 | |||
4288 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the frame | ||
4289 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs (FPCR, FPSR) in the frame | ||
4290 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1; only fp1 is reloaded at exit | ||
4291 | |||
4292 | fmov.l &0x0,%fpcr # zero FPCR before the arguments are converted | ||
4293 | |||
4294 | # _frems_: single-precision entry point for the srem_* handlers; dst at 0x8(%a6), src at 0xc(%a6). | ||
4295 | # Each operand is stored in extended precision (FP_DST, FP_SRC) and classified by tag (DTAG, STAG). | ||
4296 | # Only the src tag, copied to d1, picks the handler below; DTAG is stored but not examined here. | ||
4297 | fmov.s 0x8(%a6),%fp0 # load sgl dst into fp0 | ||
4298 | fmov.x %fp0,FP_DST(%a6) | ||
4299 | lea FP_DST(%a6),%a0 | ||
4300 | bsr.l tag # fetch dst operand type into d0 | ||
4301 | mov.b %d0,DTAG(%a6) | ||
4302 | |||
4303 | fmov.s 0xc(%a6),%fp0 # load sgl src into fp0 | ||
4304 | fmov.x %fp0,FP_SRC(%a6) | ||
4305 | lea FP_SRC(%a6),%a0 | ||
4306 | bsr.l tag # fetch src operand type into d0 | ||
4307 | mov.b %d0,STAG(%a6) | ||
4308 | mov.l %d0,%d1 | ||
4309 | |||
4310 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
4311 | |||
4312 | clr.l %d0 | ||
4313 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec in d0 (upper 24 bits cleared above) | ||
4314 | |||
4315 | lea FP_SRC(%a6),%a0 # a0 = ptr to extended src | ||
4316 | lea FP_DST(%a6),%a1 # a1 = ptr to extended dst | ||
4317 | |||
4318 | tst.b %d1 | ||
4319 | bne.b _L21_2s | ||
4320 | bsr.l srem_snorm # tag 0: src is a NORM | ||
4321 | bra.b _L21_6s | ||
4322 | _L21_2s: | ||
4323 | cmpi.b %d1,&ZERO # is src a ZERO? | ||
4324 | bne.b _L21_3s # no, try INF | ||
4325 | bsr.l srem_szero # yes | ||
4326 | bra.b _L21_6s | ||
4327 | _L21_3s: | ||
4328 | cmpi.b %d1,&INF # is src an INF? | ||
4329 | bne.b _L21_4s # no, try QNAN | ||
4330 | bsr.l srem_sinf # yes | ||
4331 | bra.b _L21_6s | ||
4332 | _L21_4s: | ||
4333 | cmpi.b %d1,&QNAN # is src a QNAN? | ||
4334 | bne.b _L21_5s # no, fall back to the DENORM handler | ||
4335 | bsr.l sop_sqnan # yes: same QNAN routine as the other dyadic stubs | ||
4336 | bra.b _L21_6s | ||
4337 | _L21_5s: | ||
4338 | bsr.l srem_sdnrm # any other tag: src is taken to be a DENORM | ||
4339 | _L21_6s: | ||
4340 | |||
4341 | # Common exit: the handler called above has left the result in fp0. | ||
4342 | # d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame; fp0 is returned untouched. | ||
4343 | # NOTE(review): USER_FPSR(%a6) - presumably the FPSR image reloaded here - was masked with 0x00ff00ff above; confirm. | ||
4344 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
4345 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs from the frame | ||
4346 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 carries the result | ||
4347 | unlk %a6 | ||
4348 | rts | ||
4349 | |||
4350 | global _fremd_ | ||
4351 | _fremd_: | ||
4352 | link %a6,&-LOCAL_SIZE | ||
4353 | |||
4354 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the frame | ||
4355 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs (FPCR, FPSR) in the frame | ||
4356 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1; only fp1 is reloaded at exit | ||
4357 | |||
4358 | fmov.l &0x0,%fpcr # zero FPCR before the arguments are converted | ||
4359 | |||
4360 | # _fremd_: double-precision entry point for the srem_* handlers; dst at 0x8(%a6), src at 0x10(%a6). | ||
4361 | # Each operand is stored in extended precision (FP_DST, FP_SRC) and classified by tag (DTAG, STAG). | ||
4362 | # Only the src tag, copied to d1, picks the handler below; DTAG is stored but not examined here. | ||
4363 | fmov.d 0x8(%a6),%fp0 # load dbl dst into fp0 | ||
4364 | fmov.x %fp0,FP_DST(%a6) | ||
4365 | lea FP_DST(%a6),%a0 | ||
4366 | bsr.l tag # fetch dst operand type into d0 | ||
4367 | mov.b %d0,DTAG(%a6) | ||
4368 | |||
4369 | fmov.d 0x10(%a6),%fp0 # load dbl src into fp0 | ||
4370 | fmov.x %fp0,FP_SRC(%a6) | ||
4371 | lea FP_SRC(%a6),%a0 | ||
4372 | bsr.l tag # fetch src operand type into d0 | ||
4373 | mov.b %d0,STAG(%a6) | ||
4374 | mov.l %d0,%d1 | ||
4375 | |||
4376 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
4377 | |||
4378 | clr.l %d0 | ||
4379 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec in d0 (upper 24 bits cleared above) | ||
4380 | |||
4381 | lea FP_SRC(%a6),%a0 # a0 = ptr to extended src | ||
4382 | lea FP_DST(%a6),%a1 # a1 = ptr to extended dst | ||
4383 | |||
4384 | tst.b %d1 | ||
4385 | bne.b _L21_2d | ||
4386 | bsr.l srem_snorm # tag 0: src is a NORM | ||
4387 | bra.b _L21_6d | ||
4388 | _L21_2d: | ||
4389 | cmpi.b %d1,&ZERO # is src a ZERO? | ||
4390 | bne.b _L21_3d # no, try INF | ||
4391 | bsr.l srem_szero # yes | ||
4392 | bra.b _L21_6d | ||
4393 | _L21_3d: | ||
4394 | cmpi.b %d1,&INF # is src an INF? | ||
4395 | bne.b _L21_4d # no, try QNAN | ||
4396 | bsr.l srem_sinf # yes | ||
4397 | bra.b _L21_6d | ||
4398 | _L21_4d: | ||
4399 | cmpi.b %d1,&QNAN # is src a QNAN? | ||
4400 | bne.b _L21_5d # no, fall back to the DENORM handler | ||
4401 | bsr.l sop_sqnan # yes: same QNAN routine as the other dyadic stubs | ||
4402 | bra.b _L21_6d | ||
4403 | _L21_5d: | ||
4404 | bsr.l srem_sdnrm # any other tag: src is taken to be a DENORM | ||
4405 | _L21_6d: | ||
4406 | |||
4407 | # Common exit: the handler called above has left the result in fp0. | ||
4408 | # d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame; fp0 is returned untouched. | ||
4409 | # NOTE(review): USER_FPSR(%a6) - presumably the FPSR image reloaded here - was masked with 0x00ff00ff above; confirm. | ||
4410 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
4411 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs from the frame | ||
4412 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 carries the result | ||
4413 | unlk %a6 | ||
4414 | rts | ||
4415 | |||
4416 | global _fremx_ | ||
4417 | _fremx_: | ||
4418 | link %a6,&-LOCAL_SIZE | ||
4419 | |||
4420 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the frame | ||
4421 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs (FPCR, FPSR) in the frame | ||
4422 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1; only fp1 is reloaded at exit | ||
4423 | |||
4424 | fmov.l &0x0,%fpcr # zero FPCR; the saved value is reloaded at exit | ||
4425 | |||
4426 | # _fremx_: extended-precision entry point for the srem_* handlers; dst at 0x8(%a6), src at 0x14(%a6). | ||
4427 | # Each 12-byte operand is copied as three longwords (FP_DST, FP_SRC) and classified by tag (DTAG, STAG). | ||
4428 | # Only the src tag, copied to d1, picks the handler below; DTAG is stored but not examined here. | ||
4429 | lea FP_DST(%a6),%a0 | ||
4430 | mov.l 0x8+0x0(%a6),0x0(%a0) # copy ext dst, first of three longwords | ||
4431 | mov.l 0x8+0x4(%a6),0x4(%a0) | ||
4432 | mov.l 0x8+0x8(%a6),0x8(%a0) | ||
4433 | bsr.l tag # fetch dst operand type into d0 | ||
4434 | mov.b %d0,DTAG(%a6) | ||
4435 | |||
4436 | lea FP_SRC(%a6),%a0 | ||
4437 | mov.l 0x14+0x0(%a6),0x0(%a0) # copy ext src, first of three longwords | ||
4438 | mov.l 0x14+0x4(%a6),0x4(%a0) | ||
4439 | mov.l 0x14+0x8(%a6),0x8(%a0) | ||
4440 | bsr.l tag # fetch src operand type into d0 | ||
4441 | mov.b %d0,STAG(%a6) | ||
4442 | mov.l %d0,%d1 | ||
4443 | |||
4444 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
4445 | |||
4446 | clr.l %d0 | ||
4447 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec in d0 (upper 24 bits cleared above) | ||
4448 | |||
4449 | lea FP_SRC(%a6),%a0 # a0 = ptr to extended src | ||
4450 | lea FP_DST(%a6),%a1 # a1 = ptr to extended dst | ||
4451 | |||
4452 | tst.b %d1 | ||
4453 | bne.b _L21_2x | ||
4454 | bsr.l srem_snorm # tag 0: src is a NORM | ||
4455 | bra.b _L21_6x | ||
4456 | _L21_2x: | ||
4457 | cmpi.b %d1,&ZERO # is src a ZERO? | ||
4458 | bne.b _L21_3x # no, try INF | ||
4459 | bsr.l srem_szero # yes | ||
4460 | bra.b _L21_6x | ||
4461 | _L21_3x: | ||
4462 | cmpi.b %d1,&INF # is src an INF? | ||
4463 | bne.b _L21_4x # no, try QNAN | ||
4464 | bsr.l srem_sinf # yes | ||
4465 | bra.b _L21_6x | ||
4466 | _L21_4x: | ||
4467 | cmpi.b %d1,&QNAN # is src a QNAN? | ||
4468 | bne.b _L21_5x # no, fall back to the DENORM handler | ||
4469 | bsr.l sop_sqnan # yes: same QNAN routine as the other dyadic stubs | ||
4470 | bra.b _L21_6x | ||
4471 | _L21_5x: | ||
4472 | bsr.l srem_sdnrm # any other tag: src is taken to be a DENORM | ||
4473 | _L21_6x: | ||
4474 | |||
4475 | # Common exit: the handler called above has left the result in fp0. | ||
4476 | # d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame; fp0 is returned untouched. | ||
4477 | # NOTE(review): USER_FPSR(%a6) - presumably the FPSR image reloaded here - was masked with 0x00ff00ff above; confirm. | ||
4478 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
4479 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs from the frame | ||
4480 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 carries the result | ||
4481 | unlk %a6 | ||
4482 | rts | ||
4483 | |||
4484 | |||
4485 | ######################################################################### | ||
4486 | # DYADIC TEMPLATE # | ||
4487 | ######################################################################### | ||
4488 | global _fmods_ | ||
4489 | _fmods_: | ||
4490 | link %a6,&-LOCAL_SIZE | ||
4491 | |||
4492 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the frame | ||
4493 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs (FPCR, FPSR) in the frame | ||
4494 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1; only fp1 is reloaded at exit | ||
4495 | |||
4496 | fmov.l &0x0,%fpcr # zero FPCR before the arguments are converted | ||
4497 | |||
4498 | # _fmods_: single-precision entry point for the smod_* handlers; dst at 0x8(%a6), src at 0xc(%a6). | ||
4499 | # Each operand is stored in extended precision (FP_DST, FP_SRC) and classified by tag (DTAG, STAG). | ||
4500 | # Only the src tag, copied to d1, picks the handler below; DTAG is stored but not examined here. | ||
4501 | fmov.s 0x8(%a6),%fp0 # load sgl dst into fp0 | ||
4502 | fmov.x %fp0,FP_DST(%a6) | ||
4503 | lea FP_DST(%a6),%a0 | ||
4504 | bsr.l tag # fetch dst operand type into d0 | ||
4505 | mov.b %d0,DTAG(%a6) | ||
4506 | |||
4507 | fmov.s 0xc(%a6),%fp0 # load sgl src into fp0 | ||
4508 | fmov.x %fp0,FP_SRC(%a6) | ||
4509 | lea FP_SRC(%a6),%a0 | ||
4510 | bsr.l tag # fetch src operand type into d0 | ||
4511 | mov.b %d0,STAG(%a6) | ||
4512 | mov.l %d0,%d1 | ||
4513 | |||
4514 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
4515 | |||
4516 | clr.l %d0 | ||
4517 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec in d0 (upper 24 bits cleared above) | ||
4518 | |||
4519 | lea FP_SRC(%a6),%a0 # a0 = ptr to extended src | ||
4520 | lea FP_DST(%a6),%a1 # a1 = ptr to extended dst | ||
4521 | |||
4522 | tst.b %d1 | ||
4523 | bne.b _L22_2s | ||
4524 | bsr.l smod_snorm # tag 0: src is a NORM | ||
4525 | bra.b _L22_6s | ||
4526 | _L22_2s: | ||
4527 | cmpi.b %d1,&ZERO # is src a ZERO? | ||
4528 | bne.b _L22_3s # no, try INF | ||
4529 | bsr.l smod_szero # yes | ||
4530 | bra.b _L22_6s | ||
4531 | _L22_3s: | ||
4532 | cmpi.b %d1,&INF # is src an INF? | ||
4533 | bne.b _L22_4s # no, try QNAN | ||
4534 | bsr.l smod_sinf # yes | ||
4535 | bra.b _L22_6s | ||
4536 | _L22_4s: | ||
4537 | cmpi.b %d1,&QNAN # is src a QNAN? | ||
4538 | bne.b _L22_5s # no, fall back to the DENORM handler | ||
4539 | bsr.l sop_sqnan # yes: same QNAN routine as the other dyadic stubs | ||
4540 | bra.b _L22_6s | ||
4541 | _L22_5s: | ||
4542 | bsr.l smod_sdnrm # any other tag: src is taken to be a DENORM | ||
4543 | _L22_6s: | ||
4544 | |||
4545 | # Common exit: the handler called above has left the result in fp0. | ||
4546 | # d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame; fp0 is returned untouched. | ||
4547 | # NOTE(review): USER_FPSR(%a6) - presumably the FPSR image reloaded here - was masked with 0x00ff00ff above; confirm. | ||
4548 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
4549 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs from the frame | ||
4550 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 carries the result | ||
4551 | unlk %a6 | ||
4552 | rts | ||
4553 | |||
4554 | global _fmodd_ | ||
4555 | _fmodd_: | ||
4556 | link %a6,&-LOCAL_SIZE | ||
4557 | |||
4558 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the frame | ||
4559 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs (FPCR, FPSR) in the frame | ||
4560 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1; only fp1 is reloaded at exit | ||
4561 | |||
4562 | fmov.l &0x0,%fpcr # zero FPCR before the arguments are converted | ||
4563 | |||
4564 | # _fmodd_: double-precision entry point for the smod_* handlers; dst at 0x8(%a6), src at 0x10(%a6). | ||
4565 | # Each operand is stored in extended precision (FP_DST, FP_SRC) and classified by tag (DTAG, STAG). | ||
4566 | # Only the src tag, copied to d1, picks the handler below; DTAG is stored but not examined here. | ||
4567 | fmov.d 0x8(%a6),%fp0 # load dbl dst into fp0 | ||
4568 | fmov.x %fp0,FP_DST(%a6) | ||
4569 | lea FP_DST(%a6),%a0 | ||
4570 | bsr.l tag # fetch dst operand type into d0 | ||
4571 | mov.b %d0,DTAG(%a6) | ||
4572 | |||
4573 | fmov.d 0x10(%a6),%fp0 # load dbl src into fp0 | ||
4574 | fmov.x %fp0,FP_SRC(%a6) | ||
4575 | lea FP_SRC(%a6),%a0 | ||
4576 | bsr.l tag # fetch src operand type into d0 | ||
4577 | mov.b %d0,STAG(%a6) | ||
4578 | mov.l %d0,%d1 | ||
4579 | |||
4580 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
4581 | |||
4582 | clr.l %d0 | ||
4583 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec in d0 (upper 24 bits cleared above) | ||
4584 | |||
4585 | lea FP_SRC(%a6),%a0 # a0 = ptr to extended src | ||
4586 | lea FP_DST(%a6),%a1 # a1 = ptr to extended dst | ||
4587 | |||
4588 | tst.b %d1 | ||
4589 | bne.b _L22_2d | ||
4590 | bsr.l smod_snorm # tag 0: src is a NORM | ||
4591 | bra.b _L22_6d | ||
4592 | _L22_2d: | ||
4593 | cmpi.b %d1,&ZERO # is src a ZERO? | ||
4594 | bne.b _L22_3d # no, try INF | ||
4595 | bsr.l smod_szero # yes | ||
4596 | bra.b _L22_6d | ||
4597 | _L22_3d: | ||
4598 | cmpi.b %d1,&INF # is src an INF? | ||
4599 | bne.b _L22_4d # no, try QNAN | ||
4600 | bsr.l smod_sinf # yes | ||
4601 | bra.b _L22_6d | ||
4602 | _L22_4d: | ||
4603 | cmpi.b %d1,&QNAN # is src a QNAN? | ||
4604 | bne.b _L22_5d # no, fall back to the DENORM handler | ||
4605 | bsr.l sop_sqnan # yes: same QNAN routine as the other dyadic stubs | ||
4606 | bra.b _L22_6d | ||
4607 | _L22_5d: | ||
4608 | bsr.l smod_sdnrm # any other tag: src is taken to be a DENORM | ||
4609 | _L22_6d: | ||
4610 | |||
4611 | # Common exit: the handler called above has left the result in fp0. | ||
4612 | # d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame; fp0 is returned untouched. | ||
4613 | # NOTE(review): USER_FPSR(%a6) - presumably the FPSR image reloaded here - was masked with 0x00ff00ff above; confirm. | ||
4614 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
4615 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs from the frame | ||
4616 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 carries the result | ||
4617 | unlk %a6 | ||
4618 | rts | ||
4619 | |||
4620 | global _fmodx_ | ||
4621 | _fmodx_: | ||
4622 | link %a6,&-LOCAL_SIZE | ||
4623 | |||
4624 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the frame | ||
4625 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs (FPCR, FPSR) in the frame | ||
4626 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1; only fp1 is reloaded at exit | ||
4627 | |||
4628 | fmov.l &0x0,%fpcr # zero FPCR; the saved value is reloaded at exit | ||
4629 | |||
4630 | # _fmodx_: extended-precision entry point for the smod_* handlers; dst at 0x8(%a6), src at 0x14(%a6). | ||
4631 | # Each 12-byte operand is copied as three longwords (FP_DST, FP_SRC) and classified by tag (DTAG, STAG). | ||
4632 | # Only the src tag, copied to d1, picks the handler below; DTAG is stored but not examined here. | ||
4633 | lea FP_DST(%a6),%a0 | ||
4634 | mov.l 0x8+0x0(%a6),0x0(%a0) # copy ext dst, first of three longwords | ||
4635 | mov.l 0x8+0x4(%a6),0x4(%a0) | ||
4636 | mov.l 0x8+0x8(%a6),0x8(%a0) | ||
4637 | bsr.l tag # fetch dst operand type into d0 | ||
4638 | mov.b %d0,DTAG(%a6) | ||
4639 | |||
4640 | lea FP_SRC(%a6),%a0 | ||
4641 | mov.l 0x14+0x0(%a6),0x0(%a0) # copy ext src, first of three longwords | ||
4642 | mov.l 0x14+0x4(%a6),0x4(%a0) | ||
4643 | mov.l 0x14+0x8(%a6),0x8(%a0) | ||
4644 | bsr.l tag # fetch src operand type into d0 | ||
4645 | mov.b %d0,STAG(%a6) | ||
4646 | mov.l %d0,%d1 | ||
4647 | |||
4648 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
4649 | |||
4650 | clr.l %d0 | ||
4651 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec in d0 (upper 24 bits cleared above) | ||
4652 | |||
4653 | lea FP_SRC(%a6),%a0 # a0 = ptr to extended src | ||
4654 | lea FP_DST(%a6),%a1 # a1 = ptr to extended dst | ||
4655 | |||
4656 | tst.b %d1 | ||
4657 | bne.b _L22_2x | ||
4658 | bsr.l smod_snorm # tag 0: src is a NORM | ||
4659 | bra.b _L22_6x | ||
4660 | _L22_2x: | ||
4661 | cmpi.b %d1,&ZERO # is src a ZERO? | ||
4662 | bne.b _L22_3x # no, try INF | ||
4663 | bsr.l smod_szero # yes | ||
4664 | bra.b _L22_6x | ||
4665 | _L22_3x: | ||
4666 | cmpi.b %d1,&INF # is src an INF? | ||
4667 | bne.b _L22_4x # no, try QNAN | ||
4668 | bsr.l smod_sinf # yes | ||
4669 | bra.b _L22_6x | ||
4670 | _L22_4x: | ||
4671 | cmpi.b %d1,&QNAN # is src a QNAN? | ||
4672 | bne.b _L22_5x # no, fall back to the DENORM handler | ||
4673 | bsr.l sop_sqnan # yes: same QNAN routine as the other dyadic stubs | ||
4674 | bra.b _L22_6x | ||
4675 | _L22_5x: | ||
4676 | bsr.l smod_sdnrm # any other tag: src is taken to be a DENORM | ||
4677 | _L22_6x: | ||
4678 | |||
4679 | # Common exit: the handler called above has left the result in fp0. | ||
4680 | # d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame; fp0 is returned untouched. | ||
4681 | # NOTE(review): USER_FPSR(%a6) - presumably the FPSR image reloaded here - was masked with 0x00ff00ff above; confirm. | ||
4682 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
4683 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs from the frame | ||
4684 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 carries the result | ||
4685 | unlk %a6 | ||
4686 | rts | ||
4687 | |||
4688 | |||
4689 | ######################################################################### | ||
4690 | # DYADIC TEMPLATE # | ||
4691 | ######################################################################### | ||
4692 | global _fscales_ | ||
4693 | _fscales_: | ||
4694 | link %a6,&-LOCAL_SIZE | ||
4695 | |||
4696 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the frame | ||
4697 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs (FPCR, FPSR) in the frame | ||
4698 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1; only fp1 is reloaded at exit | ||
4699 | |||
4700 | fmov.l &0x0,%fpcr # zero FPCR before the arguments are converted | ||
4701 | |||
4702 | # _fscales_: single-precision entry point for the sscale_* handlers; dst at 0x8(%a6), src at 0xc(%a6). | ||
4703 | # Each operand is stored in extended precision (FP_DST, FP_SRC) and classified by tag (DTAG, STAG). | ||
4704 | # Only the src tag, copied to d1, picks the handler below; DTAG is stored but not examined here. | ||
4705 | fmov.s 0x8(%a6),%fp0 # load sgl dst into fp0 | ||
4706 | fmov.x %fp0,FP_DST(%a6) | ||
4707 | lea FP_DST(%a6),%a0 | ||
4708 | bsr.l tag # fetch dst operand type into d0 | ||
4709 | mov.b %d0,DTAG(%a6) | ||
4710 | |||
4711 | fmov.s 0xc(%a6),%fp0 # load sgl src into fp0 | ||
4712 | fmov.x %fp0,FP_SRC(%a6) | ||
4713 | lea FP_SRC(%a6),%a0 | ||
4714 | bsr.l tag # fetch src operand type into d0 | ||
4715 | mov.b %d0,STAG(%a6) | ||
4716 | mov.l %d0,%d1 | ||
4717 | |||
4718 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
4719 | |||
4720 | clr.l %d0 | ||
4721 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec in d0 (upper 24 bits cleared above) | ||
4722 | |||
4723 | lea FP_SRC(%a6),%a0 # a0 = ptr to extended src | ||
4724 | lea FP_DST(%a6),%a1 # a1 = ptr to extended dst | ||
4725 | |||
4726 | tst.b %d1 | ||
4727 | bne.b _L23_2s | ||
4728 | bsr.l sscale_snorm # tag 0: src is a NORM | ||
4729 | bra.b _L23_6s | ||
4730 | _L23_2s: | ||
4731 | cmpi.b %d1,&ZERO # is src a ZERO? | ||
4732 | bne.b _L23_3s # no, try INF | ||
4733 | bsr.l sscale_szero # yes | ||
4734 | bra.b _L23_6s | ||
4735 | _L23_3s: | ||
4736 | cmpi.b %d1,&INF # is src an INF? | ||
4737 | bne.b _L23_4s # no, try QNAN | ||
4738 | bsr.l sscale_sinf # yes | ||
4739 | bra.b _L23_6s | ||
4740 | _L23_4s: | ||
4741 | cmpi.b %d1,&QNAN # is src a QNAN? | ||
4742 | bne.b _L23_5s # no, fall back to the DENORM handler | ||
4743 | bsr.l sop_sqnan # yes: same QNAN routine as the other dyadic stubs | ||
4744 | bra.b _L23_6s | ||
4745 | _L23_5s: | ||
4746 | bsr.l sscale_sdnrm # any other tag: src is taken to be a DENORM | ||
4747 | _L23_6s: | ||
4748 | |||
4749 | # Common exit: the handler called above has left the result in fp0. | ||
4750 | # d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame; fp0 is returned untouched. | ||
4751 | # NOTE(review): USER_FPSR(%a6) - presumably the FPSR image reloaded here - was masked with 0x00ff00ff above; confirm. | ||
4752 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
4753 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs from the frame | ||
4754 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 carries the result | ||
4755 | unlk %a6 | ||
4756 | rts | ||
4757 | |||
4758 | global _fscaled_ | ||
4759 | _fscaled_: | ||
4760 | link %a6,&-LOCAL_SIZE | ||
4761 | |||
4762 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the frame | ||
4763 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs (FPCR, FPSR) in the frame | ||
4764 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1; only fp1 is reloaded at exit | ||
4765 | |||
4766 | fmov.l &0x0,%fpcr # zero FPCR before the arguments are converted | ||
4767 | |||
4768 | # _fscaled_: double-precision entry point for the sscale_* handlers; dst at 0x8(%a6), src at 0x10(%a6). | ||
4769 | # Each operand is stored in extended precision (FP_DST, FP_SRC) and classified by tag (DTAG, STAG). | ||
4770 | # Only the src tag, copied to d1, picks the handler below; DTAG is stored but not examined here. | ||
4771 | fmov.d 0x8(%a6),%fp0 # load dbl dst into fp0 | ||
4772 | fmov.x %fp0,FP_DST(%a6) | ||
4773 | lea FP_DST(%a6),%a0 | ||
4774 | bsr.l tag # fetch dst operand type into d0 | ||
4775 | mov.b %d0,DTAG(%a6) | ||
4776 | |||
4777 | fmov.d 0x10(%a6),%fp0 # load dbl src into fp0 | ||
4778 | fmov.x %fp0,FP_SRC(%a6) | ||
4779 | lea FP_SRC(%a6),%a0 | ||
4780 | bsr.l tag # fetch src operand type into d0 | ||
4781 | mov.b %d0,STAG(%a6) | ||
4782 | mov.l %d0,%d1 | ||
4783 | |||
4784 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
4785 | |||
4786 | clr.l %d0 | ||
4787 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec in d0 (upper 24 bits cleared above) | ||
4788 | |||
4789 | lea FP_SRC(%a6),%a0 # a0 = ptr to extended src | ||
4790 | lea FP_DST(%a6),%a1 # a1 = ptr to extended dst | ||
4791 | |||
4792 | tst.b %d1 | ||
4793 | bne.b _L23_2d | ||
4794 | bsr.l sscale_snorm # tag 0: src is a NORM | ||
4795 | bra.b _L23_6d | ||
4796 | _L23_2d: | ||
4797 | cmpi.b %d1,&ZERO # is src a ZERO? | ||
4798 | bne.b _L23_3d # no, try INF | ||
4799 | bsr.l sscale_szero # yes | ||
4800 | bra.b _L23_6d | ||
4801 | _L23_3d: | ||
4802 | cmpi.b %d1,&INF # is src an INF? | ||
4803 | bne.b _L23_4d # no, try QNAN | ||
4804 | bsr.l sscale_sinf # yes | ||
4805 | bra.b _L23_6d | ||
4806 | _L23_4d: | ||
4807 | cmpi.b %d1,&QNAN # is src a QNAN? | ||
4808 | bne.b _L23_5d # no, fall back to the DENORM handler | ||
4809 | bsr.l sop_sqnan # yes: same QNAN routine as the other dyadic stubs | ||
4810 | bra.b _L23_6d | ||
4811 | _L23_5d: | ||
4812 | bsr.l sscale_sdnrm # any other tag: src is taken to be a DENORM | ||
4813 | _L23_6d: | ||
4814 | |||
4815 | # Common exit: the handler called above has left the result in fp0. | ||
4816 | # d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame; fp0 is returned untouched. | ||
4817 | # NOTE(review): USER_FPSR(%a6) - presumably the FPSR image reloaded here - was masked with 0x00ff00ff above; confirm. | ||
4818 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
4819 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs from the frame | ||
4820 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 carries the result | ||
4821 | unlk %a6 | ||
4822 | rts | ||
4823 | |||
4824 | global _fscalex_ | ||
4825 | _fscalex_: | ||
4826 | link %a6,&-LOCAL_SIZE | ||
4827 | |||
4828 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the frame | ||
4829 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs (FPCR, FPSR) in the frame | ||
4830 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1; only fp1 is reloaded at exit | ||
4831 | |||
4832 | fmov.l &0x0,%fpcr # zero FPCR; the saved value is reloaded at exit | ||
4833 | |||
4834 | # _fscalex_: extended-precision entry point for the sscale_* handlers; dst at 0x8(%a6), src at 0x14(%a6). | ||
4835 | # Each 12-byte operand is copied as three longwords (FP_DST, FP_SRC) and classified by tag (DTAG, STAG). | ||
4836 | # Only the src tag, copied to d1, picks the handler below; DTAG is stored but not examined here. | ||
4837 | lea FP_DST(%a6),%a0 | ||
4838 | mov.l 0x8+0x0(%a6),0x0(%a0) # copy ext dst, first of three longwords | ||
4839 | mov.l 0x8+0x4(%a6),0x4(%a0) | ||
4840 | mov.l 0x8+0x8(%a6),0x8(%a0) | ||
4841 | bsr.l tag # fetch dst operand type into d0 | ||
4842 | mov.b %d0,DTAG(%a6) | ||
4843 | |||
4844 | lea FP_SRC(%a6),%a0 | ||
4845 | mov.l 0x14+0x0(%a6),0x0(%a0) # copy ext src, first of three longwords | ||
4846 | mov.l 0x14+0x4(%a6),0x4(%a0) | ||
4847 | mov.l 0x14+0x8(%a6),0x8(%a0) | ||
4848 | bsr.l tag # fetch src operand type into d0 | ||
4849 | mov.b %d0,STAG(%a6) | ||
4850 | mov.l %d0,%d1 | ||
4851 | |||
4852 | andi.l &0x00ff00ff,USER_FPSR(%a6) | ||
4853 | |||
4854 | clr.l %d0 | ||
4855 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec in d0 (upper 24 bits cleared above) | ||
4856 | |||
4857 | lea FP_SRC(%a6),%a0 # a0 = ptr to extended src | ||
4858 | lea FP_DST(%a6),%a1 # a1 = ptr to extended dst | ||
4859 | |||
4860 | tst.b %d1 | ||
4861 | bne.b _L23_2x | ||
4862 | bsr.l sscale_snorm # tag 0: src is a NORM | ||
4863 | bra.b _L23_6x | ||
4864 | _L23_2x: | ||
4865 | cmpi.b %d1,&ZERO # is src a ZERO? | ||
4866 | bne.b _L23_3x # no, try INF | ||
4867 | bsr.l sscale_szero # yes | ||
4868 | bra.b _L23_6x | ||
4869 | _L23_3x: | ||
4870 | cmpi.b %d1,&INF # is src an INF? | ||
4871 | bne.b _L23_4x # no, try QNAN | ||
4872 | bsr.l sscale_sinf # yes | ||
4873 | bra.b _L23_6x | ||
4874 | _L23_4x: | ||
4875 | cmpi.b %d1,&QNAN # is src a QNAN? | ||
4876 | bne.b _L23_5x # no, fall back to the DENORM handler | ||
4877 | bsr.l sop_sqnan # yes: same QNAN routine as the other dyadic stubs | ||
4878 | bra.b _L23_6x | ||
4879 | _L23_5x: | ||
4880 | bsr.l sscale_sdnrm # any other tag: src is taken to be a DENORM | ||
4881 | _L23_6x: | ||
4882 | |||
4883 | # Common exit: the handler called above has left the result in fp0. | ||
4884 | # d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame; fp0 is returned untouched. | ||
4885 | # NOTE(review): USER_FPSR(%a6) - presumably the FPSR image reloaded here - was masked with 0x00ff00ff above; confirm. | ||
4886 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | ||
4887 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs from the frame | ||
4888 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 carries the result | ||
4889 | unlk %a6 | ||
4890 | rts | ||
4891 | |||
4892 | |||
4893 | ######################################################################### | ||
4894 | # ssin(): computes the sine of a normalized input # | ||
4895 | # ssind(): computes the sine of a denormalized input # | ||
4896 | # scos(): computes the cosine of a normalized input # | ||
4897 | # scosd(): computes the cosine of a denormalized input # | ||
4898 | # ssincos(): computes the sine and cosine of a normalized input # | ||
4899 | # ssincosd(): computes the sine and cosine of a denormalized input # | ||
4900 | # # | ||
4901 | # INPUT *************************************************************** # | ||
4902 | # a0 = pointer to extended precision input # | ||
4903 | # d0 = round precision,mode # | ||
4904 | # # | ||
4905 | # OUTPUT ************************************************************** # | ||
4906 | # fp0 = sin(X) or cos(X) # | ||
4907 | # # | ||
4908 | # For ssincos(X): # | ||
4909 | # fp0 = sin(X) # | ||
4910 | # fp1 = cos(X) # | ||
4911 | # # | ||
4912 | # ACCURACY and MONOTONICITY ******************************************* # | ||
4913 | # The returned result is within 1 ulp in 64 significant bits, i.e. # | ||
4914 | # within 0.5001 ulp to 53 bits if the result is subsequently # | ||
4915 | # rounded to double precision. The result is provably monotonic # | ||
4916 | # in double precision. # | ||
4917 | # # | ||
4918 | # ALGORITHM *********************************************************** # | ||
4919 | # # | ||
4920 | # SIN and COS: # | ||
4921 | # 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. # | ||
4922 | # # | ||
4923 | # 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. # | ||
4924 | # # | ||
4925 | # 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let # | ||
4926 | # k = N mod 4, so in particular, k = 0,1,2,or 3. # | ||
4927 | # Overwrite k by k := k + AdjN. # | ||
4928 | # # | ||
4929 | # 4. If k is even, go to 6. # | ||
4930 | # # | ||
4931 | # 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. # | ||
4932 | # Return sgn*cos(r) where cos(r) is approximated by an # | ||
4933 | # even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)), # | ||
4934 | # s = r*r. # | ||
4935 | # Exit. # | ||
4936 | # # | ||
4937 | # 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) # | ||
4938 | # where sin(r) is approximated by an odd polynomial in r # | ||
4939 | # r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. # | ||
4940 | # Exit. # | ||
4941 | # # | ||
4942 | # 7. If |X| > 1, go to 9. # | ||
4943 | # # | ||
4944 | # 8. (|X|<2**(-40)) If SIN is invoked, return X; # | ||
4945 | # otherwise return 1. # | ||
4946 | # # | ||
4947 | # 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, # | ||
4948 | # go back to 3. # | ||
4949 | # # | ||
4950 | # SINCOS: # | ||
4951 | # 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. # | ||
4952 | # # | ||
4953 | # 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let # | ||
4954 | # k = N mod 4, so in particular, k = 0,1,2,or 3. # | ||
4955 | # # | ||
4956 | # 3. If k is even, go to 5. # | ||
4957 | # # | ||
4958 | # 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), ie. # | ||
4959 | # j1 exclusive or with the l.s.b. of k. # | ||
4960 | # sgn1 := (-1)**j1, sgn2 := (-1)**j2. # | ||
4961 | # SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where # | ||
4962 | # sin(r) and cos(r) are computed as odd and even # | ||
4963 | # polynomials in r, respectively. Exit # | ||
4964 | # # | ||
4965 | # 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. # | ||
4966 | # SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where # | ||
4967 | # sin(r) and cos(r) are computed as odd and even # | ||
4968 | # polynomials in r, respectively. Exit # | ||
4969 | # # | ||
4970 | # 6. If |X| > 1, go to 8. # | ||
4971 | # # | ||
4972 | # 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. # | ||
4973 | # # | ||
4974 | # 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, # | ||
4975 | # go back to 2. # | ||
4976 | # # | ||
4977 | ######################################################################### | ||
4978 | |||
4979 | SINA7: long 0xBD6AAA77,0xCCC994F5 | ||
4980 | SINA6: long 0x3DE61209,0x7AAE8DA1 | ||
4981 | SINA5: long 0xBE5AE645,0x2A118AE4 | ||
4982 | SINA4: long 0x3EC71DE3,0xA5341531 | ||
4983 | SINA3: long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000 | ||
4984 | SINA2: long 0x3FF80000,0x88888888,0x888859AF,0x00000000 | ||
4985 | SINA1: long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000 | ||
4986 | |||
4987 | COSB8: long 0x3D2AC4D0,0xD6011EE3 | ||
4988 | COSB7: long 0xBDA9396F,0x9F45AC19 | ||
4989 | COSB6: long 0x3E21EED9,0x0612C972 | ||
4990 | COSB5: long 0xBE927E4F,0xB79D9FCF | ||
4991 | COSB4: long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000 | ||
4992 | COSB3: long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000 | ||
4993 | COSB2: long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E | ||
4994 | COSB1: long 0xBF000000 | ||
4995 | |||
4996 | set INARG,FP_SCR0 | ||
4997 | |||
4998 | set X,FP_SCR0 | ||
4999 | # set XDCARE,X+2 | ||
5000 | set XFRAC,X+4 | ||
5001 | |||
5002 | set RPRIME,FP_SCR0 | ||
5003 | set SPRIME,FP_SCR1 | ||
5004 | |||
5005 | set POSNEG1,L_SCR1 | ||
5006 | set TWOTO63,L_SCR1 | ||
5007 | |||
5008 | set ENDFLAG,L_SCR2 | ||
5009 | set INT,L_SCR2 | ||
5010 | |||
5011 | set ADJN,L_SCR3 | ||
5012 | |||
5013 | ############################################ | ||
5014 | global ssin | ||
5015 | ssin: | ||
5016 | mov.l &0,ADJN(%a6) # ADJN := 0 selects sine (SIN/COS step 1) | ||
5017 | bra.b SINBGN | ||
5018 | |||
5019 | ############################################ | ||
5020 | global scos | ||
5021 | scos: | ||
5022 | mov.l &1,ADJN(%a6) # ADJN := 1 selects cosine; falls through to SINBGN | ||
5023 | |||
5024 | ############################################ | ||
5025 | SINBGN: | ||
5026 | #--LOAD X, KEEP A COPY AT X(a6), THEN CHECK IF |X| IS TOO SMALL OR LARGE | ||
5027 | |||
5028 | fmov.x (%a0),%fp0 # LOAD INPUT | ||
5029 | fmov.x %fp0,X(%a6) # save input at X | ||
5030 | |||
5031 | # "COMPACTIFY" X: d1 = sign/exponent word (hi) : top 16 mantissa bits (lo) | ||
5032 | mov.l (%a0),%d1 # put exp in hi word | ||
5033 | mov.w 4(%a0),%d1 # fetch hi(man) | ||
5034 | and.l &0x7FFFFFFF,%d1 # strip sign | ||
5035 | |||
5036 | cmpi.l %d1,&0x3FD78000 # is |X| >= 2**(-40)? | ||
5037 | bge.b SOK1 # yes; go check the upper bound | ||
5038 | bra.w SINSM # no; input is very small | ||
5039 | |||
5040 | SOK1: | ||
5041 | cmp.l %d1,&0x4004BC7E # is |X| < 15 PI? | ||
5042 | blt.b SINMAIN # yes; use the table-driven reduction | ||
5043 | bra.w SREDUCEX # no; input is very large | ||
5044 | |||
5045 | #--THIS IS THE USUAL CASE, |X| < 15 PI. | ||
5046 | #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. | ||
5047 | SINMAIN: | ||
5048 | fmov.x %fp0,%fp1 | ||
5049 | fmul.d TWOBYPI(%pc),%fp1 # X*2/PI | ||
5050 | |||
5051 | lea PITBL+0x200(%pc),%a1 # a1 -> N=0 ENTRY OF TABLE OF N*PI/2, N = -32,...,32 | ||
5052 | |||
5053 | fmov.l %fp1,INT(%a6) # INT = N = X*2/PI CONVERTED TO INTEGER | ||
5054 | |||
5055 | mov.l INT(%a6),%d1 # make a copy of N | ||
5056 | asl.l &4,%d1 # N *= 16 (each table entry is four longs) | ||
5057 | add.l %d1,%a1 # tbl_addr = a1 + (N*16) | ||
5058 | |||
5059 | # A1 IS THE ADDRESS OF N*PIBY2 | ||
5060 | # ...WHICH IS IN TWO PIECES Y1 (EXTENDED) & Y2 (SINGLE) | ||
5061 | fsub.x (%a1)+,%fp0 # X-Y1 | ||
5062 | fsub.s (%a1),%fp0 # fp0 = R = (X-Y1)-Y2 | ||
5063 | |||
5064 | SINCONT: | ||
5065 | #--continuation from SREDUCEX (reached via SRESTORE when ADJN < 4) | ||
5066 | |||
5067 | #--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED | ||
5068 | mov.l INT(%a6),%d1 | ||
5069 | add.l ADJN(%a6),%d1 # d1 = N+ADJN; SEE IF IT IS ODD OR EVEN | ||
5070 | ror.l &1,%d1 # d1 WAS ODD IFF d1 IS NOW NEGATIVE | ||
5071 | cmp.l %d1,&0 | ||
5072 | blt.w COSPOLY | ||
5073 | |||
5074 | #--LET J BE THE LEAST SIG. BIT OF d1 (I.E. BIT 1 OF N+ADJN), LET SGN := (-1)**J. | ||
5075 | #--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY | ||
5076 | #--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE | ||
5077 | #--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS | ||
5078 | #--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))]) | ||
5079 | #--WHERE T=S*S. | ||
5080 | #--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION | ||
5081 | #--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT. | ||
5082 | SINPOLY: | ||
5083 | fmovm.x &0x0c,-(%sp) # save fp2/fp3 | ||
5084 | |||
5085 | fmov.x %fp0,X(%a6) # X IS R | ||
5086 | fmul.x %fp0,%fp0 # FP0 IS S | ||
5087 | |||
5088 | fmov.d SINA7(%pc),%fp3 | ||
5089 | fmov.d SINA6(%pc),%fp2 | ||
5090 | |||
5091 | fmov.x %fp0,%fp1 | ||
5092 | fmul.x %fp1,%fp1 # FP1 IS T | ||
5093 | |||
5094 | ror.l &1,%d1 | ||
5095 | and.l &0x80000000,%d1 | ||
5096 | # ...J (BIT 1 OF N+ADJN) IS NOW ALONE IN THE SIGN POSITION OF d1 | ||
5097 | eor.l %d1,X(%a6) # X IS NOW R'= SGN*R | ||
5098 | |||
5099 | fmul.x %fp1,%fp3 # TA7 | ||
5100 | fmul.x %fp1,%fp2 # TA6 | ||
5101 | |||
5102 | fadd.d SINA5(%pc),%fp3 # A5+TA7 | ||
5103 | fadd.d SINA4(%pc),%fp2 # A4+TA6 | ||
5104 | |||
5105 | fmul.x %fp1,%fp3 # T(A5+TA7) | ||
5106 | fmul.x %fp1,%fp2 # T(A4+TA6) | ||
5107 | |||
5108 | fadd.d SINA3(%pc),%fp3 # A3+T(A5+TA7) | ||
5109 | fadd.x SINA2(%pc),%fp2 # A2+T(A4+TA6) | ||
5110 | |||
5111 | fmul.x %fp3,%fp1 # T(A3+T(A5+TA7)) | ||
5112 | |||
5113 | fmul.x %fp0,%fp2 # S(A2+T(A4+TA6)) | ||
5114 | fadd.x SINA1(%pc),%fp1 # A1+T(A3+T(A5+TA7)) | ||
5115 | fmul.x X(%a6),%fp0 # R'*S | ||
5116 | |||
5117 | fadd.x %fp2,%fp1 # [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] | ||
5118 | |||
5119 | fmul.x %fp1,%fp0 # SIN(R')-R' | ||
5120 | |||
5121 | fmovm.x (%sp)+,&0x30 # restore fp2/fp3 | ||
5122 | |||
5123 | fmov.l %d0,%fpcr # restore users round mode,prec | ||
5124 | fadd.x X(%a6),%fp0 # fp0 = R' + (SIN(R')-R'); last inst - possible exception set | ||
5125 | bra t_inx2 | ||
5126 | |||
5127 | #--LET J BE THE LEAST SIG. BIT OF d1 (I.E. BIT 1 OF N+ADJN), LET SGN := (-1)**J. | ||
5128 | #--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY | ||
5129 | #--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE | ||
5130 | #--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS | ||
5131 | #--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))]) | ||
5132 | #--WHERE T=S*S. | ||
5133 | #--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION | ||
5134 | #--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2 | ||
5135 | #--AND IS THEREFORE STORED AS SINGLE PRECISION. | ||
5136 | COSPOLY: | ||
5137 | fmovm.x &0x0c,-(%sp) # save fp2/fp3 | ||
5138 | |||
5139 | fmul.x %fp0,%fp0 # FP0 IS S | ||
5140 | |||
5141 | fmov.d COSB8(%pc),%fp2 | ||
5142 | fmov.d COSB7(%pc),%fp3 | ||
5143 | |||
5144 | fmov.x %fp0,%fp1 | ||
5145 | fmul.x %fp1,%fp1 # FP1 IS T | ||
5146 | |||
5147 | fmov.x %fp0,X(%a6) # X IS S | ||
5148 | ror.l &1,%d1 | ||
5149 | and.l &0x80000000,%d1 | ||
5150 | # ...J (BIT 1 OF N+ADJN) IS NOW ALONE IN THE SIGN POSITION OF d1 | ||
5151 | |||
5152 | fmul.x %fp1,%fp2 # TB8 | ||
5153 | |||
5154 | eor.l %d1,X(%a6) # X IS NOW S'= SGN*S | ||
5155 | and.l &0x80000000,%d1 | ||
5156 | |||
5157 | fmul.x %fp1,%fp3 # TB7 | ||
5158 | |||
5159 | or.l &0x3F800000,%d1 # d1 IS SGN (+-1.0) IN SINGLE | ||
5160 | mov.l %d1,POSNEG1(%a6) | ||
5161 | |||
5162 | fadd.d COSB6(%pc),%fp2 # B6+TB8 | ||
5163 | fadd.d COSB5(%pc),%fp3 # B5+TB7 | ||
5164 | |||
5165 | fmul.x %fp1,%fp2 # T(B6+TB8) | ||
5166 | fmul.x %fp1,%fp3 # T(B5+TB7) | ||
5167 | |||
5168 | fadd.d COSB4(%pc),%fp2 # B4+T(B6+TB8) | ||
5169 | fadd.x COSB3(%pc),%fp3 # B3+T(B5+TB7) | ||
5170 | |||
5171 | fmul.x %fp1,%fp2 # T(B4+T(B6+TB8)) | ||
5172 | fmul.x %fp3,%fp1 # T(B3+T(B5+TB7)) | ||
5173 | |||
5174 | fadd.x COSB2(%pc),%fp2 # B2+T(B4+T(B6+TB8)) | ||
5175 | fadd.s COSB1(%pc),%fp1 # B1+T(B3+T(B5+TB7)) | ||
5176 | |||
5177 | fmul.x %fp2,%fp0 # S(B2+T(B4+T(B6+TB8))) | ||
5178 | |||
5179 | fadd.x %fp1,%fp0 | ||
5180 | |||
5181 | fmul.x X(%a6),%fp0 | ||
5182 | |||
5183 | fmovm.x (%sp)+,&0x30 # restore fp2/fp3 | ||
5184 | |||
5185 | fmov.l %d0,%fpcr # restore users round mode,prec | ||
5186 | fadd.s POSNEG1(%a6),%fp0 # fp0 = SGN + S'*(...); last inst - possible exception set | ||
5187 | bra t_inx2 | ||
5188 | |||
5189 | ############################################## | ||
5190 | |||
5191 | # SINe: Big OR Small? NOTE(review): no branch to SINBORS is visible in this part of the file. | ||
5192 | #--FOR |X| >= 15PI OR |X| < 2**(-40): IF |X| > 1, WE USE THE GENERAL ARGUMENT REDUCTION. | ||
5193 | #--OTHERWISE |X| < 2**(-40), RETURN X (SIN) OR 1 (COS). | ||
5194 | SINBORS: | ||
5195 | cmp.l %d1,&0x3FFF8000 | ||
5196 | bgt.l SREDUCEX | ||
5197 | |||
5198 | SINSM: | ||
5199 | mov.l ADJN(%a6),%d1 | ||
5200 | cmp.l %d1,&0 | ||
5201 | bgt.b COSTINY | ||
5202 | |||
5203 | # here, the operation may underflow iff the precision is sgl or dbl. | ||
5204 | # extended denorms are handled through another entry point. | ||
5205 | SINTINY: | ||
5206 | # mov.w &0x0000,XDCARE(%a6) # JUST IN CASE | ||
5207 | |||
5208 | fmov.l %d0,%fpcr # restore users round mode,prec | ||
5209 | mov.b &FMOV_OP,%d1 # last inst is MOVE | ||
5210 | fmov.x X(%a6),%fp0 # sin(X) = X; last inst - possible exception set | ||
5211 | bra t_catch | ||
5212 | |||
5213 | COSTINY: | ||
5214 | fmov.s &0x3F800000,%fp0 # fp0 = 1.0 | ||
5215 | fmov.l %d0,%fpcr # restore users round mode,prec | ||
5216 | fadd.s &0x80800000,%fp0 # fp0 = 1.0 + (-2**(-126)); last inst - possible exception set | ||
5217 | bra t_pinx2 | ||
5218 | |||
5219 | ################################################ | ||
5220 | global ssind | ||
5221 | #--SIN(X) = X FOR DENORMALIZED X; THE WHOLE JOB IS HANDED TO t_extdnrm | ||
5222 | ssind: | ||
5223 | bra t_extdnrm | ||
5224 | |||
5225 | ############################################ | ||
5226 | global scosd | ||
5227 | #--COS(X) = 1 FOR DENORMALIZED X; EXITS THROUGH t_pinx2 LIKE COSTINY | ||
5228 | scosd: | ||
5229 | fmov.s &0x3F800000,%fp0 # fp0 = 1.0 | ||
5230 | bra t_pinx2 | ||
5231 | |||
5232 | ################################################## | ||
5233 | |||
5234 | global ssincos | ||
5235 | ssincos: | ||
5236 | #--SET ADJN TO 4; SRESTORE COMPARES ADJN WITH 4 TO CHOOSE BETWEEN SINCONT AND SCCONT | ||
5237 | mov.l &4,ADJN(%a6) | ||
5238 | |||
5239 | fmov.x (%a0),%fp0 # LOAD INPUT | ||
5240 | fmov.x %fp0,X(%a6) | ||
5241 | |||
5242 | mov.l (%a0),%d1 | ||
5243 | mov.w 4(%a0),%d1 | ||
5244 | and.l &0x7FFFFFFF,%d1 # COMPACTIFY X: d1 = exp word : top 16 mantissa bits, sign stripped | ||
5245 | |||
5246 | cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)? IF NOT, TAKE THE TINY-INPUT EXIT SCSM | ||
5247 | bge.b SCOK1 | ||
5248 | bra.w SCSM | ||
5249 | |||
5250 | SCOK1: | ||
5251 | cmp.l %d1,&0x4004BC7E # |X| < 15 PI? IF NOT, USE THE GENERAL REDUCTION SREDUCEX | ||
5252 | blt.b SCMAIN | ||
5253 | bra.w SREDUCEX | ||
5254 | |||
5255 | |||
5256 | #--THIS IS THE USUAL CASE, |X| < 15 PI. | ||
5257 | #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. | ||
5258 | SCMAIN: | ||
5259 | fmov.x %fp0,%fp1 | ||
5260 | |||
5261 | fmul.d TWOBYPI(%pc),%fp1 # X*2/PI | ||
5262 | |||
5263 | lea PITBL+0x200(%pc),%a1 # a1 -> N=0 ENTRY OF TABLE OF N*PI/2, N = -32,...,32 | ||
5264 | |||
5265 | fmov.l %fp1,INT(%a6) # INT = N = X*2/PI CONVERTED TO INTEGER | ||
5266 | |||
5267 | mov.l INT(%a6),%d1 | ||
5268 | asl.l &4,%d1 | ||
5269 | add.l %d1,%a1 # ADDRESS OF N*PIBY2, IN Y1, Y2 | ||
5270 | |||
5271 | fsub.x (%a1)+,%fp0 # X-Y1 | ||
5272 | fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2 | ||
5273 | |||
5274 | SCCONT: | ||
5275 | #--continuation point from SREDUCEX (reached via SRESTORE when ADJN >= 4) | ||
5276 | |||
5277 | mov.l INT(%a6),%d1 | ||
5278 | ror.l &1,%d1 | ||
5279 | cmp.l %d1,&0 # d1 < 0 IFF N IS ODD | ||
5280 | bge.w NEVEN | ||
5281 | |||
5282 | SNODD: | ||
5283 | #--N IS ODD: SIN(X) = SGN1*COS(R), COS(X) = SGN2*SIN(R) (SINCOS STEP 4) | ||
5284 | fmovm.x &0x04,-(%sp) # save fp2 | ||
5285 | |||
5286 | fmov.x %fp0,RPRIME(%a6) | ||
5287 | fmul.x %fp0,%fp0 # FP0 IS S = R*R | ||
5288 | fmov.d SINA7(%pc),%fp1 # A7 | ||
5289 | fmov.d COSB8(%pc),%fp2 # B8 | ||
5290 | fmul.x %fp0,%fp1 # SA7 | ||
5291 | fmul.x %fp0,%fp2 # SB8 | ||
5292 | |||
5293 | mov.l %d2,-(%sp) | ||
5294 | mov.l %d1,%d2 | ||
5295 | ror.l &1,%d2 | ||
5296 | and.l &0x80000000,%d2 | ||
5297 | eor.l %d1,%d2 | ||
5298 | and.l &0x80000000,%d2 | ||
5299 | |||
5300 | fadd.d SINA6(%pc),%fp1 # A6+SA7 | ||
5301 | fadd.d COSB7(%pc),%fp2 # B7+SB8 | ||
5302 | |||
5303 | fmul.x %fp0,%fp1 # S(A6+SA7) | ||
5304 | eor.l %d2,RPRIME(%a6) | ||
5305 | mov.l (%sp)+,%d2 | ||
5306 | fmul.x %fp0,%fp2 # S(B7+SB8) | ||
5307 | ror.l &1,%d1 | ||
5308 | and.l &0x80000000,%d1 | ||
5309 | mov.l &0x3F800000,POSNEG1(%a6) | ||
5310 | eor.l %d1,POSNEG1(%a6) | ||
5311 | |||
5312 | fadd.d SINA5(%pc),%fp1 # A5+S(A6+SA7) | ||
5313 | fadd.d COSB6(%pc),%fp2 # B6+S(B7+SB8) | ||
5314 | |||
5315 | fmul.x %fp0,%fp1 # S(A5+S(A6+SA7)) | ||
5316 | fmul.x %fp0,%fp2 # S(B6+S(B7+SB8)) | ||
5317 | fmov.x %fp0,SPRIME(%a6) | ||
5318 | |||
5319 | fadd.d SINA4(%pc),%fp1 # A4+S(A5+S(A6+SA7)) | ||
5320 | eor.l %d1,SPRIME(%a6) | ||
5321 | fadd.d COSB5(%pc),%fp2 # B5+S(B6+S(B7+SB8)) | ||
5322 | |||
5323 | fmul.x %fp0,%fp1 # S(A4+...) | ||
5324 | fmul.x %fp0,%fp2 # S(B5+...) | ||
5325 | |||
5326 | fadd.d SINA3(%pc),%fp1 # A3+S(A4+...) | ||
5327 | fadd.d COSB4(%pc),%fp2 # B4+S(B5+...) | ||
5328 | |||
5329 | fmul.x %fp0,%fp1 # S(A3+...) | ||
5330 | fmul.x %fp0,%fp2 # S(B4+...) | ||
5331 | |||
5332 | fadd.x SINA2(%pc),%fp1 # A2+S(A3+...) | ||
5333 | fadd.x COSB3(%pc),%fp2 # B3+S(B4+...) | ||
5334 | |||
5335 | fmul.x %fp0,%fp1 # S(A2+...) | ||
5336 | fmul.x %fp0,%fp2 # S(B3+...) | ||
5337 | |||
5338 | fadd.x SINA1(%pc),%fp1 # A1+S(A2+...) | ||
5339 | fadd.x COSB2(%pc),%fp2 # B2+S(B3+...) | ||
5340 | |||
5341 | fmul.x %fp0,%fp1 # S(A1+...) | ||
5342 | fmul.x %fp2,%fp0 # S(B2+...) | ||
5343 | |||
5344 | fmul.x RPRIME(%a6),%fp1 # R'S(A1+...) | ||
5345 | fadd.s COSB1(%pc),%fp0 # B1+S(B2...) | ||
5346 | fmul.x SPRIME(%a6),%fp0 # S'(B1+S(B2+...)) | ||
5347 | |||
5348 | fmovm.x (%sp)+,&0x20 # restore fp2 | ||
5349 | |||
5350 | fmov.l %d0,%fpcr | ||
5351 | fadd.x RPRIME(%a6),%fp1 # COS(X) = R' + R'S(A1+...) | ||
5352 | bsr sto_cos # store cosine result | ||
5353 | fadd.s POSNEG1(%a6),%fp0 # SIN(X) = POSNEG1 + S'(B1+S(B2+...)) | ||
5354 | bra t_inx2 | ||
5355 | |||
5356 | NEVEN: | ||
5357 | #--N IS EVEN: SIN(X) = SGN1*SIN(R), COS(X) = SGN1*COS(R) (SINCOS STEP 5) | ||
5358 | fmovm.x &0x04,-(%sp) # save fp2 | ||
5359 | |||
5360 | fmov.x %fp0,RPRIME(%a6) | ||
5361 | fmul.x %fp0,%fp0 # FP0 IS S = R*R | ||
5362 | |||
5363 | fmov.d COSB8(%pc),%fp1 # B8 | ||
5364 | fmov.d SINA7(%pc),%fp2 # A7 | ||
5365 | |||
5366 | fmul.x %fp0,%fp1 # SB8 | ||
5367 | fmov.x %fp0,SPRIME(%a6) | ||
5368 | fmul.x %fp0,%fp2 # SA7 | ||
5369 | |||
5370 | ror.l &1,%d1 | ||
5371 | and.l &0x80000000,%d1 | ||
5372 | |||
5373 | fadd.d COSB7(%pc),%fp1 # B7+SB8 | ||
5374 | fadd.d SINA6(%pc),%fp2 # A6+SA7 | ||
5375 | |||
5376 | eor.l %d1,RPRIME(%a6) | ||
5377 | eor.l %d1,SPRIME(%a6) | ||
5378 | |||
5379 | fmul.x %fp0,%fp1 # S(B7+SB8) | ||
5380 | |||
5381 | or.l &0x3F800000,%d1 | ||
5382 | mov.l %d1,POSNEG1(%a6) | ||
5383 | |||
5384 | fmul.x %fp0,%fp2 # S(A6+SA7) | ||
5385 | |||
5386 | fadd.d COSB6(%pc),%fp1 # B6+S(B7+SB8) | ||
5387 | fadd.d SINA5(%pc),%fp2 # A5+S(A6+SA7) | ||
5388 | |||
5389 | fmul.x %fp0,%fp1 # S(B6+S(B7+SB8)) | ||
5390 | fmul.x %fp0,%fp2 # S(A5+S(A6+SA7)) | ||
5391 | |||
5392 | fadd.d COSB5(%pc),%fp1 # B5+S(B6+S(B7+SB8)) | ||
5393 | fadd.d SINA4(%pc),%fp2 # A4+S(A5+S(A6+SA7)) | ||
5394 | |||
5395 | fmul.x %fp0,%fp1 # S(B5+...) | ||
5396 | fmul.x %fp0,%fp2 # S(A4+...) | ||
5397 | |||
5398 | fadd.d COSB4(%pc),%fp1 # B4+S(B5+...) | ||
5399 | fadd.d SINA3(%pc),%fp2 # A3+S(A4+...) | ||
5400 | |||
5401 | fmul.x %fp0,%fp1 # S(B4+...) | ||
5402 | fmul.x %fp0,%fp2 # S(A3+...) | ||
5403 | |||
5404 | fadd.x COSB3(%pc),%fp1 # B3+S(B4+...) | ||
5405 | fadd.x SINA2(%pc),%fp2 # A2+S(A3+...) | ||
5406 | |||
5407 | fmul.x %fp0,%fp1 # S(B3+...) | ||
5408 | fmul.x %fp0,%fp2 # S(A2+...) | ||
5409 | |||
5410 | fadd.x COSB2(%pc),%fp1 # B2+S(B3+...) | ||
5411 | fadd.x SINA1(%pc),%fp2 # A1+S(A2+...) | ||
5412 | |||
5413 | fmul.x %fp0,%fp1 # S(B2+...) | ||
5414 | fmul.x %fp2,%fp0 # S(A1+...) | ||
5415 | |||
5416 | |||
5417 | fadd.s COSB1(%pc),%fp1 # B1+S(B2...) | ||
5418 | fmul.x RPRIME(%a6),%fp0 # R'S(A1+...) | ||
5419 | fmul.x SPRIME(%a6),%fp1 # S'(B1+S(B2+...)) | ||
5420 | |||
5421 | fmovm.x (%sp)+,&0x20 # restore fp2 | ||
5422 | |||
5423 | fmov.l %d0,%fpcr | ||
5424 | fadd.s POSNEG1(%a6),%fp1 # COS(X) = POSNEG1 + S'(B1+S(B2+...)) | ||
5425 | bsr sto_cos # store cosine result | ||
5426 | fadd.x RPRIME(%a6),%fp0 # SIN(X) = R' + R'S(A1+...) | ||
5427 | bra t_inx2 | ||
5428 | |||
5429 | ################################################ | ||
5430 | |||
5431 | SCBORS: | ||
5432 | cmp.l %d1,&0x3FFF8000 | ||
5433 | bgt.w SREDUCEX | ||
5434 | |||
5435 | ################################################ | ||
5436 | |||
5437 | SCSM: | ||
5438 | # mov.w &0x0000,XDCARE(%a6) | ||
5439 | fmov.s &0x3F800000,%fp1 | ||
5440 | |||
5441 | fmov.l %d0,%fpcr | ||
5442 | fsub.s &0x00800000,%fp1 | ||
5443 | bsr sto_cos # store cosine result (fp1 = 1.0 - 2**(-126) under the user's fpcr) | ||
5444 | fmov.l %fpcr,%d0 # d0 must have fpcr,too (presumably sto_cos clobbers d0 -- confirm) | ||
5445 | mov.b &FMOV_OP,%d1 # last inst is MOVE | ||
5446 | fmov.x X(%a6),%fp0 | ||
5447 | bra t_catch | ||
5448 | |||
5449 | ############################################## | ||
5450 | |||
5451 | global ssincosd | ||
5452 | #--SIN AND COS OF X FOR DENORMALIZED X: COS = 1.0 GOES TO sto_cos, SIN IS LEFT TO t_extdnrm | ||
5453 | ssincosd: | ||
5454 | mov.l %d0,-(%sp) # save d0 | ||
5455 | fmov.s &0x3F800000,%fp1 | ||
5456 | bsr sto_cos # store cosine result | ||
5457 | mov.l (%sp)+,%d0 # restore d0 | ||
5458 | bra t_extdnrm | ||
5459 | |||
5460 | ############################################ | ||
5461 | |||
5462 | #--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW. | ||
5463 | #--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING | ||
5464 | #--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE. | ||
5465 | SREDUCEX: | ||
5466 | fmovm.x &0x3c,-(%sp) # save {fp2-fp5} | ||
5467 | mov.l %d2,-(%sp) # save d2 | ||
5468 | fmov.s &0x00000000,%fp1 # fp1 = 0 | ||
5469 | |||
5470 | #--If compact form of abs(arg) in d1 = 0x7ffeffff, argument is so large that | ||
5471 | #--there is a danger of unwanted overflow in first LOOP iteration. In this | ||
5472 | #--case, reduce argument by one remainder step to make subsequent reduction | ||
5473 | #--safe. | ||
5474 | cmp.l %d1,&0x7ffeffff # is arg dangerously large? | ||
5475 | bne.b SLOOP # no | ||
5476 | |||
5477 | # yes; create high half of 2**16383*PI/2 at FP_SCR0 | ||
5478 | mov.w &0x7ffe,FP_SCR0_EX(%a6) | ||
5479 | mov.l &0xc90fdaa2,FP_SCR0_HI(%a6) | ||
5480 | clr.l FP_SCR0_LO(%a6) | ||
5481 | |||
5482 | # create low half of 2**16383*PI/2 at FP_SCR1 | ||
5483 | mov.w &0x7fdc,FP_SCR1_EX(%a6) | ||
5484 | mov.l &0x85a308d3,FP_SCR1_HI(%a6) | ||
5485 | clr.l FP_SCR1_LO(%a6) | ||
5486 | |||
5487 | ftest.x %fp0 # test sign of argument | ||
5488 | fblt.w sred_neg | ||
5489 | |||
5490 | or.b &0x80,FP_SCR0_EX(%a6) # positive arg: set the sign bits so the fadds below subtract | ||
5491 | or.b &0x80,FP_SCR1_EX(%a6) | ||
5492 | sred_neg: | ||
5493 | fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact | ||
5494 | fmov.x %fp0,%fp1 # save high result in fp1 | ||
5495 | fadd.x FP_SCR1(%a6),%fp0 # low part of reduction | ||
5496 | fsub.x %fp0,%fp1 # determine low component of result | ||
5497 | fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument. | ||
5498 | |||
5499 | #--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4. | ||
5500 | #--integer quotient N is stored to INT(a6) at SRESTORE | ||
5501 | #--Intermediate remainder is 66 bits long; (R,r) in (FP0,FP1) | ||
5502 | SLOOP: | ||
5503 | fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2 | ||
5504 | mov.w INARG(%a6),%d1 | ||
5505 | mov.l %d1,%a1 # save a copy of d1 (sign/exponent word of INARG) | ||
5506 | and.l &0x00007FFF,%d1 | ||
5507 | sub.l &0x00003FFF,%d1 # d1 = K | ||
5508 | cmp.l %d1,&28 | ||
5509 | ble.b SLASTLOOP | ||
5510 | SCONTLOOP: | ||
5511 | sub.l &27,%d1 # d1 = L := K-27 | ||
5512 | mov.b &0,ENDFLAG(%a6) | ||
5513 | bra.b SWORK | ||
5514 | SLASTLOOP: | ||
5515 | clr.l %d1 # d1 = L := 0 | ||
5516 | mov.b &1,ENDFLAG(%a6) | ||
5517 | |||
5518 | SWORK: | ||
5519 | #--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN | ||
5520 | #--THAT INT( X * (2/PI) / 2**(L) ) < 2**29. | ||
5521 | |||
5522 | #--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63), | ||
5523 | #--2**L * (PIby2_1), 2**L * (PIby2_2) | ||
5524 | |||
5525 | mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI | ||
5526 | sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI) | ||
5527 | |||
5528 | mov.l &0xA2F9836E,FP_SCR0_HI(%a6) | ||
5529 | mov.l &0x4E44152A,FP_SCR0_LO(%a6) | ||
5530 | mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI) | ||
5531 | |||
5532 | fmov.x %fp0,%fp2 | ||
5533 | fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI) | ||
5534 | |||
5535 | #--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN | ||
5536 | #--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N | ||
5537 | #--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT | ||
5538 | #--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE | ||
5539 | #--US THE DESIRED VALUE IN FLOATING POINT. | ||
5540 | mov.l %a1,%d2 | ||
5541 | swap %d2 | ||
5542 | and.l &0x80000000,%d2 | ||
5543 | or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL | ||
5544 | mov.l %d2,TWOTO63(%a6) | ||
5545 | fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP2 IS ROUNDED | ||
5546 | fsub.s TWOTO63(%a6),%fp2 # fp2 = N | ||
5547 | # fint.x %fp2 | ||
5548 | |||
5549 | #--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2 | ||
5550 | mov.l %d1,%d2 # d2 = L | ||
5551 | |||
5552 | add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2) | ||
5553 | mov.w %d2,FP_SCR0_EX(%a6) | ||
5554 | mov.l &0xC90FDAA2,FP_SCR0_HI(%a6) | ||
5555 | clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1 | ||
5556 | |||
5557 | add.l &0x00003FDD,%d1 | ||
5558 | mov.w %d1,FP_SCR1_EX(%a6) | ||
5559 | mov.l &0x85A308D3,FP_SCR1_HI(%a6) | ||
5560 | clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2 | ||
5561 | |||
5562 | mov.b ENDFLAG(%a6),%d1 | ||
5563 | |||
5564 | #--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and | ||
5565 | #--P2 = 2**(L) * Piby2_2 | ||
5566 | fmov.x %fp2,%fp4 # fp4 = N | ||
5567 | fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1 | ||
5568 | fmov.x %fp2,%fp5 # fp5 = N | ||
5569 | fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2 | ||
5570 | fmov.x %fp4,%fp3 # fp3 = W = N*P1 | ||
5571 | |||
5572 | #--we want P+p = W+w but |p| <= half ulp of P | ||
5573 | #--Then, we need to compute A := R-P and a := r-p | ||
5574 | fadd.x %fp5,%fp3 # fp3 = P | ||
5575 | fsub.x %fp3,%fp4 # fp4 = W-P | ||
5576 | |||
5577 | fsub.x %fp3,%fp0 # fp0 = A := R - P | ||
5578 | fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w | ||
5579 | |||
5580 | fmov.x %fp0,%fp3 # fp3 = A | ||
5581 | fsub.x %fp4,%fp1 # fp1 = a := r - p | ||
5582 | |||
5583 | #--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but | ||
5584 | #--|r| <= half ulp of R. | ||
5585 | fadd.x %fp1,%fp0 # fp0 = R := A+a | ||
5586 | #--No need to calculate r if this is the last loop (d1 = ENDFLAG byte) | ||
5587 | cmp.b %d1,&0 | ||
5588 | bgt.w SRESTORE | ||
5589 | |||
5590 | #--Need to calculate r | ||
5591 | fsub.x %fp0,%fp3 # fp3 = A-R | ||
5592 | fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a | ||
5593 | bra.w SLOOP | ||
5594 | |||
5595 | SRESTORE: | ||
5596 | fmov.l %fp2,INT(%a6) | ||
5597 | mov.l (%sp)+,%d2 # restore d2 | ||
5598 | fmovm.x (%sp)+,&0x3c # restore {fp2-fp5} | ||
5599 | |||
5600 | mov.l ADJN(%a6),%d1 | ||
5601 | cmp.l %d1,&4 | ||
5602 | |||
5603 | blt.w SINCONT | ||
5604 | bra.w SCCONT | ||
5605 | |||
5606 | ######################################################################### | ||
5607 | # stan(): computes the tangent of a normalized input # | ||
5608 | # stand(): computes the tangent of a denormalized input # | ||
5609 | # # | ||
5610 | # INPUT *************************************************************** # | ||
5611 | # a0 = pointer to extended precision input # | ||
5612 | # d0 = round precision,mode # | ||
5613 | # # | ||
5614 | # OUTPUT ************************************************************** # | ||
5615 | # fp0 = tan(X) # | ||
5616 | # # | ||
5617 | # ACCURACY and MONOTONICITY ******************************************* # | ||
5618 | # The returned result is within 3 ulp in 64 significant bits, i.e. # | ||
5619 | # within 0.5001 ulp to 53 bits if the result is subsequently # | ||
5620 | # rounded to double precision. The result is provably monotonic # | ||
5621 | # in double precision. # | ||
5622 | # # | ||
5623 | # ALGORITHM *********************************************************** # | ||
5624 | # # | ||
5625 | # 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. # | ||
5626 | # # | ||
5627 | # 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let # | ||
5628 | # k = N mod 2, so in particular, k = 0 or 1. # | ||
5629 | # # | ||
5630 | # 3. If k is odd, go to 5. # | ||
5631 | # # | ||
5632 | # 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a # | ||
5633 | # rational function U/V where # | ||
5634 | # U = r + r*s*(P1 + s*(P2 + s*P3)), and # | ||
5635 | # V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r. # | ||
5636 | # Exit. # | ||
5637 | # # | ||
5638 | # 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by # | ||
5639 | # a rational function U/V where # | ||
5640 | # U = r + r*s*(P1 + s*(P2 + s*P3)), and # | ||
5641 | # V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r, # | ||
5642 | # -Cot(r) = -V/U. Exit. # | ||
5643 | # # | ||
5644 | # 6. If |X| > 1, go to 8. # | ||
5645 | # # | ||
5646 | # 7. (|X|<2**(-40)) Tan(X) = X. Exit. # | ||
5647 | # # | ||
5648 | # 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back # | ||
5649 | # to 2. # | ||
5650 | # # | ||
5651 | ######################################################################### | ||
5652 | |||
5653 | TANQ4: | ||
5654 | long 0x3EA0B759,0xF50F8688 | ||
5655 | TANP3: | ||
5656 | long 0xBEF2BAA5,0xA8924F04 | ||
5657 | |||
5658 | TANQ3: | ||
5659 | long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000 | ||
5660 | |||
5661 | TANP2: | ||
5662 | long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000 | ||
5663 | |||
5664 | TANQ2: | ||
5665 | long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000 | ||
5666 | |||
5667 | TANP1: | ||
5668 | long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000 | ||
5669 | |||
5670 | TANQ1: | ||
5671 | long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000 | ||
5672 | |||
5673 | INVTWOPI: | ||
5674 | long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000 | ||
5675 | |||
5676 | TWOPI1: | ||
5677 | long 0x40010000,0xC90FDAA2,0x00000000,0x00000000 | ||
5678 | TWOPI2: | ||
5679 | long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000 | ||
5680 | |||
5681 | #--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING | ||
5682 | #--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT | ||
5683 | #--MOST 69 BITS LONG. | ||
5684 | # global PITBL | ||
5685 | PITBL: | ||
5686 | long 0xC0040000,0xC90FDAA2,0x2168C235,0x21800000 | ||
5687 | long 0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000 | ||
5688 | long 0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000 | ||
5689 | long 0xC0040000,0xB6365E22,0xEE46F000,0x21480000 | ||
5690 | long 0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000 | ||
5691 | long 0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000 | ||
5692 | long 0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000 | ||
5693 | long 0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000 | ||
5694 | long 0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000 | ||
5695 | long 0xC0040000,0x90836524,0x88034B96,0x20B00000 | ||
5696 | long 0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000 | ||
5697 | long 0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000 | ||
5698 | long 0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000 | ||
5699 | long 0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000 | ||
5700 | long 0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000 | ||
5701 | long 0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000 | ||
5702 | long 0xC0030000,0xC90FDAA2,0x2168C235,0x21000000 | ||
5703 | long 0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000 | ||
5704 | long 0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000 | ||
5705 | long 0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000 | ||
5706 | long 0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000 | ||
5707 | long 0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000 | ||
5708 | long 0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000 | ||
5709 | long 0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000 | ||
5710 | long 0xC0020000,0xC90FDAA2,0x2168C235,0x20800000 | ||
5711 | long 0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000 | ||
5712 | long 0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000 | ||
5713 | long 0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000 | ||
5714 | long 0xC0010000,0xC90FDAA2,0x2168C235,0x20000000 | ||
5715 | long 0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000 | ||
5716 | long 0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000 | ||
5717 | long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000 | ||
5718 | long 0x00000000,0x00000000,0x00000000,0x00000000 | ||
5719 | long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000 | ||
5720 | long 0x40000000,0xC90FDAA2,0x2168C235,0x9F800000 | ||
5721 | long 0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000 | ||
5722 | long 0x40010000,0xC90FDAA2,0x2168C235,0xA0000000 | ||
5723 | long 0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000 | ||
5724 | long 0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000 | ||
5725 | long 0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000 | ||
5726 | long 0x40020000,0xC90FDAA2,0x2168C235,0xA0800000 | ||
5727 | long 0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000 | ||
5728 | long 0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000 | ||
5729 | long 0x40030000,0x8A3AE64F,0x76F80584,0x21080000 | ||
5730 | long 0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000 | ||
5731 | long 0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000 | ||
5732 | long 0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000 | ||
5733 | long 0x40030000,0xBC7EDCF7,0xFF523611,0x21680000 | ||
5734 | long 0x40030000,0xC90FDAA2,0x2168C235,0xA1000000 | ||
5735 | long 0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000 | ||
5736 | long 0x40030000,0xE231D5F6,0x6595DA7B,0x21300000 | ||
5737 | long 0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000 | ||
5738 | long 0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000 | ||
5739 | long 0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000 | ||
5740 | long 0x40040000,0x8A3AE64F,0x76F80584,0x21880000 | ||
5741 | long 0x40040000,0x90836524,0x88034B96,0xA0B00000 | ||
5742 | long 0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000 | ||
5743 | long 0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000 | ||
5744 | long 0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000 | ||
5745 | long 0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000 | ||
5746 | long 0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000 | ||
5747 | long 0x40040000,0xB6365E22,0xEE46F000,0xA1480000 | ||
5748 | long 0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000 | ||
5749 | long 0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000 | ||
5750 | long 0x40040000,0xC90FDAA2,0x2168C235,0xA1800000 | ||
5751 | |||
5752 | set INARG,FP_SCR0 | ||
5753 | |||
5754 | set TWOTO63,L_SCR1 | ||
5755 | set INT,L_SCR1 | ||
5756 | set ENDFLAG,L_SCR2 | ||
5757 | |||
# stan: entry point for the tangent of an extended-precision input.
#   a0 = pointer to the input operand
#   d0 = value written to FPCR just before the final, result-producing
#        instruction of whichever path is taken
# d1 is built as a "compact" magnitude: the sign/exponent word in the
# upper 16 bits, the top 16 mantissa bits in the lower 16, and the sign
# bit masked off. That single longword is enough to dispatch on |X|:
#   compact |X| <  0x3FD78000 -> TANSM
#   compact |X| <  0x4004BC7E -> TANMAIN (table-driven reduction)
#   otherwise                 -> REDUCEX (general reduction loop)
5758 | global stan | ||
5759 | stan: | ||
5760 | fmov.x (%a0),%fp0 # LOAD INPUT | ||
5761 | |||
5762 | mov.l (%a0),%d1 | ||
5763 | mov.w 4(%a0),%d1 | ||
5764 | and.l &0x7FFFFFFF,%d1 | ||
5765 | |||
5766 | cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)? | ||
5767 | bge.b TANOK1 | ||
5768 | bra.w TANSM | ||
5769 | TANOK1: | ||
5770 | cmp.l %d1,&0x4004BC7E # |X| < 15 PI? | ||
5771 | blt.b TANMAIN | ||
5772 | bra.w REDUCEX | ||
5773 | |||
# TANMAIN: table-driven argument reduction for the common range.
# fp1 = X*(2/PI) is converted to the integer N in d1. N*16 (asl by 4)
# is added to the table midpoint PITBL+0x200, so negative N index
# backwards. Each 16-byte entry is read as an extended lead term Y1
# followed by a single-precision tail Y2, giving fp0 = R = (X-Y1)-Y2.
# The final ror/and pair moves the low bit of N into bit 31 of d1 and
# clears everything else; execution then continues at TANCONT.
5774 | TANMAIN: | ||
5775 | #--THIS IS THE USUAL CASE, |X| <= 15 PI. | ||
5776 | #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. | ||
5777 | fmov.x %fp0,%fp1 | ||
5778 | fmul.d TWOBYPI(%pc),%fp1 # X*2/PI | ||
5779 | |||
5780 | lea.l PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32 | ||
5781 | |||
5782 | fmov.l %fp1,%d1 # CONVERT TO INTEGER | ||
5783 | |||
5784 | asl.l &4,%d1 | ||
5785 | add.l %d1,%a1 # ADDRESS N*PIBY2 IN Y1, Y2 | ||
5786 | |||
5787 | fsub.x (%a1)+,%fp0 # X-Y1 | ||
5788 | |||
5789 | fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2 | ||
5790 | |||
5791 | ror.l &5,%d1 | ||
5792 | and.l &0x80000000,%d1 # N WAS ODD IFF D1 < 0 | ||
5793 | |||
# TANCONT: common evaluation tail, reached from TANMAIN and from RESTORE.
# On entry fp0 = R (reduced argument) and d1 is negative iff N is odd.
# fp2/fp3 are pushed first; the odd case branches to NODD with them
# still on the stack. The even case continues here and computes, with
# S = R*R,
#   U = R + R*S*(P1+S*(P2+S*P3))          (left in fp0)
#   V = 1 + S*(Q1+S*(Q2+S*(Q3+S*Q4)))     (left in fp1)
# and returns U/V. TANQ4, TANP3 and TANQ3 are read as doubles, the
# remaining coefficients as extended operands.
# FPCR is reloaded from d0 only after fp2/fp3 have been popped, so the
# closing fdiv is the one instruction executed under the caller's
# rounding settings before the branch to t_inx2 (defined outside this
# chunk).
5794 | TANCONT: | ||
5795 | fmovm.x &0x0c,-(%sp) # save fp2,fp3 | ||
5796 | |||
5797 | cmp.l %d1,&0 | ||
5798 | blt.w NODD | ||
5799 | |||
5800 | fmov.x %fp0,%fp1 | ||
5801 | fmul.x %fp1,%fp1 # S = R*R | ||
5802 | |||
5803 | fmov.d TANQ4(%pc),%fp3 | ||
5804 | fmov.d TANP3(%pc),%fp2 | ||
5805 | |||
5806 | fmul.x %fp1,%fp3 # SQ4 | ||
5807 | fmul.x %fp1,%fp2 # SP3 | ||
5808 | |||
5809 | fadd.d TANQ3(%pc),%fp3 # Q3+SQ4 | ||
5810 | fadd.x TANP2(%pc),%fp2 # P2+SP3 | ||
5811 | |||
5812 | fmul.x %fp1,%fp3 # S(Q3+SQ4) | ||
5813 | fmul.x %fp1,%fp2 # S(P2+SP3) | ||
5814 | |||
5815 | fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4) | ||
5816 | fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3) | ||
5817 | |||
5818 | fmul.x %fp1,%fp3 # S(Q2+S(Q3+SQ4)) | ||
5819 | fmul.x %fp1,%fp2 # S(P1+S(P2+SP3)) | ||
5820 | |||
5821 | fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4)) | ||
5822 | fmul.x %fp0,%fp2 # RS(P1+S(P2+SP3)) | ||
5823 | |||
5824 | fmul.x %fp3,%fp1 # S(Q1+S(Q2+S(Q3+SQ4))) | ||
5825 | |||
5826 | fadd.x %fp2,%fp0 # R+RS(P1+S(P2+SP3)) | ||
5827 | |||
5828 | fadd.s &0x3F800000,%fp1 # 1+S(Q1+...) | ||
5829 | |||
5830 | fmovm.x (%sp)+,&0x30 # restore fp2,fp3 | ||
5831 | |||
5832 | fmov.l %d0,%fpcr # restore users round mode,prec | ||
5833 | fdiv.x %fp1,%fp0 # last inst - possible exception set | ||
5834 | bra t_inx2 | ||
5835 | |||
# NODD: odd-N case, entered from TANCONT with fp2/fp3 already pushed.
# The register roles are swapped relative to the even case: fp1 keeps R
# and fp0 becomes S = R*R, so after the same two polynomials
#   fp1 = U = R + R*S*(P1+S*(P2+S*P3))
#   fp0 = V = 1 + S*(Q1+S*(Q2+S*(Q3+S*Q4)))
# U is then stored on the stack, its sign bit is flipped in memory
# (eor.l on the first longword), and the closing fdiv pops it as the
# divisor: result = V / (-U), the negative reciprocal of the even-case
# quotient. FPCR is reloaded from d0 immediately before that divide.
5836 | NODD: | ||
5837 | fmov.x %fp0,%fp1 | ||
5838 | fmul.x %fp0,%fp0 # S = R*R | ||
5839 | |||
5840 | fmov.d TANQ4(%pc),%fp3 | ||
5841 | fmov.d TANP3(%pc),%fp2 | ||
5842 | |||
5843 | fmul.x %fp0,%fp3 # SQ4 | ||
5844 | fmul.x %fp0,%fp2 # SP3 | ||
5845 | |||
5846 | fadd.d TANQ3(%pc),%fp3 # Q3+SQ4 | ||
5847 | fadd.x TANP2(%pc),%fp2 # P2+SP3 | ||
5848 | |||
5849 | fmul.x %fp0,%fp3 # S(Q3+SQ4) | ||
5850 | fmul.x %fp0,%fp2 # S(P2+SP3) | ||
5851 | |||
5852 | fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4) | ||
5853 | fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3) | ||
5854 | |||
5855 | fmul.x %fp0,%fp3 # S(Q2+S(Q3+SQ4)) | ||
5856 | fmul.x %fp0,%fp2 # S(P1+S(P2+SP3)) | ||
5857 | |||
5858 | fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4)) | ||
5859 | fmul.x %fp1,%fp2 # RS(P1+S(P2+SP3)) | ||
5860 | |||
5861 | fmul.x %fp3,%fp0 # S(Q1+S(Q2+S(Q3+SQ4))) | ||
5862 | |||
5863 | fadd.x %fp2,%fp1 # R+RS(P1+S(P2+SP3)) | ||
5864 | fadd.s &0x3F800000,%fp0 # 1+S(Q1+...) | ||
5865 | |||
5866 | fmovm.x (%sp)+,&0x30 # restore fp2,fp3 | ||
5867 | |||
5868 | fmov.x %fp1,-(%sp) | ||
5869 | eor.l &0x80000000,(%sp) | ||
5870 | |||
5871 | fmov.l %d0,%fpcr # restore users round mode,prec | ||
5872 | fdiv.x (%sp)+,%fp0 # last inst - possible exception set | ||
5873 | bra t_inx2 | ||
5874 | |||
# TANBORS: secondary dispatch on the compact magnitude in d1.
# NOTE(review): this label cannot be reached by fall-through (the code
# before it ends in an unconditional bra) and no branch to it is visible
# in this part of the file - confirm whether it is still referenced.
5875 | TANBORS: | ||
5876 | #--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION. | ||
5877 | #--IF |X| < 2**(-40), RETURN X OR 1. | ||
5878 | cmp.l %d1,&0x3FFF8000 | ||
5879 | bgt.b REDUCEX | ||
5880 | |||
# TANSM: tiny-argument path (branched to from stan when the compact
# |X| is below 0x3FD78000). The result is X itself. X is bounced through
# the stack so that the reload is a single fmov executed after FPCR has
# been restored from d0. d1 is loaded with FMOV_OP before the branch to
# t_catch (defined outside this chunk).
5881 | TANSM: | ||
5882 | fmov.x %fp0,-(%sp) | ||
5883 | fmov.l %d0,%fpcr # restore users round mode,prec | ||
5884 | mov.b &FMOV_OP,%d1 # last inst is MOVE | ||
5885 | fmov.x (%sp)+,%fp0 # last inst - possible exception set | ||
5886 | bra t_catch | ||
5887 | |||
# stand: entry point for a denormalized argument. It does no work of its
# own and simply branches to t_extdnrm (defined outside this chunk).
5888 | global stand | ||
5889 | #--TAN(X) = X FOR DENORMALIZED X | ||
5890 | stand: | ||
5891 | bra t_extdnrm | ||
5892 | |||
5893 | #--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW. | ||
5894 | #--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING | ||
5895 | #--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE. | ||
# REDUCEX: general argument reduction for arguments too large for the
# table lookup. Saves fp2-fp5 and d2 (both restored at RESTORE) and
# starts the low-order remainder fp1 at zero.
# If the compact magnitude in d1 is exactly 0x7ffeffff, one preliminary
# step is performed before entering LOOP: 2**16383*PI/2 is built as a
# high part (FP_SCR0) and a low part (FP_SCR1). Both are created
# positive, and their sign bits are set unless the argument tests
# negative, so the fadd instructions always move X toward zero. fp0/fp1
# then hold the high/low halves of the partially reduced argument.
5896 | REDUCEX: | ||
5897 | fmovm.x &0x3c,-(%sp) # save {fp2-fp5} | ||
5898 | mov.l %d2,-(%sp) # save d2 | ||
5899 | fmov.s &0x00000000,%fp1 # fp1 = 0 | ||
5900 | |||
5901 | #--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that | ||
5902 | #--there is a danger of unwanted overflow in first LOOP iteration. In this | ||
5903 | #--case, reduce argument by one remainder step to make subsequent reduction | ||
5904 | #--safe. | ||
5905 | cmp.l %d1,&0x7ffeffff # is arg dangerously large? | ||
5906 | bne.b LOOP # no | ||
5907 | |||
5908 | # yes; create 2**16383*PI/2 | ||
5909 | mov.w &0x7ffe,FP_SCR0_EX(%a6) | ||
5910 | mov.l &0xc90fdaa2,FP_SCR0_HI(%a6) | ||
5911 | clr.l FP_SCR0_LO(%a6) | ||
5912 | |||
5913 | # create low half of 2**16383*PI/2 at FP_SCR1 | ||
5914 | mov.w &0x7fdc,FP_SCR1_EX(%a6) | ||
5915 | mov.l &0x85a308d3,FP_SCR1_HI(%a6) | ||
5916 | clr.l FP_SCR1_LO(%a6) | ||
5917 | |||
5918 | ftest.x %fp0 # test sign of argument | ||
5919 | fblt.w red_neg | ||
5920 | |||
5921 | or.b &0x80,FP_SCR0_EX(%a6) # positive arg | ||
5922 | or.b &0x80,FP_SCR1_EX(%a6) | ||
5923 | red_neg: | ||
5924 | fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact | ||
5925 | fmov.x %fp0,%fp1 # save high result in fp1 | ||
5926 | fadd.x FP_SCR1(%a6),%fp0 # low part of reduction | ||
5927 | fsub.x %fp0,%fp1 # determine low component of result | ||
5928 | fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument. | ||
5929 | |||
# LOOP: top of one reduction pass. The current high part fp0 is stored
# to INARG so that its sign/exponent word can be read into d1; a1 keeps
# a copy of that word so WORK can recover the sign later. K is the
# unbiased exponent. When K <= 28 this is the final pass (L = 0,
# ENDFLAG = 1); otherwise L = K-27 and ENDFLAG = 0. Both cases continue
# at WORK with L in d1.
5930 | #--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4. | ||
5931 | #--integer quotient will be stored in N | ||
5932 | #--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1) | ||
5933 | LOOP: | ||
5934 | fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2 | ||
5935 | mov.w INARG(%a6),%d1 | ||
5936 | mov.l %d1,%a1 # save a copy of D1 | ||
5937 | and.l &0x00007FFF,%d1 | ||
5938 | sub.l &0x00003FFF,%d1 # d1 = K | ||
5939 | cmp.l %d1,&28 | ||
5940 | ble.b LASTLOOP | ||
5941 | CONTLOOP: | ||
5942 | sub.l &27,%d1 # d1 = L := K-27 | ||
5943 | mov.b &0,ENDFLAG(%a6) | ||
5944 | bra.b WORK | ||
5945 | LASTLOOP: | ||
5946 | clr.l %d1 # d1 = L := 0 | ||
5947 | mov.b &1,ENDFLAG(%a6) | ||
5948 | |||
# WORK: body of one reduction pass. On entry d1 = L, a1 = sign/exponent
# word of the current argument, (fp0,fp1) = (R,r).
#   1. FP_SCR0 = 2**(-L)*(2/PI); fp2 = R * FP_SCR0.
#   2. fp2 is rounded to an integer N by adding and then subtracting
#      SIGN*2**63, built as a single-precision constant in TWOTO63.
#   3. FP_SCR0 / FP_SCR1 are rebuilt as 2**L*Piby2_1 / 2**L*Piby2_2.
#   4. (R,r) - N*P1 - N*P2 is formed in pieces (fp3, fp4, fp5 as
#      temporaries) so that the new R in fp0 carries the leading bits.
#   5. d1 is reloaded with ENDFLAG. If it is set, control goes to
#      RESTORE without computing the new r; otherwise r is recomputed
#      in fp1 and the loop repeats.
# d2 is scratch throughout; the caller's value was saved at REDUCEX.
5949 | WORK: | ||
5950 | #--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN | ||
5951 | #--THAT INT( X * (2/PI) / 2**(L) ) < 2**29. | ||
5952 | |||
5953 | #--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63), | ||
5954 | #--2**L * (PIby2_1), 2**L * (PIby2_2) | ||
5955 | |||
5956 | mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI | ||
5957 | sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI) | ||
5958 | |||
5959 | mov.l &0xA2F9836E,FP_SCR0_HI(%a6) | ||
5960 | mov.l &0x4E44152A,FP_SCR0_LO(%a6) | ||
5961 | mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI) | ||
5962 | |||
5963 | fmov.x %fp0,%fp2 | ||
5964 | fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI) | ||
5965 | |||
5966 | #--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN | ||
5967 | #--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N | ||
5968 | #--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT | ||
5969 | #--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE | ||
5970 | #--US THE DESIRED VALUE IN FLOATING POINT. | ||
5971 | mov.l %a1,%d2 | ||
5972 | swap %d2 | ||
5973 | and.l &0x80000000,%d2 | ||
5974 | or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL | ||
5975 | mov.l %d2,TWOTO63(%a6) | ||
5976 | fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP2 IS ROUNDED | ||
5977 | fsub.s TWOTO63(%a6),%fp2 # fp2 = N | ||
5978 | # fintrz.x %fp2,%fp2 | ||
5979 | |||
5980 | #--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2 | ||
5981 | mov.l %d1,%d2 # d2 = L | ||
5982 | |||
5983 | add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2) | ||
5984 | mov.w %d2,FP_SCR0_EX(%a6) | ||
5985 | mov.l &0xC90FDAA2,FP_SCR0_HI(%a6) | ||
5986 | clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1 | ||
5987 | |||
5988 | add.l &0x00003FDD,%d1 | ||
5989 | mov.w %d1,FP_SCR1_EX(%a6) | ||
5990 | mov.l &0x85A308D3,FP_SCR1_HI(%a6) | ||
5991 | clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2 | ||
5992 | |||
5993 | mov.b ENDFLAG(%a6),%d1 | ||
5994 | |||
5995 | #--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and | ||
5996 | #--P2 = 2**(L) * Piby2_2 | ||
5997 | fmov.x %fp2,%fp4 # fp4 = N | ||
5998 | fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1 | ||
5999 | fmov.x %fp2,%fp5 # fp5 = N | ||
6000 | fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2 | ||
6001 | fmov.x %fp4,%fp3 # fp3 = W = N*P1 | ||
6002 | |||
6003 | #--we want P+p = W+w but |p| <= half ulp of P | ||
6004 | #--Then, we need to compute A := R-P and a := r-p | ||
6005 | fadd.x %fp5,%fp3 # fp3 = P | ||
6006 | fsub.x %fp3,%fp4 # fp4 = W-P | ||
6007 | |||
6008 | fsub.x %fp3,%fp0 # fp0 = A := R - P | ||
6009 | fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w | ||
6010 | |||
6011 | fmov.x %fp0,%fp3 # fp3 = A | ||
6012 | fsub.x %fp4,%fp1 # fp1 = a := r - p | ||
6013 | |||
6014 | #--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but | ||
6015 | #--|r| <= half ulp of R. | ||
6016 | fadd.x %fp1,%fp0 # fp0 = R := A+a | ||
6017 | #--No need to calculate r if this is the last loop | ||
6018 | cmp.b %d1,&0 | ||
6019 | bgt.w RESTORE | ||
6020 | |||
6021 | #--Need to calculate r | ||
6022 | fsub.x %fp0,%fp3 # fp3 = A-R | ||
6023 | fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a | ||
6024 | bra.w LOOP | ||
6025 | |||
# RESTORE: exit from the reduction loop. The final quotient N (still in
# fp2) is converted to an integer and parked in INT before fp2-fp5 and
# d2 are popped. It is then reloaded into d1 and rotated right by one so
# that the low bit of N lands in bit 31, which is the only property of
# d1 that TANCONT tests (d1 < 0 means N is odd). fp0 holds the reduced
# argument R.
6026 | RESTORE: | ||
6027 | fmov.l %fp2,INT(%a6) | ||
6028 | mov.l (%sp)+,%d2 # restore d2 | ||
6029 | fmovm.x (%sp)+,&0x3c # restore {fp2-fp5} | ||
6030 | |||
6031 | mov.l INT(%a6),%d1 | ||
6032 | ror.l &1,%d1 | ||
6033 | |||
6034 | bra.w TANCONT | ||
6035 | |||
6036 | ######################################################################### | ||
6037 | # satan(): computes the arctangent of a normalized number # | ||
6038 | # satand(): computes the arctangent of a denormalized number # | ||
6039 | # # | ||
6040 | # INPUT *************************************************************** # | ||
6041 | # a0 = pointer to extended precision input # | ||
6042 | # d0 = round precision,mode # | ||
6043 | # # | ||
6044 | # OUTPUT ************************************************************** # | ||
6045 | # fp0 = arctan(X) # | ||
6046 | # # | ||
6047 | # ACCURACY and MONOTONICITY ******************************************* # | ||
6048 | # The returned result is within 2 ulps in 64 significant bits, # | ||
6049 | # i.e. within 0.5001 ulp to 53 bits if the result is subsequently # | ||
6050 | # rounded to double precision. The result is provably monotonic # | ||
6051 | # in double precision. # | ||
6052 | # # | ||
6053 | # ALGORITHM *********************************************************** # | ||
6054 | # Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. # | ||
6055 | # # | ||
6056 | # Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. # | ||
6057 | # Note that k = -4, -3,..., or 3. # | ||
6058 | # Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 # | ||
6059 | # significant bits of X with a bit-1 attached at the 6-th # | ||
6060 | # bit position. Define u to be u = (X-F) / (1 + X*F). # | ||
6061 | # # | ||
6062 | # Step 3. Approximate arctan(u) by a polynomial poly. # | ||
6063 | # # | ||
6064 | # Step 4. Return arctan(F) + poly, arctan(F) is fetched from a # | ||
6065 | # table of values calculated beforehand. Exit. # | ||
6066 | # # | ||
6067 | # Step 5. If |X| >= 16, go to Step 7. # | ||
6068 | # # | ||
6069 | # Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. # | ||
6070 | # # | ||
6071 | # Step 7. Define X' = -1/X. Approximate arctan(X') by an odd # | ||
6072 | # polynomial in X'. # | ||
6073 | # Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. # | ||
6074 | # # | ||
6075 | ######################################################################### | ||
6076 | |||
# Constants for satan.
# ATANA1-3: coefficients of the short polynomial used in ATANMAIN.
# ATANB1-6: coefficients of the small-argument polynomial (ATANSM).
# ATANC1-5: coefficients of the large-argument polynomial (ATANBIG).
# All of the above are two longwords each and are read with .d
# (double-precision) operand size by the code that uses them.
# PPIBY2/NPIBY2 and PTINY/NTINY are four longwords each and are read
# with .x (extended) operand size: +-PI/2 and a signed tiny value that
# ATANHUGE adds to +-PI/2.
6077 | ATANA3: long 0xBFF6687E,0x314987D8 | ||
6078 | ATANA2: long 0x4002AC69,0x34A26DB3 | ||
6079 | ATANA1: long 0xBFC2476F,0x4E1DA28E | ||
6080 | |||
6081 | ATANB6: long 0x3FB34444,0x7F876989 | ||
6082 | ATANB5: long 0xBFB744EE,0x7FAF45DB | ||
6083 | ATANB4: long 0x3FBC71C6,0x46940220 | ||
6084 | ATANB3: long 0xBFC24924,0x921872F9 | ||
6085 | ATANB2: long 0x3FC99999,0x99998FA9 | ||
6086 | ATANB1: long 0xBFD55555,0x55555555 | ||
6087 | |||
6088 | ATANC5: long 0xBFB70BF3,0x98539E6A | ||
6089 | ATANC4: long 0x3FBC7187,0x962D1D7D | ||
6090 | ATANC3: long 0xBFC24924,0x827107B8 | ||
6091 | ATANC2: long 0x3FC99999,0x9996263E | ||
6092 | ATANC1: long 0xBFD55555,0x55555536 | ||
6093 | |||
6094 | PPIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 | ||
6095 | NPIBY2: long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000 | ||
6096 | |||
6097 | PTINY: long 0x00010000,0x80000000,0x00000000,0x00000000 | ||
6098 | NTINY: long 0x80010000,0x80000000,0x00000000,0x00000000 | ||
6099 | |||
# ATANTBL: lookup table of ATAN(|F|) used by ATANMAIN.
# 128 entries of 16 bytes each (four longwords per row). ATANMAIN copies
# only the first three longwords of an entry - an extended-precision
# value - and every fourth longword in the table is zero.
# The byte offset of an entry is formed in ATANMAIN from (exponent of
# X - 0x3FFB) and the four fraction bits that follow the leading bit,
# i.e. 8 exponents times 16 fraction patterns.
6100 | ATANTBL: | ||
6101 | long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000 | ||
6102 | long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000 | ||
6103 | long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000 | ||
6104 | long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000 | ||
6105 | long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000 | ||
6106 | long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000 | ||
6107 | long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000 | ||
6108 | long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000 | ||
6109 | long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000 | ||
6110 | long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000 | ||
6111 | long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000 | ||
6112 | long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000 | ||
6113 | long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000 | ||
6114 | long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000 | ||
6115 | long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000 | ||
6116 | long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000 | ||
6117 | long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000 | ||
6118 | long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000 | ||
6119 | long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000 | ||
6120 | long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000 | ||
6121 | long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000 | ||
6122 | long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000 | ||
6123 | long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000 | ||
6124 | long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000 | ||
6125 | long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000 | ||
6126 | long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000 | ||
6127 | long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000 | ||
6128 | long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000 | ||
6129 | long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000 | ||
6130 | long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000 | ||
6131 | long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000 | ||
6132 | long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000 | ||
6133 | long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000 | ||
6134 | long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000 | ||
6135 | long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000 | ||
6136 | long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000 | ||
6137 | long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000 | ||
6138 | long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000 | ||
6139 | long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000 | ||
6140 | long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000 | ||
6141 | long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000 | ||
6142 | long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000 | ||
6143 | long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000 | ||
6144 | long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000 | ||
6145 | long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000 | ||
6146 | long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000 | ||
6147 | long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000 | ||
6148 | long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000 | ||
6149 | long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000 | ||
6150 | long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000 | ||
6151 | long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000 | ||
6152 | long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000 | ||
6153 | long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000 | ||
6154 | long 0x3FFE0000,0x97731420,0x365E538C,0x00000000 | ||
6155 | long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000 | ||
6156 | long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000 | ||
6157 | long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000 | ||
6158 | long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000 | ||
6159 | long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000 | ||
6160 | long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000 | ||
6161 | long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000 | ||
6162 | long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000 | ||
6163 | long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000 | ||
6164 | long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000 | ||
6165 | long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000 | ||
6166 | long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000 | ||
6167 | long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000 | ||
6168 | long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000 | ||
6169 | long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000 | ||
6170 | long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000 | ||
6171 | long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000 | ||
6172 | long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000 | ||
6173 | long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000 | ||
6174 | long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000 | ||
6175 | long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000 | ||
6176 | long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000 | ||
6177 | long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000 | ||
6178 | long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000 | ||
6179 | long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000 | ||
6180 | long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000 | ||
6181 | long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000 | ||
6182 | long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000 | ||
6183 | long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000 | ||
6184 | long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000 | ||
6185 | long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000 | ||
6186 | long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000 | ||
6187 | long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000 | ||
6188 | long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000 | ||
6189 | long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000 | ||
6190 | long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000 | ||
6191 | long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000 | ||
6192 | long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000 | ||
6193 | long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000 | ||
6194 | long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000 | ||
6195 | long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000 | ||
6196 | long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000 | ||
6197 | long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000 | ||
6198 | long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000 | ||
6199 | long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000 | ||
6200 | long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000 | ||
6201 | long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000 | ||
6202 | long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000 | ||
6203 | long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000 | ||
6204 | long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000 | ||
6205 | long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000 | ||
6206 | long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000 | ||
6207 | long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000 | ||
6208 | long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000 | ||
6209 | long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000 | ||
6210 | long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000 | ||
6211 | long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000 | ||
6212 | long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000 | ||
6213 | long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000 | ||
6214 | long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000 | ||
6215 | long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000 | ||
6216 | long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000 | ||
6217 | long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000 | ||
6218 | long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000 | ||
6219 | long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000 | ||
6220 | long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000 | ||
6221 | long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000 | ||
6222 | long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000 | ||
6223 | long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000 | ||
6224 | long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000 | ||
6225 | long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000 | ||
6226 | long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000 | ||
6227 | long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000 | ||
6228 | long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000 | ||
6229 | |||
# %a6-relative scratch aliases used by satan.
# X is a 12-byte extended operand slot at FP_SCR0: XDCARE, XFRAC and
# XFRACLO are byte offsets 2, 4 and 8 into it. satan masks XFRAC and
# clears XFRACLO to turn the stored X into F.
# ATANF is a second extended slot at FP_SCR1 that receives the table
# value SIGN(F)*ATAN(|F|); ATANFHI/ATANFLO are offsets 4 and 8 into it.
6230 | set X,FP_SCR0 | ||
6231 | set XDCARE,X+2 | ||
6232 | set XFRAC,X+4 | ||
6233 | set XFRACLO,X+8 | ||
6234 | |||
6235 | set ATANF,FP_SCR1 | ||
6236 | set ATANFHI,ATANF+4 | ||
6237 | set ATANFLO,ATANF+8 | ||
6238 | |||
# satan: arctangent entry point.
#   a0 = pointer to the extended-precision input
#   d0 = value written to FPCR just before the final instruction of
#        whichever path is taken
# The input is loaded into fp0 and also stored to X(%a6). d1 is built as
# the compact magnitude (exponent word in the upper half, top 16
# mantissa bits in the lower half, sign cleared) and used to dispatch:
#   compact |X| <  0x3FFB8000 -> ATANSM
#   compact |X| <= 0x4002FFFF -> ATANMAIN
#   otherwise                 -> ATANBIG
6239 | global satan | ||
6240 | #--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S | ||
6241 | satan: | ||
6242 | fmov.x (%a0),%fp0 # LOAD INPUT | ||
6243 | |||
6244 | mov.l (%a0),%d1 | ||
6245 | mov.w 4(%a0),%d1 | ||
6246 | fmov.x %fp0,X(%a6) | ||
6247 | and.l &0x7FFFFFFF,%d1 | ||
6248 | |||
6249 | cmp.l %d1,&0x3FFB8000 # |X| >= 1/16? | ||
6250 | bge.b ATANOK1 | ||
6251 | bra.w ATANSM | ||
6252 | |||
6253 | ATANOK1: | ||
6254 | cmp.l %d1,&0x4002FFFF # |X| < 16 ? | ||
6255 | ble.b ATANMAIN | ||
6256 | bra.w ATANBIG | ||
6257 | |||
6258 | #--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE | ||
6259 | #--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ). | ||
6260 | #--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN | ||
6261 | #--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE | ||
6262 | #--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS | ||
6263 | #--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR | ||
6264 | #--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO | ||
6265 | #--FETCH F AND SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE | ||
6266 | #--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL | ||
6267 | #--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES | ||
6268 | #--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION | ||
6269 | #--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION | ||
6270 | #--WILL INVOLVE A VERY LONG POLYNOMIAL. | ||
6271 | |||
6272 | #--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS | ||
6273 | #--WE CHOOSE F TO BE +-2^K * 1.BBBB1 | ||
6274 | #--THAT IS, IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE | ||
6275 | #--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE | ||
6276 | #--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS | ||
6277 | #-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|). | ||
6278 | |||
# ATANMAIN: table-based path.
# On entry fp0 = X, X(%a6) = X, d1 = compact |X|.
#   1. The stored copy of X is turned into F in place: the top 5
#      mantissa bits are kept, the 6th is forced to 1 and everything
#      below is cleared.
#   2. fp0 = U = (X-F)/(1+X*F).
#   3. d1/d2 are combined into a byte offset into ATANTBL (16 bytes per
#      entry). The entry's three longwords are copied to ATANF and the
#      sign of X is OR-ed into its first longword. d2 is saved and
#      restored around this.
#   4. ATAN(U) is evaluated as U + A1*U*V*(A2 + V*(A3 + V)), V = U*U,
#      with fp2 saved and restored around the evaluation.
#   5. FPCR is reloaded from d0 and the final fadd of ATANF produces
#      the result before the branch to t_inx2 (defined outside this
#      chunk).
6279 | ATANMAIN: | ||
6280 | |||
6281 | and.l &0xF8000000,XFRAC(%a6) # FIRST 5 BITS | ||
6282 | or.l &0x04000000,XFRAC(%a6) # SET 6-TH BIT TO 1 | ||
6283 | mov.l &0x00000000,XFRACLO(%a6) # LOCATION OF X IS NOW F | ||
6284 | |||
6285 | fmov.x %fp0,%fp1 # FP1 IS X | ||
6286 | fmul.x X(%a6),%fp1 # FP1 IS X*F, NOTE THAT X*F > 0 | ||
6287 | fsub.x X(%a6),%fp0 # FP0 IS X-F | ||
6288 | fadd.s &0x3F800000,%fp1 # FP1 IS 1 + X*F | ||
6289 | fdiv.x %fp1,%fp0 # FP0 IS U = (X-F)/(1+X*F) | ||
6290 | |||
6291 | #--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|) | ||
6292 | #--CREATE ATAN(F) AND STORE IT IN ATANF, AND | ||
6293 | #--SAVE REGISTERS FP2. | ||
6294 | |||
6295 | mov.l %d2,-(%sp) # SAVE d2 TEMPORARILY | ||
6296 | mov.l %d1,%d2 # THE EXP AND 16 BITS OF X | ||
6297 | and.l &0x00007800,%d1 # 4 VARYING BITS OF F'S FRACTION | ||
6298 | and.l &0x7FFF0000,%d2 # EXPONENT OF F | ||
6299 | sub.l &0x3FFB0000,%d2 # K+4 | ||
6300 | asr.l &1,%d2 | ||
6301 | add.l %d2,%d1 # THE 7 BITS IDENTIFYING F | ||
6302 | asr.l &7,%d1 # INDEX INTO TBL OF ATAN(|F|) | ||
6303 | lea ATANTBL(%pc),%a1 | ||
6304 | add.l %d1,%a1 # ADDRESS OF ATAN(|F|) | ||
6305 | mov.l (%a1)+,ATANF(%a6) | ||
6306 | mov.l (%a1)+,ATANFHI(%a6) | ||
6307 | mov.l (%a1)+,ATANFLO(%a6) # ATANF IS NOW ATAN(|F|) | ||
6308 | mov.l X(%a6),%d1 # LOAD SIGN AND EXPO. AGAIN | ||
6309 | and.l &0x80000000,%d1 # SIGN(F) | ||
6310 | or.l %d1,ATANF(%a6) # ATANF IS NOW SIGN(F)*ATAN(|F|) | ||
6311 | mov.l (%sp)+,%d2 # RESTORE d2 | ||
6312 | |||
6313 | #--THAT'S ALL I HAVE TO DO FOR NOW, | ||
6314 | #--BUT ALAS, THE DIVIDE IS STILL CRANKING! | ||
6315 | |||
6316 | #--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS | ||
6317 | #--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U | ||
6318 | #--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT. | ||
6319 | #--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3)) | ||
6320 | #--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3. | ||
6321 | #--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT | ||
6322 | #--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED | ||
6323 | |||
6324 | fmovm.x &0x04,-(%sp) # save fp2 | ||
6325 | |||
6326 | fmov.x %fp0,%fp1 | ||
6327 | fmul.x %fp1,%fp1 | ||
6328 | fmov.d ATANA3(%pc),%fp2 | ||
6329 | fadd.x %fp1,%fp2 # A3+V | ||
6330 | fmul.x %fp1,%fp2 # V*(A3+V) | ||
6331 | fmul.x %fp0,%fp1 # U*V | ||
6332 | fadd.d ATANA2(%pc),%fp2 # A2+V*(A3+V) | ||
6333 | fmul.d ATANA1(%pc),%fp1 # A1*U*V | ||
6334 | fmul.x %fp2,%fp1 # A1*U*V*(A2+V*(A3+V)) | ||
6335 | fadd.x %fp1,%fp0 # ATAN(U), FP1 RELEASED | ||
6336 | |||
6337 | fmovm.x (%sp)+,&0x20 # restore fp2 | ||
6338 | |||
6339 | fmov.l %d0,%fpcr # restore users rnd mode,prec | ||
6340 | fadd.x ATANF(%a6),%fp0 # ATAN(X) | ||
6341 | bra t_inx2 | ||
6342 | |||
# ATANBORS: secondary dispatch on the compact magnitude in d1.
# NOTE(review): this label cannot be reached by fall-through (the code
# before it ends in an unconditional bra) and no branch to it is visible
# in this part of the file - confirm whether it is still referenced.
6343 | ATANBORS: | ||
6344 | #--|X| IS IN d1 IN COMPACT FORM. FP1, d0 SAVED. | ||
6345 | #--FP0 IS X AND |X| <= 1/16 OR |X| >= 16. | ||
6346 | cmp.l %d1,&0x3FFF8000 | ||
6347 | bgt.w ATANBIG # I.E. |X| >= 16 | ||
6348 | |||
# ATANSM: small-argument path, entered from satan with fp0 = X,
# X(%a6) = X and d1 = compact |X|. Arguments whose compact form is below
# 0x3FD78000 go to ATANTINY. Otherwise the odd polynomial is evaluated
# as two interleaved chains in Z = Y*Y (fp2 and fp3/fp1), with fp2/fp3
# saved and restored around it. FPCR is reloaded from d0 just before the
# final fadd of X.
6349 | ATANSM: | ||
6350 | #--|X| <= 1/16 | ||
6351 | #--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE | ||
6352 | #--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6))))) | ||
6353 | #--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] ) | ||
6354 | #--WHERE Y = X*X, AND Z = Y*Y. | ||
6355 | |||
6356 | cmp.l %d1,&0x3FD78000 | ||
6357 | blt.w ATANTINY | ||
6358 | |||
6359 | #--COMPUTE POLYNOMIAL | ||
6360 | fmovm.x &0x0c,-(%sp) # save fp2/fp3 | ||
6361 | |||
6362 | fmul.x %fp0,%fp0 # FP0 IS Y = X*X | ||
6363 | |||
6364 | fmov.x %fp0,%fp1 | ||
6365 | fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y | ||
6366 | |||
6367 | fmov.d ATANB6(%pc),%fp2 | ||
6368 | fmov.d ATANB5(%pc),%fp3 | ||
6369 | |||
6370 | fmul.x %fp1,%fp2 # Z*B6 | ||
6371 | fmul.x %fp1,%fp3 # Z*B5 | ||
6372 | |||
6373 | fadd.d ATANB4(%pc),%fp2 # B4+Z*B6 | ||
6374 | fadd.d ATANB3(%pc),%fp3 # B3+Z*B5 | ||
6375 | |||
6376 | fmul.x %fp1,%fp2 # Z*(B4+Z*B6) | ||
6377 | fmul.x %fp3,%fp1 # Z*(B3+Z*B5) | ||
6378 | |||
6379 | fadd.d ATANB2(%pc),%fp2 # B2+Z*(B4+Z*B6) | ||
6380 | fadd.d ATANB1(%pc),%fp1 # B1+Z*(B3+Z*B5) | ||
6381 | |||
6382 | fmul.x %fp0,%fp2 # Y*(B2+Z*(B4+Z*B6)) | ||
6383 | fmul.x X(%a6),%fp0 # X*Y | ||
6384 | |||
6385 | fadd.x %fp2,%fp1 # [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] | ||
6386 | |||
6387 | fmul.x %fp1,%fp0 # X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) | ||
6388 | |||
6389 | fmovm.x (%sp)+,&0x30 # restore fp2/fp3 | ||
6390 | |||
6391 | fmov.l %d0,%fpcr # restore users rnd mode,prec | ||
6392 | fadd.x X(%a6),%fp0 | ||
6393 | bra t_inx2 | ||
6394 | |||
# ATANTINY: the result is X itself. FPCR is reloaded from d0 first, so
# the single fmov that reloads X from X(%a6) runs under the caller's
# settings. d1 is loaded with FMOV_OP before the branch to t_catch
# (defined outside this chunk).
6395 | ATANTINY: | ||
6396 | #--|X| < 2^(-40), ATAN(X) = X | ||
6397 | |||
6398 | fmov.l %d0,%fpcr # restore users rnd mode,prec | ||
6399 | mov.b &FMOV_OP,%d1 # last inst is MOVE | ||
6400 | fmov.x X(%a6),%fp0 # last inst - possible exception set | ||
6401 | |||
6402 | bra t_catch | ||
6403 | |||
# ATANBIG: large-argument path, entered with fp0 = X and d1 = compact
# |X|. Arguments whose compact form exceeds 0x40638000 go to ATANHUGE.
# Otherwise X' = -1/X is formed, saved in X(%a6), and ATAN(X') is
# evaluated with the C coefficients (fp2/fp3 saved and restored around
# the evaluation). After FPCR is reloaded from d0, the sign byte of the
# original operand at (%a0) selects the final add: NPIBY2 then t_minx2
# for a negative input, PPIBY2 then t_pinx2 for a positive one (both
# targets are defined outside this chunk).
6404 | ATANBIG: | ||
6405 | #--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE, | ||
6406 | #--RETURN SIGN(X)*PI/2 + ATAN(-1/X). | ||
6407 | cmp.l %d1,&0x40638000 | ||
6408 | bgt.w ATANHUGE | ||
6409 | |||
6410 | #--APPROXIMATE ATAN(-1/X) BY | ||
6411 | #--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X' | ||
6412 | #--THIS CAN BE RE-WRITTEN AS | ||
6413 | #--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y. | ||
6414 | |||
6415 | fmovm.x &0x0c,-(%sp) # save fp2/fp3 | ||
6416 | |||
6417 | fmov.s &0xBF800000,%fp1 # LOAD -1 | ||
6418 | fdiv.x %fp0,%fp1 # FP1 IS -1/X | ||
6419 | |||
6420 | #--DIVIDE IS STILL CRANKING | ||
6421 | |||
6422 | fmov.x %fp1,%fp0 # FP0 IS X' | ||
6423 | fmul.x %fp0,%fp0 # FP0 IS Y = X'*X' | ||
6424 | fmov.x %fp1,X(%a6) # X IS REALLY X' | ||
6425 | |||
6426 | fmov.x %fp0,%fp1 | ||
6427 | fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y | ||
6428 | |||
6429 | fmov.d ATANC5(%pc),%fp3 | ||
6430 | fmov.d ATANC4(%pc),%fp2 | ||
6431 | |||
6432 | fmul.x %fp1,%fp3 # Z*C5 | ||
6433 | fmul.x %fp1,%fp2 # Z*C4 | ||
6434 | |||
6435 | fadd.d ATANC3(%pc),%fp3 # C3+Z*C5 | ||
6436 | fadd.d ATANC2(%pc),%fp2 # C2+Z*C4 | ||
6437 | |||
6438 | fmul.x %fp3,%fp1 # Z*(C3+Z*C5), FP3 RELEASED | ||
6439 | fmul.x %fp0,%fp2 # Y*(C2+Z*C4) | ||
6440 | |||
6441 | fadd.d ATANC1(%pc),%fp1 # C1+Z*(C3+Z*C5) | ||
6442 | fmul.x X(%a6),%fp0 # X'*Y | ||
6443 | |||
6444 | fadd.x %fp2,%fp1 # [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] | ||
6445 | |||
6446 | fmul.x %fp1,%fp0 # X'*Y*([C1+Z*(C3+Z*C5)] | ||
6447 | # ... +[Y*(C2+Z*C4)]) | ||
6448 | fadd.x X(%a6),%fp0 | ||
6449 | |||
6450 | fmovm.x (%sp)+,&0x30 # restore fp2/fp3 | ||
6451 | |||
6452 | fmov.l %d0,%fpcr # restore users rnd mode,prec | ||
6453 | tst.b (%a0) | ||
6454 | bpl.b pos_big | ||
6455 | |||
6456 | neg_big: | ||
6457 | fadd.x NPIBY2(%pc),%fp0 | ||
6458 | bra t_minx2 | ||
6459 | |||
6460 | pos_big: | ||
6461 | fadd.x PPIBY2(%pc),%fp0 | ||
6462 | bra t_pinx2 | ||
6463 | |||
# ATANHUGE: the sign byte of the original operand at (%a0) selects the
# branch. +-PI/2 is loaded first, FPCR is then reloaded from d0, and the
# final fadd of the oppositely-signed TINY constant is the instruction
# executed under the caller's settings. Exits through t_minx2 for a
# negative input and t_pinx2 for a positive one (both defined outside
# this chunk).
6464 | ATANHUGE: | ||
6465 | #--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY | ||
6466 | tst.b (%a0) | ||
6467 | bpl.b pos_huge | ||
6468 | |||
6469 | neg_huge: | ||
6470 | fmov.x NPIBY2(%pc),%fp0 | ||
6471 | fmov.l %d0,%fpcr | ||
6472 | fadd.x PTINY(%pc),%fp0 | ||
6473 | bra t_minx2 | ||
6474 | |||
6475 | pos_huge: | ||
6476 | fmov.x PPIBY2(%pc),%fp0 | ||
6477 | fmov.l %d0,%fpcr | ||
6478 | fadd.x NTINY(%pc),%fp0 | ||
6479 | bra t_pinx2 | ||
6480 | |||
# satand: does no work of its own and simply branches to t_extdnrm
# (defined outside this chunk).
6481 | global satand | ||
6482 | #--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT | ||
6483 | satand: | ||
6484 | bra t_extdnrm | ||
6485 | |||
6486 | ######################################################################### | ||
6487 | # sasin(): computes the inverse sine of a normalized input # | ||
6488 | # sasind(): computes the inverse sine of a denormalized input # | ||
6489 | # # | ||
6490 | # INPUT *************************************************************** # | ||
6491 | # a0 = pointer to extended precision input # | ||
6492 | # d0 = round precision,mode # | ||
6493 | # # | ||
6494 | # OUTPUT ************************************************************** # | ||
6495 | # fp0 = arcsin(X) # | ||
6496 | # # | ||
6497 | # ACCURACY and MONOTONICITY ******************************************* # | ||
6498 | # The returned result is within 3 ulps in 64 significant bits, # | ||
6499 | # i.e. within 0.5001 ulp to 53 bits if the result is subsequently # | ||
6500 | # rounded to double precision. The result is provably monotonic # | ||
6501 | # in double precision. # | ||
6502 | # # | ||
6503 | # ALGORITHM *********************************************************** # | ||
6504 | # # | ||
6505 | # ASIN # | ||
6506 | # 1. If |X| >= 1, go to 3. # | ||
6507 | # # | ||
6508 | # 2. (|X| < 1) Calculate asin(X) by # | ||
6509 | # z := sqrt( [1-X][1+X] ) # | ||
6510 | # asin(X) = atan( x / z ). # | ||
6511 | # Exit. # | ||
6512 | # # | ||
6513 | # 3. If |X| > 1, go to 5. # | ||
6514 | # # | ||
6515 | # 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.# | ||
6516 | # # | ||
6517 | # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. # | ||
6518 | # Exit. # | ||
6519 | # # | ||
6520 | ######################################################################### | ||
6521 | |||
6522 | global sasin | ||
6523 | sasin: | ||
6524 | fmov.x (%a0),%fp0 # fp0 = X, the input pointed to by a0 | ||
6525 | |||
6526 | mov.l (%a0),%d1 | ||
6527 | mov.w 4(%a0),%d1 | ||
6528 | and.l &0x7FFFFFFF,%d1 | ||
6529 | cmp.l %d1,&0x3FFF8000 | ||
6530 | bge.b ASINBIG | ||
6531 | |||
6532 | # This catch is added here for the '060 QSP. Originally, the call to | ||
6533 | # satan() would handle this case by causing the exception which would | ||
6534 | # not be caught until gen_except(). Now, with the exceptions being | ||
6535 | # detected inside of satan(), the exception would have been handled there | ||
6536 | # instead of inside sasin() as expected. | ||
6537 | cmp.l %d1,&0x3FD78000 | ||
6538 | blt.w ASINTINY | ||
6539 | |||
6540 | #--THIS IS THE USUAL CASE, 2^(-40) <= |X| < 1 | ||
6541 | #--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) ) | ||
6542 | |||
6543 | ASINMAIN: | ||
6544 | fmov.s &0x3F800000,%fp1 | ||
6545 | fsub.x %fp0,%fp1 # fp1 = 1-X | ||
6546 | fmovm.x &0x4,-(%sp) # save {fp2} on the stack | ||
6547 | fmov.s &0x3F800000,%fp2 | ||
6548 | fadd.x %fp0,%fp2 # fp2 = 1+X | ||
6549 | fmul.x %fp2,%fp1 # fp1 = (1+X)(1-X) | ||
6550 | fmovm.x (%sp)+,&0x20 # restore {fp2} | ||
6551 | fsqrt.x %fp1 # fp1 = SQRT([1-X][1+X]) | ||
6552 | fdiv.x %fp1,%fp0 # fp0 = X/SQRT([1-X][1+X]) | ||
6553 | fmovm.x &0x01,-(%sp) # push fp0 = X/SQRT(...) as the operand for satan | ||
6554 | lea (%sp),%a0 # a0 = ptr to X/SQRT(...) | ||
6555 | bsr satan | ||
6556 | add.l &0xc,%sp # pop the 12-byte X/SQRT(...) operand from stack | ||
6557 | bra t_inx2 | ||
6558 | |||
6559 | ASINBIG: | ||
6560 | fabs.x %fp0 # fp0 = |X| | ||
6561 | fcmp.s %fp0,&0x3F800000 | ||
6562 | fbgt t_operr # |X| > 1: cause an operr exception | ||
6563 | |||
6564 | #--|X| = 1, ASIN(X) = +- PI/2. | ||
6565 | ASINONE: | ||
6566 | fmov.x PIBY2(%pc),%fp0 | ||
6567 | mov.l (%a0),%d1 | ||
6568 | and.l &0x80000000,%d1 # SIGN BIT OF X | ||
6569 | or.l &0x3F800000,%d1 # +-1 IN SGL FORMAT | ||
6570 | mov.l %d1,-(%sp) # push SIGN(X) IN SGL-FMT | ||
6571 | fmov.l %d0,%fpcr | ||
6572 | fmul.s (%sp)+,%fp0 | ||
6573 | bra t_inx2 | ||
6574 | |||
6575 | #--|X| < 2^(-40), ASIN(X) = X | ||
6576 | ASINTINY: | ||
6577 | fmov.l %d0,%fpcr # restore user's rnd mode,prec | ||
6578 | mov.b &FMOV_OP,%d1 # last inst is MOVE | ||
6579 | fmov.x (%a0),%fp0 # last inst - possible exception | ||
6580 | bra t_catch | ||
6581 | |||
6582 | global sasind | ||
6583 | #--ASIN(X) = X FOR DENORMALIZED X; the work is delegated to t_extdnrm | ||
6584 | sasind: | ||
6585 | bra t_extdnrm | ||
6586 | |||
6587 | ######################################################################### | ||
6588 | # sacos(): computes the inverse cosine of a normalized input # | ||
6589 | # sacosd(): computes the inverse cosine of a denormalized input # | ||
6590 | # # | ||
6591 | # INPUT *************************************************************** # | ||
6592 | # a0 = pointer to extended precision input # | ||
6593 | # d0 = round precision,mode # | ||
6594 | # # | ||
6595 | # OUTPUT ************************************************************** # | ||
6596 | # fp0 = arccos(X) # | ||
6597 | # # | ||
6598 | # ACCURACY and MONOTONICITY ******************************************* # | ||
6599 | # The returned result is within 3 ulps in 64 significant bits, # | ||
6600 | # i.e. within 0.5001 ulp to 53 bits if the result is subsequently # | ||
6601 | # rounded to double precision. The result is provably monotonic # | ||
6602 | # in double precision. # | ||
6603 | # # | ||
6604 | # ALGORITHM *********************************************************** # | ||
6605 | # # | ||
6606 | # ACOS # | ||
6607 | # 1. If |X| >= 1, go to 3. # | ||
6608 | # # | ||
6609 | # 2. (|X| < 1) Calculate acos(X) by # | ||
6610 | # z := (1-X) / (1+X) # | ||
6611 | # acos(X) = 2 * atan( sqrt(z) ). # | ||
6612 | # Exit. # | ||
6613 | # # | ||
6614 | # 3. If |X| > 1, go to 5. # | ||
6615 | # # | ||
6616 | # 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. # | ||
6617 | # # | ||
6618 | # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. # | ||
6619 | # Exit. # | ||
6620 | # # | ||
6621 | ######################################################################### | ||
6622 | |||
6623 | global sacos | ||
6624 | sacos: | ||
6625 | fmov.x (%a0),%fp0 # fp0 = X, the input pointed to by a0 | ||
6626 | |||
6627 | mov.l (%a0),%d1 # pack sign/exp w/ upper 16 fraction bits into d1 | ||
6628 | mov.w 4(%a0),%d1 | ||
6629 | and.l &0x7FFFFFFF,%d1 | ||
6630 | cmp.l %d1,&0x3FFF8000 | ||
6631 | bge.b ACOSBIG | ||
6632 | |||
6633 | #--THIS IS THE USUAL CASE, |X| < 1 | ||
6634 | #--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) ) | ||
6635 | |||
6636 | ACOSMAIN: | ||
6637 | fmov.s &0x3F800000,%fp1 | ||
6638 | fadd.x %fp0,%fp1 # fp1 = 1+X | ||
6639 | fneg.x %fp0 # fp0 = -X | ||
6640 | fadd.s &0x3F800000,%fp0 # fp0 = 1-X | ||
6641 | fdiv.x %fp1,%fp0 # fp0 = (1-X)/(1+X) | ||
6642 | fsqrt.x %fp0 # fp0 = SQRT((1-X)/(1+X)) | ||
6643 | mov.l %d0,-(%sp) # save original user's fpcr value (d0) on the stack | ||
6644 | clr.l %d0 | ||
6645 | fmovm.x &0x01,-(%sp) # push fp0 = SQRT(...) as the operand for satan | ||
6646 | lea (%sp),%a0 # a0 = ptr to SQRT(...) | ||
6647 | bsr satan # ATAN(SQRT([1-X]/[1+X])) | ||
6648 | add.l &0xc,%sp # pop the 12-byte SQRT(...) operand from stack | ||
6649 | |||
6650 | fmov.l (%sp)+,%fpcr # pop saved value: restore user's round prec,mode | ||
6651 | fadd.x %fp0,%fp0 # fp0 = 2 * ATAN( STUFF ) | ||
6652 | bra t_pinx2 | ||
6653 | |||
6654 | ACOSBIG: | ||
6655 | fabs.x %fp0 | ||
6656 | fcmp.s %fp0,&0x3F800000 | ||
6657 | fbgt t_operr # |X| > 1: cause an operr exception | ||
6658 | |||
6659 | #--|X| = 1, ACOS(X) = 0 OR PI | ||
6660 | tst.b (%a0) # is X positive or negative? | ||
6661 | bpl.b ACOSP1 | ||
6662 | |||
6663 | #--X = -1 | ||
6664 | #Returns PI and inexact exception | ||
6665 | ACOSM1: | ||
6666 | fmov.x PI(%pc),%fp0 # load PI | ||
6667 | fmov.l %d0,%fpcr # load round mode,prec | ||
6668 | fadd.s &0x00800000,%fp0 # add a small value (single-precision 2^(-126)) | ||
6669 | bra t_pinx2 | ||
6670 | |||
6671 | ACOSP1: | ||
6672 | bra ld_pzero # X = +1: answer is positive zero | ||
6673 | |||
6674 | global sacosd | ||
6675 | #--ACOS(X) = PI/2 FOR DENORMALIZED X (X itself is never read here) | ||
6676 | sacosd: | ||
6677 | fmov.l %d0,%fpcr # load user's rnd mode/prec | ||
6678 | fmov.x PIBY2(%pc),%fp0 | ||
6679 | bra t_pinx2 | ||
6680 | |||
6681 | ######################################################################### | ||
6682 | # setox(): computes the exponential for a normalized input # | ||
6683 | # setoxd(): computes the exponential for a denormalized input # | ||
6684 | # setoxm1(): computes the exponential minus 1 for a normalized input # | ||
6685 | # setoxm1d(): computes the exponential minus 1 for a denormalized input # | ||
6686 | # # | ||
6687 | # INPUT *************************************************************** # | ||
6688 | # a0 = pointer to extended precision input # | ||
6689 | # d0 = round precision,mode # | ||
6690 | # # | ||
6691 | # OUTPUT ************************************************************** # | ||
6692 | # fp0 = exp(X) or exp(X)-1 # | ||
6693 | # # | ||
6694 | # ACCURACY and MONOTONICITY ******************************************* # | ||
6695 | # The returned result is within 0.85 ulps in 64 significant bits, # | ||
6696 | # i.e. within 0.5001 ulp to 53 bits if the result is subsequently # | ||
6697 | # rounded to double precision. The result is provably monotonic # | ||
6698 | # in double precision. # | ||
6699 | # # | ||
6700 | # ALGORITHM and IMPLEMENTATION **************************************** # | ||
6701 | # # | ||
6702 | # setoxd # | ||
6703 | # ------ # | ||
6704 | # Step 1. Set ans := 1.0 # | ||
6705 | # # | ||
6706 | # Step 2. Return ans := ans + sign(X)*2^(-126). Exit. # | ||
6707 | # Notes: This will always generate one exception -- inexact. # | ||
6708 | # # | ||
6709 | # # | ||
6710 | # setox # | ||
6711 | # ----- # | ||
6712 | # # | ||
6713 | # Step 1. Filter out extreme cases of input argument. # | ||
6714 | # 1.1 If |X| >= 2^(-65), go to Step 1.3. # | ||
6715 | # 1.2 Go to Step 7. # | ||
6716 | # 1.3 If |X| < 16380 log(2), go to Step 2. # | ||
6717 | # 1.4 Go to Step 8. # | ||
6718 | # Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.# | ||
6719 | # To avoid the use of floating-point comparisons, a # | ||
6720 | # compact representation of |X| is used. This format is a # | ||
6721 | # 32-bit integer, the upper (more significant) 16 bits # | ||
6722 | # are the sign and biased exponent field of |X|; the # | ||
6723 | # lower 16 bits are the 16 most significant fraction # | ||
6724 | # (including the explicit bit) bits of |X|. Consequently, # | ||
6725 | # the comparisons in Steps 1.1 and 1.3 can be performed # | ||
6726 | # by integer comparison. Note also that the constant # | ||
6727 | # 16380 log(2) used in Step 1.3 is also in the compact # | ||
6728 | # form. Thus taking the branch to Step 2 guarantees # | ||
6729 | # |X| < 16380 log(2). There is no harm to have a small # | ||
6730 | # number of cases where |X| is less than, but close to, # | ||
6731 | # 16380 log(2) and the branch to Step 8 is taken. # | ||
6732 | # # | ||
6733 | # Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). # | ||
6734 | # 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 # | ||
6735 | # was taken) # | ||
6736 | # 2.2 N := round-to-nearest-integer( X * 64/log2 ). # | ||
6737 | # 2.3 Calculate J = N mod 64; so J = 0,1,2,..., # | ||
6738 | # or 63. # | ||
6739 | # 2.4 Calculate M = (N - J)/64; so N = 64M + J. # | ||
6740 | # 2.5 Calculate the address of the stored value of # | ||
6741 | # 2^(J/64). # | ||
6742 | # 2.6 Create the value Scale = 2^M. # | ||
6743 | # Notes: The calculation in 2.2 is really performed by # | ||
6744 | # Z := X * constant # | ||
6745 | # N := round-to-nearest-integer(Z) # | ||
6746 | # where # | ||
6747 | # constant := single-precision( 64/log 2 ). # | ||
6748 | # # | ||
6749 | # Using a single-precision constant avoids memory # | ||
6750 | # access. Another effect of using a single-precision # | ||
6751 | # "constant" is that the calculated value Z is # | ||
6752 | # # | ||
6753 | # Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24). # | ||
6754 | # # | ||
6755 | # This error has to be considered later in Steps 3 and 4. # | ||
6756 | # # | ||
6757 | # Step 3. Calculate X - N*log2/64. # | ||
6758 | # 3.1 R := X + N*L1, # | ||
6759 | # where L1 := single-precision(-log2/64). # | ||
6760 | # 3.2 R := R + N*L2, # | ||
6761 | # L2 := extended-precision(-log2/64 - L1).# | ||
6762 | # Notes: a) The way L1 and L2 are chosen ensures L1+L2 # | ||
6763 | # approximate the value -log2/64 to 88 bits of accuracy. # | ||
6764 | # b) N*L1 is exact because N is no longer than 22 bits # | ||
6765 | # and L1 is no longer than 24 bits. # | ||
6766 | # c) The calculation X+N*L1 is also exact due to # | ||
6767 | # cancellation. Thus, R is practically X+N(L1+L2) to full # | ||
6768 | # 64 bits. # | ||
6769 | # d) It is important to estimate how large can |R| be # | ||
6770 | # after Step 3.2. # | ||
6771 | # # | ||
6772 | # N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) # | ||
6773 | # X*64/log2 (1+eps) = N + f, |f| <= 0.5 # | ||
6774 | # X*64/log2 - N = f - eps*X 64/log2 # | ||
6775 | # X - N*log2/64 = f*log2/64 - eps*X # | ||
6776 | # # | ||
6777 | # # | ||
6778 | # Now |X| <= 16446 log2, thus # | ||
6779 | # # | ||
6780 | # |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 # | ||
6781 | # <= 0.57 log2/64. # | ||
6782 | # This bound will be used in Step 4. # | ||
6783 | # # | ||
6784 | # Step 4. Approximate exp(R)-1 by a polynomial # | ||
6785 | # p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) # | ||
6786 | # Notes: a) In order to reduce memory access, the coefficients # | ||
6787 | # are made as "short" as possible: A1 (which is 1/2), A4 # | ||
6788 | # and A5 are single precision; A2 and A3 are double # | ||
6789 | # precision. # | ||
6790 | # b) Even with the restrictions above, # | ||
6791 | # |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. # | ||
6792 | # Note that 0.0062 is slightly bigger than 0.57 log2/64. # | ||
6793 | # c) To fully utilize the pipeline, p is separated into # | ||
6794 | # two independent pieces of roughly equal complexities # | ||
6795 | # p = [ R + R*S*(A2 + S*A4) ] + # | ||
6796 | # [ S*(A1 + S*(A3 + S*A5)) ] # | ||
6797 | # where S = R*R. # | ||
6798 | # # | ||
6799 | # Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by # | ||
6800 | # ans := T + ( T*p + t) # | ||
6801 | # where T and t are the stored values for 2^(J/64). # | ||
6802 | # Notes: 2^(J/64) is stored as T and t where T+t approximates # | ||
6803 | # 2^(J/64) to roughly 85 bits; T is in extended precision # | ||
6804 | # and t is in single precision. Note also that T is # | ||
6805 | # rounded to 62 bits so that the last two bits of T are # | ||
6806 | # zero. The reason for such a special form is that T-1, # | ||
6807 | # T-2, and T-8 will all be exact --- a property that will # | ||
6808 | # give much more accurate computation of the function # | ||
6809 | # EXPM1. # | ||
6810 | # # | ||
6811 | # Step 6. Reconstruction of exp(X) # | ||
6812 | # exp(X) = 2^M * 2^(J/64) * exp(R). # | ||
6813 | # 6.1 If AdjFlag = 0, go to 6.3 # | ||
6814 | # 6.2 ans := ans * AdjScale # | ||
6815 | # 6.3 Restore the user FPCR # | ||
6816 | # 6.4 Return ans := ans * Scale. Exit. # | ||
6817 | # Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R, # | ||
6818 | # |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will # | ||
6819 | # neither overflow nor underflow. If AdjFlag = 1, that # | ||
6820 | # means that # | ||
6821 | # X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380. # | ||
6822 | # Hence, exp(X) may overflow or underflow or neither. # | ||
6823 | # When that is the case, AdjScale = 2^(M1) where M1 is # | ||
6824 | # approximately M. Thus 6.2 will never cause # | ||
6825 | # over/underflow. Possible exception in 6.4 is overflow # | ||
6826 | # or underflow. The inexact exception is not generated in # | ||
6827 | # 6.4. Although one can argue that the inexact flag # | ||
6828 | # should always be raised, simulating that exception # | ||
6829 | # costs much more than the flag is worth in practical uses. # | ||
6830 | # # | ||
6831 | # Step 7. Return 1 + X. # | ||
6832 | # 7.1 ans := X # | ||
6833 | # 7.2 Restore user FPCR. # | ||
6834 | # 7.3 Return ans := 1 + ans. Exit # | ||
6835 | # Notes: For non-zero X, the inexact exception will always be # | ||
6836 | # raised by 7.3. That is the only exception raised by 7.3.# | ||
6837 | # Note also that we use the FMOVEM instruction to move X # | ||
6838 | # in Step 7.1 to avoid unnecessary trapping. (Although # | ||
6839 | # the FMOVEM may not seem relevant since X is normalized, # | ||
6840 | # the precaution will be useful in the library version of # | ||
6841 | # this code where the separate entry for denormalized # | ||
6842 | # inputs will be done away with.) # | ||
6843 | # # | ||
6844 | # Step 8. Handle exp(X) where |X| >= 16380log2. # | ||
6845 | # 8.1 If |X| > 16480 log2, go to Step 9. # | ||
6846 | # (mimic 2.2 - 2.6) # | ||
6847 | # 8.2 N := round-to-integer( X * 64/log2 ) # | ||
6848 | # 8.3 Calculate J = N mod 64, J = 0,1,...,63 # | ||
6849 | # 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, # | ||
6850 | # AdjFlag := 1. # | ||
6851 | # 8.5 Calculate the address of the stored value # | ||
6852 | # 2^(J/64). # | ||
6853 | # 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. # | ||
6854 | # 8.7 Go to Step 3. # | ||
6855 | # Notes: Refer to notes for 2.2 - 2.6. # | ||
6856 | # # | ||
6857 | # Step 9. Handle exp(X), |X| > 16480 log2. # | ||
6858 | # 9.1 If X < 0, go to 9.3 # | ||
6859 | # 9.2 ans := Huge, go to 9.4 # | ||
6860 | # 9.3 ans := Tiny. # | ||
6861 | # 9.4 Restore user FPCR. # | ||
6862 | # 9.5 Return ans := ans * ans. Exit. # | ||
6863 | # Notes: Exp(X) will surely overflow or underflow, depending on # | ||
6864 | # X's sign. "Huge" and "Tiny" are respectively large/tiny # | ||
6865 | # extended-precision numbers whose square over/underflow # | ||
6866 | # with an inexact result. Thus, 9.5 always raises the # | ||
6867 | # inexact together with either overflow or underflow. # | ||
6868 | # # | ||
6869 | # setoxm1d # | ||
6870 | # -------- # | ||
6871 | # # | ||
6872 | # Step 1. Set ans := 0 # | ||
6873 | # # | ||
6874 | # Step 2. Return ans := X + ans. Exit. # | ||
6875 | # Notes: This will return X with the appropriate rounding # | ||
6876 | # precision prescribed by the user FPCR. # | ||
6877 | # # | ||
6878 | # setoxm1 # | ||
6879 | # ------- # | ||
6880 | # # | ||
6881 | # Step 1. Check |X| # | ||
6882 | # 1.1 If |X| >= 1/4, go to Step 1.3. # | ||
6883 | # 1.2 Go to Step 7. # | ||
6884 | # 1.3 If |X| < 70 log(2), go to Step 2. # | ||
6885 | # 1.4 Go to Step 10. # | ||
6886 | # Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.# | ||
6887 | # However, it is conceivable |X| can be small very often # | ||
6888 | # because EXPM1 is intended to evaluate exp(X)-1 # | ||
6889 | # accurately when |X| is small. For further details on # | ||
6890 | # the comparisons, see the notes on Step 1 of setox. # | ||
6891 | # # | ||
6892 | # Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). # | ||
6893 | # 2.1 N := round-to-nearest-integer( X * 64/log2 ). # | ||
6894 | # 2.2 Calculate J = N mod 64; so J = 0,1,2,..., # | ||
6895 | # or 63. # | ||
6896 | # 2.3 Calculate M = (N - J)/64; so N = 64M + J. # | ||
6897 | # 2.4 Calculate the address of the stored value of # | ||
6898 | # 2^(J/64). # | ||
6899 | # 2.5 Create the values Sc = 2^M and # | ||
6900 | # OnebySc := -2^(-M). # | ||
6901 | # Notes: See the notes on Step 2 of setox. # | ||
6902 | # # | ||
6903 | # Step 3. Calculate X - N*log2/64. # | ||
6904 | # 3.1 R := X + N*L1, # | ||
6905 | # where L1 := single-precision(-log2/64). # | ||
6906 | # 3.2 R := R + N*L2, # | ||
6907 | # L2 := extended-precision(-log2/64 - L1).# | ||
6908 | # Notes: Applying the analysis of Step 3 of setox in this case # | ||
6909 | # shows that |R| <= 0.0055 (note that |X| <= 70 log2 in # | ||
6910 | # this case). # | ||
6911 | # # | ||
6912 | # Step 4. Approximate exp(R)-1 by a polynomial # | ||
6913 | # p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) # | ||
6914 | # Notes: a) In order to reduce memory access, the coefficients # | ||
6915 | # are made as "short" as possible: A1 (which is 1/2), A5 # | ||
6916 | # and A6 are single precision; A2, A3 and A4 are double # | ||
6917 | # precision. # | ||
6918 | # b) Even with the restriction above, # | ||
6919 | # |p - (exp(R)-1)| < |R| * 2^(-72.7) # | ||
6920 | # for all |R| <= 0.0055. # | ||
6921 | # c) To fully utilize the pipeline, p is separated into # | ||
6922 | # two independent pieces of roughly equal complexity # | ||
6923 | # p = [ R*S*(A2 + S*(A4 + S*A6)) ] + # | ||
6924 | # [ R + S*(A1 + S*(A3 + S*A5)) ] # | ||
6925 | # where S = R*R. # | ||
6926 | # # | ||
6927 | # Step 5. Compute 2^(J/64)*p by # | ||
6928 | # p := T*p # | ||
6929 | # where T and t are the stored values for 2^(J/64). # | ||
6930 | # Notes: 2^(J/64) is stored as T and t where T+t approximates # | ||
6931 | # 2^(J/64) to roughly 85 bits; T is in extended precision # | ||
6932 | # and t is in single precision. Note also that T is # | ||
6933 | # rounded to 62 bits so that the last two bits of T are # | ||
6934 | # zero. The reason for such a special form is that T-1, # | ||
6935 | # T-2, and T-8 will all be exact --- a property that will # | ||
6936 | # be exploited in Step 6 below. The total relative error # | ||
6937 | # in p is no bigger than 2^(-67.7) compared to the final # | ||
6938 | # result. # | ||
6939 | # # | ||
6940 | # Step 6. Reconstruction of exp(X)-1 # | ||
6941 | # exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). # | ||
6942 | # 6.1 If M <= 63, go to Step 6.3. # | ||
6943 | # 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 # | ||
6944 | # 6.3 If M >= -3, go to 6.5. # | ||
6945 | # 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 # | ||
6946 | # 6.5 ans := (T + OnebySc) + (p + t). # | ||
6947 | # 6.6 Restore user FPCR. # | ||
6948 | # 6.7 Return ans := Sc * ans. Exit. # | ||
6949 | # Notes: The various arrangements of the expressions give # | ||
6950 | # accurate evaluations. # | ||
6951 | # # | ||
6952 | # Step 7. exp(X)-1 for |X| < 1/4. # | ||
6953 | # 7.1 If |X| >= 2^(-65), go to Step 9. # | ||
6954 | # 7.2 Go to Step 8. # | ||
6955 | # # | ||
6956 | # Step 8. Calculate exp(X)-1, |X| < 2^(-65). # | ||
6957 | # 8.1 If |X| < 2^(-16312), goto 8.3 # | ||
6958 | # 8.2 Restore FPCR; return ans := X - 2^(-16382). # | ||
6959 | # Exit. # | ||
6960 | # 8.3 X := X * 2^(140). # | ||
6961 | # 8.4 Restore FPCR; ans := ans - 2^(-16382). # | ||
6962 | # Return ans := ans*2^(-140). Exit # | ||
6963 | # Notes: The idea is to return "X - tiny" under the user # | ||
6964 | # precision and rounding modes. To avoid unnecessary # | ||
6965 | # inefficiency, we stay away from denormalized numbers # | ||
6966 | # the best we can. For |X| >= 2^(-16312), the # | ||
6967 | # straightforward 8.2 generates the inexact exception as # | ||
6968 | # the case warrants. # | ||
6969 | # # | ||
6970 | # Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial # | ||
6971 | # p = X + X*X*(B1 + X*(B2 + ... + X*B12)) # | ||
6972 | # Notes: a) In order to reduce memory access, the coefficients # | ||
6973 | # are made as "short" as possible: B1 (which is 1/2), B9 # | ||
6974 | # to B12 are single precision; B3 to B8 are double # | ||
6975 | # precision; and B2 is double extended. # | ||
6976 | # b) Even with the restriction above, # | ||
6977 | # |p - (exp(X)-1)| < |X| 2^(-70.6) # | ||
6978 | # for all |X| <= 0.251. # | ||
6979 | # Note that 0.251 is slightly bigger than 1/4. # | ||
6980 | # c) To fully preserve accuracy, the polynomial is # | ||
6981 | # computed as # | ||
6982 | # X + ( S*B1 + Q ) where S = X*X and # | ||
6983 | # Q = X*S*(B2 + X*(B3 + ... + X*B12)) # | ||
6984 | # d) To fully utilize the pipeline, Q is separated into # | ||
6985 | # two independent pieces of roughly equal complexity # | ||
6986 | # Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + # | ||
6987 | # [ S*S*(B3 + S*(B5 + ... + S*B11)) ] # | ||
6988 | # # | ||
6989 | # Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. # | ||
6990 | # 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all # | ||
6991 | # practical purposes. Therefore, go to Step 1 of setox. # | ||
6992 | # 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical # | ||
6993 | # purposes. # | ||
6994 | # ans := -1 # | ||
6995 | # Restore user FPCR # | ||
6996 | # Return ans := ans + 2^(-126). Exit. # | ||
6997 | # Notes: 10.2 will always create an inexact and return -1 + tiny # | ||
6998 | # in the user rounding precision and mode. # | ||
6999 | # # | ||
7000 | ######################################################################### | ||
7001 | |||
7002 | L2: long 0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000 # extended-precision L2 with L1+L2 = -log2/64 (setox Step 3); read with fmul.x, 4th long is zero fill | ||
7003 | |||
7004 | EEXPA3: long 0x3FA55555,0x55554CC1 # setox polynomial coefficient A3 (double), added with fadd.d | ||
7005 | EEXPA2: long 0x3FC55555,0x55554A54 # setox polynomial coefficient A2 (double), added with fadd.d | ||
7006 | |||
7007 | EM1A4: long 0x3F811111,0x11174385 # double; presumably setoxm1 coefficient A4 -- its user is not in this part of the file, confirm | ||
7008 | EM1A3: long 0x3FA55555,0x55554F5A # double; presumably setoxm1 coefficient A3 -- confirm against setoxm1 | ||
7009 | |||
7010 | EM1A2: long 0x3FC55555,0x55555555,0x00000000,0x00000000 # double followed by two zero longs; presumably setoxm1 coefficient A2 -- confirm | ||
7011 | |||
7012 | EM1B8: long 0x3EC71DE3,0xA5774682 # double; presumably B8 of the |X| < 1/4 polynomial of setoxm1 (Step 9) -- confirm | ||
7013 | EM1B7: long 0x3EFA01A0,0x19D7CB68 # double; presumably B7 -- confirm | ||
7014 | |||
7015 | EM1B6: long 0x3F2A01A0,0x1A019DF3 # double; presumably B6 -- confirm | ||
7016 | EM1B5: long 0x3F56C16C,0x16C170E2 # double; presumably B5 -- confirm | ||
7017 | |||
7018 | EM1B4: long 0x3F811111,0x11111111 # double; presumably B4 -- confirm | ||
7019 | EM1B3: long 0x3FA55555,0x55555555 # double; presumably B3 -- confirm | ||
7020 | |||
7021 | EM1B2: long 0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB # extended precision; presumably B2 (the header calls B2 "double extended") -- confirm | ||
7022 | long 0x00000000 # zero fill after the 12-byte extended value | ||
7023 | |||
7024 | TWO140: long 0x48B00000,0x00000000 # 2^(140) as a double (biased exponent 0x48B = 1023+140) | ||
7025 | TWON140: | ||
7026 | long 0x37300000,0x00000000 # 2^(-140) as a double (biased exponent 0x373 = 1023-140) | ||
7027 | |||
7028 | EEXPTBL: # 64 entries of 16 bytes, entry J = 2^(J/64): extended-precision T (3 longs) then single-precision t (1 long); setox indexes it with J<<4 | ||
7029 | long 0x3FFF0000,0x80000000,0x00000000,0x00000000 | ||
7030 | long 0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B | ||
7031 | long 0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9 | ||
7032 | long 0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369 | ||
7033 | long 0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C | ||
7034 | long 0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F | ||
7035 | long 0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729 | ||
7036 | long 0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF | ||
7037 | long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF | ||
7038 | long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA | ||
7039 | long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051 | ||
7040 | long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029 | ||
7041 | long 0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494 | ||
7042 | long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0 | ||
7043 | long 0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D | ||
7044 | long 0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537 | ||
7045 | long 0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD | ||
7046 | long 0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087 | ||
7047 | long 0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818 | ||
7048 | long 0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D | ||
7049 | long 0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890 | ||
7050 | long 0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C | ||
7051 | long 0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05 | ||
7052 | long 0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126 | ||
7053 | long 0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140 | ||
7054 | long 0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA | ||
7055 | long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A | ||
7056 | long 0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC | ||
7057 | long 0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC | ||
7058 | long 0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610 | ||
7059 | long 0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90 | ||
7060 | long 0x3FFF0000,0xB311C412,0xA9112488,0x201F678A | ||
7061 | long 0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13 | ||
7062 | long 0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30 | ||
7063 | long 0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC | ||
7064 | long 0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6 | ||
7065 | long 0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70 | ||
7066 | long 0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518 | ||
7067 | long 0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41 | ||
7068 | long 0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B | ||
7069 | long 0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568 | ||
7070 | long 0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E | ||
7071 | long 0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03 | ||
7072 | long 0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D | ||
7073 | long 0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4 | ||
7074 | long 0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C | ||
7075 | long 0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9 | ||
7076 | long 0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21 | ||
7077 | long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F | ||
7078 | long 0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F | ||
7079 | long 0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207 | ||
7080 | long 0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175 | ||
7081 | long 0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B | ||
7082 | long 0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5 | ||
7083 | long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A | ||
7084 | long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22 | ||
7085 | long 0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945 | ||
7086 | long 0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B | ||
7087 | long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3 | ||
7088 | long 0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05 | ||
7089 | long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19 | ||
7090 | long 0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5 | ||
7091 | long 0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22 | ||
7092 | long 0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A | ||
7093 | |||
7094 | set ADJFLAG,L_SCR2 # setox: 0 on the EXPMAIN path, 1 on the EEXPBIG (Step 8) path | ||
7095 | set SCALE,FP_SCR0 # setox: 2^(M) built in extended precision | ||
7096 | set ADJSCALE,FP_SCR1 # setox: 2^(M1) built in extended precision (EEXPBIG path only) | ||
7097 | set SC,FP_SCR0 # same slot as SCALE; presumably Sc = 2^M of setoxm1 (its code is not in this part of the file) -- confirm | ||
7098 | set ONEBYSC,FP_SCR1 # same slot as ADJSCALE; presumably OnebySc = -2^(-M) of setoxm1 -- confirm | ||
7099 | |||
7100 | global setox | ||
7101 | setox: | ||
7102 | #--entry point for EXP(X), here X is finite, non-zero, and not NaN's | ||
7103 | |||
7104 | #--Step 1. | ||
7105 | mov.l (%a0),%d1 # load part of input X | ||
7106 | and.l &0x7FFF0000,%d1 # biased expo. of X | ||
7107 | cmp.l %d1,&0x3FBE0000 # 2^(-65) | ||
7108 | bge.b EXPC1 # normal case | ||
7109 | bra EXPSM | ||
7110 | |||
7111 | EXPC1: | ||
7112 | #--The case |X| >= 2^(-65) | ||
7113 | mov.w 4(%a0),%d1 # expo. and partial sig. of |X| | ||
7114 | cmp.l %d1,&0x400CB167 # 16380 log2 trunc. 16 bits | ||
7115 | blt.b EXPMAIN # normal case | ||
7116 | bra EEXPBIG | ||
7117 | |||
7118 | EXPMAIN: | ||
7119 | #--Step 2. | ||
7120 | #--This is the normal branch: 2^(-65) <= |X| < 16380 log2. | ||
7121 | fmov.x (%a0),%fp0 # load input from (a0) | ||
7122 | |||
7123 | fmov.x %fp0,%fp1 | ||
7124 | fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X | ||
7125 | fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3} | ||
7126 | mov.l &0,ADJFLAG(%a6) | ||
7127 | fmov.l %fp0,%d1 # N = int( X * 64/log2 ) | ||
7128 | lea EEXPTBL(%pc),%a1 | ||
7129 | fmov.l %d1,%fp0 # convert to floating-format | ||
7130 | |||
7131 | mov.l %d1,L_SCR1(%a6) # save N temporarily | ||
7132 | and.l &0x3F,%d1 # D0 is J = N mod 64 | ||
7133 | lsl.l &4,%d1 | ||
7134 | add.l %d1,%a1 # address of 2^(J/64) | ||
7135 | mov.l L_SCR1(%a6),%d1 | ||
7136 | asr.l &6,%d1 # D0 is M | ||
7137 | add.w &0x3FFF,%d1 # biased expo. of 2^(M) | ||
7138 | mov.w L2(%pc),L_SCR1(%a6) # prefetch L2, no need in CB | ||
7139 | |||
7140 | EXPCONT1: | ||
7141 | #--Step 3. | ||
7142 | #--fp1,fp2 saved on the stack. fp0 is N, fp1 is X, | ||
7143 | #--a0 points to 2^(J/64), D0 is biased expo. of 2^(M) | ||
7144 | fmov.x %fp0,%fp2 | ||
7145 | fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64) | ||
7146 | fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64 | ||
7147 | fadd.x %fp1,%fp0 # X + N*L1 | ||
7148 | fadd.x %fp2,%fp0 # fp0 is R, reduced arg. | ||
7149 | |||
7150 | #--Step 4. | ||
7151 | #--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL | ||
7152 | #-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) | ||
7153 | #--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R | ||
7154 | #--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))] | ||
7155 | |||
7156 | fmov.x %fp0,%fp1 | ||
7157 | fmul.x %fp1,%fp1 # fp1 IS S = R*R | ||
7158 | |||
7159 | fmov.s &0x3AB60B70,%fp2 # fp2 IS A5 | ||
7160 | |||
7161 | fmul.x %fp1,%fp2 # fp2 IS S*A5 | ||
7162 | fmov.x %fp1,%fp3 | ||
7163 | fmul.s &0x3C088895,%fp3 # fp3 IS S*A4 | ||
7164 | |||
7165 | fadd.d EEXPA3(%pc),%fp2 # fp2 IS A3+S*A5 | ||
7166 | fadd.d EEXPA2(%pc),%fp3 # fp3 IS A2+S*A4 | ||
7167 | |||
7168 | fmul.x %fp1,%fp2 # fp2 IS S*(A3+S*A5) | ||
7169 | mov.w %d1,SCALE(%a6) # SCALE is 2^(M) in extended | ||
7170 | mov.l &0x80000000,SCALE+4(%a6) | ||
7171 | clr.l SCALE+8(%a6) | ||
7172 | |||
7173 | fmul.x %fp1,%fp3 # fp3 IS S*(A2+S*A4) | ||
7174 | |||
7175 | fadd.s &0x3F000000,%fp2 # fp2 IS A1+S*(A3+S*A5) | ||
7176 | fmul.x %fp0,%fp3 # fp3 IS R*S*(A2+S*A4) | ||
7177 | |||
7178 | fmul.x %fp1,%fp2 # fp2 IS S*(A1+S*(A3+S*A5)) | ||
7179 | fadd.x %fp3,%fp0 # fp0 IS R+R*S*(A2+S*A4), | ||
7180 | |||
7181 | fmov.x (%a1)+,%fp1 # fp1 is lead. pt. of 2^(J/64) | ||
7182 | fadd.x %fp2,%fp0 # fp0 is EXP(R) - 1 | ||
7183 | |||
7184 | #--Step 5 | ||
7185 | #--final reconstruction process | ||
7186 | #--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) ) | ||
7187 | |||
7188 | fmul.x %fp1,%fp0 # 2^(J/64)*(Exp(R)-1) | ||
7189 | fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3} | ||
7190 | fadd.s (%a1),%fp0 # accurate 2^(J/64) | ||
7191 | |||
7192 | fadd.x %fp1,%fp0 # 2^(J/64) + 2^(J/64)*... | ||
7193 | mov.l ADJFLAG(%a6),%d1 | ||
7194 | |||
7195 | #--Step 6 | ||
7196 | tst.l %d1 | ||
7197 | beq.b NORMAL | ||
7198 | ADJUST: | ||
7199 | fmul.x ADJSCALE(%a6),%fp0 | ||
7200 | NORMAL: | ||
7201 | fmov.l %d0,%fpcr # restore user FPCR | ||
7202 | mov.b &FMUL_OP,%d1 # last inst is MUL | ||
7203 | fmul.x SCALE(%a6),%fp0 # multiply 2^(M) | ||
7204 | bra t_catch | ||
7205 | |||
7206 | EXPSM: | ||
7207 | #--Step 7 | ||
7208 | fmovm.x (%a0),&0x80 # load X | ||
7209 | fmov.l %d0,%fpcr | ||
7210 | fadd.s &0x3F800000,%fp0 # 1+X in user mode | ||
7211 | bra t_pinx2 | ||
7212 | |||
7213 | EEXPBIG: | ||
7214 | #--Step 8 | ||
7215 | cmp.l %d1,&0x400CB27C # 16480 log2 | ||
7216 | bgt.b EXP2BIG # above the threshold: Step 9 | ||
7217 | #--Steps 8.2 -- 8.6 | ||
7218 | fmov.x (%a0),%fp0 # load input from (a0) | ||
7219 | |||
7220 | fmov.x %fp0,%fp1 # fp1 keeps X | ||
7221 | fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X | ||
7222 | fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3} | ||
7223 | mov.l &1,ADJFLAG(%a6) # nonzero: Step 6 also multiplies by ADJSCALE | ||
7224 | fmov.l %fp0,%d1 # N = int( X * 64/log2 ) | ||
7225 | lea EEXPTBL(%pc),%a1 | ||
7226 | fmov.l %d1,%fp0 # convert to floating-format | ||
7227 | mov.l %d1,L_SCR1(%a6) # save N temporarily | ||
7228 | and.l &0x3F,%d1 # d1 is J = N mod 64 | ||
7229 | lsl.l &4,%d1 # d1 is J*16, byte offset into EEXPTBL | ||
7230 | add.l %d1,%a1 # address of 2^(J/64) | ||
7231 | mov.l L_SCR1(%a6),%d1 | ||
7232 | asr.l &6,%d1 # d1 is K | ||
7233 | mov.l %d1,L_SCR1(%a6) # save K temporarily | ||
7234 | asr.l &1,%d1 # d1 is M1 (K shifted right by one) | ||
7235 | sub.l %d1,L_SCR1(%a6) # L_SCR1 is M = K - M1 | ||
7236 | add.w &0x3FFF,%d1 # biased expo. of 2^(M1) | ||
7237 | mov.w %d1,ADJSCALE(%a6) # ADJSCALE := 2^(M1) | ||
7238 | mov.l &0x80000000,ADJSCALE+4(%a6) | ||
7239 | clr.l ADJSCALE+8(%a6) | ||
7240 | mov.l L_SCR1(%a6),%d1 # d1 is M | ||
7241 | add.w &0x3FFF,%d1 # biased expo. of 2^(M) | ||
7242 | bra.w EXPCONT1 # go back to Step 3 | ||
7243 | |||
7244 | EXP2BIG: | ||
7245 | #--Step 9 | ||
7246 | tst.b (%a0) # is X positive or negative? | ||
7247 | bmi t_unfl2 # sign bit set: X is negative | ||
7248 | bra t_ovfl2 # X is positive | ||
7249 | |||
7250 | global setoxd | ||
7251 | setoxd: | ||
7252 | #--entry point for EXP(X), X is denormalized | ||
7253 | mov.l (%a0),-(%sp) # push first longword of X (holds the sign) | ||
7254 | andi.l &0x80000000,(%sp) # keep only the sign bit | ||
7255 | ori.l &0x00800000,(%sp) # sign(X)*2^(-126) | ||
7256 | |||
7257 | fmov.s &0x3F800000,%fp0 # fp0 is 1.0 | ||
7258 | |||
7259 | fmov.l %d0,%fpcr # load FPCR from d0 before the final add | ||
7260 | fadd.s (%sp)+,%fp0 # 1 + sign(X)*2^(-126); pops the temporary | ||
7261 | bra t_pinx2 | ||
7262 | |||
7263 | global setoxm1 | ||
7264 | setoxm1: | ||
7265 | #--entry point for EXPM1(X), here X is finite, non-zero, non-NaN | ||
7266 | |||
7267 | #--Step 1. | ||
7268 | #--Step 1.1 | ||
7269 | mov.l (%a0),%d1 # load first longword of input X | ||
7270 | and.l &0x7FFF0000,%d1 # biased expo. of X (sign cleared) | ||
7271 | cmp.l %d1,&0x3FFD0000 # 1/4 | ||
7272 | bge.b EM1CON1 # |X| >= 1/4 | ||
7273 | bra EM1SM # |X| < 1/4 | ||
7274 | |||
7275 | EM1CON1: | ||
7276 | #--Step 1.3 | ||
7277 | #--The case |X| >= 1/4 | ||
7278 | mov.w 4(%a0),%d1 # expo. and partial sig. of |X| | ||
7279 | cmp.l %d1,&0x4004C215 # 70log2 rounded up to 16 bits | ||
7280 | ble.b EM1MAIN # 1/4 <= |X| <= 70log2 | ||
7281 | bra EM1BIG # |X| > 70log2 | ||
7282 | |||
7283 | EM1MAIN: | ||
7284 | #--Step 2. | ||
7285 | #--This is the case: 1/4 <= |X| <= 70 log2. | ||
7286 | fmov.x (%a0),%fp0 # load input from (a0) | ||
7287 | |||
7288 | fmov.x %fp0,%fp1 # fp1 keeps X | ||
7289 | fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X | ||
7290 | fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3} | ||
7291 | fmov.l %fp0,%d1 # N = int( X * 64/log2 ) | ||
7292 | lea EEXPTBL(%pc),%a1 | ||
7293 | fmov.l %d1,%fp0 # convert to floating-format | ||
7294 | |||
7295 | mov.l %d1,L_SCR1(%a6) # save N temporarily | ||
7296 | and.l &0x3F,%d1 # d1 is J = N mod 64 | ||
7297 | lsl.l &4,%d1 # d1 is J*16, byte offset into EEXPTBL | ||
7298 | add.l %d1,%a1 # address of 2^(J/64) | ||
7299 | mov.l L_SCR1(%a6),%d1 | ||
7300 | asr.l &6,%d1 # d1 is M | ||
7301 | mov.l %d1,L_SCR1(%a6) # save a copy of M | ||
7302 | |||
7303 | #--Step 3. | ||
7304 | #--fp2,fp3 saved on the stack. fp0 is N, fp1 is X, | ||
7305 | #--a1 points to 2^(J/64), d1 and L_SCR1 both contain M | ||
7306 | fmov.x %fp0,%fp2 | ||
7307 | fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64) | ||
7308 | fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64 | ||
7309 | fadd.x %fp1,%fp0 # X + N*L1 | ||
7310 | fadd.x %fp2,%fp0 # fp0 is R, reduced arg. | ||
7311 | add.w &0x3FFF,%d1 # d1 is biased expo. of 2^M | ||
7312 | |||
7313 | #--Step 4. | ||
7314 | #--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL | ||
7315 | #-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6))))) | ||
7316 | #--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R | ||
7317 | #--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))] | ||
7318 | |||
7319 | fmov.x %fp0,%fp1 | ||
7320 | fmul.x %fp1,%fp1 # fp1 IS S = R*R | ||
7321 | |||
7322 | fmov.s &0x3950097B,%fp2 # fp2 IS a6 | ||
7323 | |||
7324 | fmul.x %fp1,%fp2 # fp2 IS S*A6 | ||
7325 | fmov.x %fp1,%fp3 | ||
7326 | fmul.s &0x3AB60B6A,%fp3 # fp3 IS S*A5 | ||
7327 | |||
7328 | fadd.d EM1A4(%pc),%fp2 # fp2 IS A4+S*A6 | ||
7329 | fadd.d EM1A3(%pc),%fp3 # fp3 IS A3+S*A5 | ||
7330 | mov.w %d1,SC(%a6) # SC is 2^(M) in extended | ||
7331 | mov.l &0x80000000,SC+4(%a6) | ||
7332 | clr.l SC+8(%a6) | ||
7333 | |||
7334 | fmul.x %fp1,%fp2 # fp2 IS S*(A4+S*A6) | ||
7335 | mov.l L_SCR1(%a6),%d1 # d1 is M | ||
7336 | neg.w %d1 # d1 is -M | ||
7337 | fmul.x %fp1,%fp3 # fp3 IS S*(A3+S*A5) | ||
7338 | add.w &0x3FFF,%d1 # biased expo. of 2^(-M) | ||
7339 | fadd.d EM1A2(%pc),%fp2 # fp2 IS A2+S*(A4+S*A6) | ||
7340 | fadd.s &0x3F000000,%fp3 # fp3 IS A1+S*(A3+S*A5) | ||
7341 | |||
7342 | fmul.x %fp1,%fp2 # fp2 IS S*(A2+S*(A4+S*A6)) | ||
7343 | or.w &0x8000,%d1 # signed/expo. of -2^(-M) | ||
7344 | mov.w %d1,ONEBYSC(%a6) # OnebySc is -2^(-M) | ||
7345 | mov.l &0x80000000,ONEBYSC+4(%a6) | ||
7346 | clr.l ONEBYSC+8(%a6) | ||
7347 | fmul.x %fp3,%fp1 # fp1 IS S*(A1+S*(A3+S*A5)) | ||
7348 | |||
7349 | fmul.x %fp0,%fp2 # fp2 IS R*S*(A2+S*(A4+S*A6)) | ||
7350 | fadd.x %fp1,%fp0 # fp0 IS R+S*(A1+S*(A3+S*A5)) | ||
7351 | |||
7352 | fadd.x %fp2,%fp0 # fp0 IS EXP(R)-1 | ||
7353 | |||
7354 | fmovm.x (%sp)+,&0x30 # restore fp2/fp3 {%fp2/%fp3} | ||
7355 | |||
7356 | #--Step 5 | ||
7357 | #--Compute 2^(J/64)*p | ||
7358 | |||
7359 | fmul.x (%a1),%fp0 # 2^(J/64)*(Exp(R)-1) | ||
7360 | |||
7361 | #--Step 6 | ||
7362 | #--Step 6.1 | ||
7363 | mov.l L_SCR1(%a6),%d1 # retrieve M | ||
7364 | cmp.l %d1,&63 | ||
7365 | ble.b MLE63 # M <= 63 | ||
7366 | #--Step 6.2 M >= 64 | ||
7367 | fmov.s 12(%a1),%fp1 # fp1 is t (single at offset 12 of the table entry) | ||
7368 | fadd.x ONEBYSC(%a6),%fp1 # fp1 is t+OnebySc | ||
7369 | fadd.x %fp1,%fp0 # p+(t+OnebySc), fp1 released | ||
7370 | fadd.x (%a1),%fp0 # T+(p+(t+OnebySc)) | ||
7371 | bra EM1SCALE | ||
7372 | MLE63: | ||
7373 | #--Step 6.3 M <= 63 | ||
7374 | cmp.l %d1,&-3 | ||
7375 | bge.b MGEN3 # -3 <= M <= 63 | ||
7376 | MLTN3: | ||
7377 | #--Step 6.4 M <= -4 | ||
7378 | fadd.s 12(%a1),%fp0 # p+t | ||
7379 | fadd.x (%a1),%fp0 # T+(p+t) | ||
7380 | fadd.x ONEBYSC(%a6),%fp0 # OnebySc + (T+(p+t)) | ||
7381 | bra EM1SCALE | ||
7382 | MGEN3: | ||
7383 | #--Step 6.5 -3 <= M <= 63 | ||
7384 | fmov.x (%a1)+,%fp1 # fp1 is T; a1 now points at t | ||
7385 | fadd.s (%a1),%fp0 # fp0 is p+t | ||
7386 | fadd.x ONEBYSC(%a6),%fp1 # fp1 is T+OnebySc | ||
7387 | fadd.x %fp1,%fp0 # (T+OnebySc)+(p+t) | ||
7388 | |||
7389 | EM1SCALE: | ||
7390 | #--Step 6.6 | ||
7391 | fmov.l %d0,%fpcr # load FPCR from d0 before the final multiply | ||
7392 | fmul.x SC(%a6),%fp0 # multiply by SC | ||
7393 | bra t_inx2 | ||
7394 | |||
7395 | EM1SM: | ||
7396 | #--Step 7 |X| < 1/4. | ||
7397 | cmp.l %d1,&0x3FBE0000 # 2^(-65) | ||
7398 | bge.b EM1POLY # 2^(-65) <= |X| < 1/4 | ||
7399 | |||
7400 | EM1TINY: | ||
7401 | #--Step 8 |X| < 2^(-65) | ||
7402 | cmp.l %d1,&0x00330000 # 2^(-16312) | ||
7403 | blt.b EM12TINY | ||
7404 | #--NOTE(review): biased exponent 0x0033 decodes to 2^(-16332), not 2^(-16312) -- confirm which is intended | ||
7405 | mov.l &0x80010000,SC(%a6) # SC is -2^(-16382) | ||
7406 | mov.l &0x80000000,SC+4(%a6) | ||
7407 | clr.l SC+8(%a6) | ||
7408 | fmov.x (%a0),%fp0 # fp0 is X | ||
7409 | fmov.l %d0,%fpcr # load FPCR from d0 before the final add | ||
7410 | mov.b &FADD_OP,%d1 # last inst is ADD | ||
7411 | fadd.x SC(%a6),%fp0 # X - 2^(-16382) | ||
7412 | bra t_catch | ||
7413 | |||
7414 | EM12TINY: | ||
7415 | #--Step 8.3 | ||
7416 | fmov.x (%a0),%fp0 # fp0 is X | ||
7417 | fmul.d TWO140(%pc),%fp0 # scale X up (TWO140 presumably 2^140; defined elsewhere) | ||
7418 | mov.l &0x80010000,SC(%a6) # SC is -2^(-16382) | ||
7419 | mov.l &0x80000000,SC+4(%a6) | ||
7420 | clr.l SC+8(%a6) | ||
7421 | fadd.x SC(%a6),%fp0 # add SC to the scaled X | ||
7422 | fmov.l %d0,%fpcr # load FPCR from d0 before the final multiply | ||
7423 | mov.b &FMUL_OP,%d1 # last inst is MUL | ||
7424 | fmul.d TWON140(%pc),%fp0 # scale back (TWON140 presumably 2^(-140); defined elsewhere) | ||
7425 | bra t_catch | ||
7426 | |||
7427 | EM1POLY: | ||
7428 | #--Step 9 exp(X)-1 by a simple polynomial | ||
7429 | fmov.x (%a0),%fp0 # fp0 is X | ||
7430 | fmul.x %fp0,%fp0 # fp0 is S := X*X | ||
7431 | fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3} | ||
7432 | fmov.s &0x2F30CAA8,%fp1 # fp1 is B12 | ||
7433 | fmul.x %fp0,%fp1 # fp1 is S*B12 | ||
7434 | fmov.s &0x310F8290,%fp2 # fp2 is B11 | ||
7435 | fadd.s &0x32D73220,%fp1 # fp1 is B10+S*B12 | ||
7436 | |||
7437 | fmul.x %fp0,%fp2 # fp2 is S*B11 | ||
7438 | fmul.x %fp0,%fp1 # fp1 is S*(B10 + ... | ||
7439 | |||
7440 | fadd.s &0x3493F281,%fp2 # fp2 is B9+S*... | ||
7441 | fadd.d EM1B8(%pc),%fp1 # fp1 is B8+S*... | ||
7442 | |||
7443 | fmul.x %fp0,%fp2 # fp2 is S*(B9+... | ||
7444 | fmul.x %fp0,%fp1 # fp1 is S*(B8+... | ||
7445 | |||
7446 | fadd.d EM1B7(%pc),%fp2 # fp2 is B7+S*... | ||
7447 | fadd.d EM1B6(%pc),%fp1 # fp1 is B6+S*... | ||
7448 | |||
7449 | fmul.x %fp0,%fp2 # fp2 is S*(B7+... | ||
7450 | fmul.x %fp0,%fp1 # fp1 is S*(B6+... | ||
7451 | |||
7452 | fadd.d EM1B5(%pc),%fp2 # fp2 is B5+S*... | ||
7453 | fadd.d EM1B4(%pc),%fp1 # fp1 is B4+S*... | ||
7454 | |||
7455 | fmul.x %fp0,%fp2 # fp2 is S*(B5+... | ||
7456 | fmul.x %fp0,%fp1 # fp1 is S*(B4+... | ||
7457 | |||
7458 | fadd.d EM1B3(%pc),%fp2 # fp2 is B3+S*... | ||
7459 | fadd.x EM1B2(%pc),%fp1 # fp1 is B2+S*... | ||
7460 | |||
7461 | fmul.x %fp0,%fp2 # fp2 is S*(B3+... | ||
7462 | fmul.x %fp0,%fp1 # fp1 is S*(B2+... | ||
7463 | |||
7464 | fmul.x %fp0,%fp2 # fp2 is S*S*(B3+...) | ||
7465 | fmul.x (%a0),%fp1 # fp1 is X*S*(B2... | ||
7466 | |||
7467 | fmul.s &0x3F000000,%fp0 # fp0 is S*B1 | ||
7468 | fadd.x %fp2,%fp1 # fp1 is Q | ||
7469 | |||
7470 | fmovm.x (%sp)+,&0x30 # restore fp2/fp3 {%fp2/%fp3} | ||
7471 | |||
7472 | fadd.x %fp1,%fp0 # fp0 is S*B1+Q | ||
7473 | |||
7474 | fmov.l %d0,%fpcr # load FPCR from d0 before the final add | ||
7475 | fadd.x (%a0),%fp0 # fp0 is X + (S*B1+Q) | ||
7476 | bra t_inx2 | ||
7477 | |||
7478 | EM1BIG: | ||
7479 | #--Step 10 |X| > 70 log2 | ||
7480 | mov.l (%a0),%d1 # first longword of X (sign in bit 31) | ||
7481 | cmp.l %d1,&0 | ||
7482 | bgt.w EXPC1 # X > 0: continue at EXPC1 (defined elsewhere) | ||
7483 | #--Step 10.2 X is negative | ||
7484 | fmov.s &0xBF800000,%fp0 # fp0 is -1 | ||
7485 | fmov.l %d0,%fpcr # load FPCR from d0 before the final add | ||
7486 | fadd.s &0x00800000,%fp0 # -1 + 2^(-126) | ||
7487 | bra t_minx2 | ||
7488 | |||
7489 | global setoxm1d | ||
7490 | setoxm1d: | ||
7491 | #--entry point for EXPM1(X), here X is denormalized | ||
7492 | #--Step 0. All work is delegated to t_extdnrm. | ||
7493 | bra t_extdnrm | ||
7494 | |||
7495 | ######################################################################### | ||
7496 | # sgetexp(): returns the exponent portion of the input argument. # | ||
7497 | # The exponent bias is removed and the exponent value is # | ||
7498 | # returned as an extended precision number in fp0. # | ||
7499 | # sgetexpd(): handles denormalized numbers. # | ||
7500 | # # | ||
7501 | # sgetman(): extracts the mantissa of the input argument. The # | ||
7502 | # mantissa is converted to an extended precision number w/ # | ||
7503 | # an exponent of $3fff and is returned in fp0. The range of # | ||
7504 | # the result is [1.0 - 2.0). # | ||
7505 | # sgetmand(): handles denormalized numbers. # | ||
7506 | # # | ||
7507 | # INPUT *************************************************************** # | ||
7508 | # a0 = pointer to extended precision input # | ||
7509 | # # | ||
7510 | # OUTPUT ************************************************************** # | ||
7511 | # fp0 = exponent(X) or mantissa(X) # | ||
7512 | # # | ||
7513 | ######################################################################### | ||
7514 | |||
7515 | global sgetexp | ||
7516 | sgetexp: | ||
7517 | mov.w SRC_EX(%a0),%d0 # get the sign/exponent word | ||
7518 | bclr &0xf,%d0 # clear the sign bit | ||
7519 | subi.w &0x3fff,%d0 # subtract off the bias | ||
7520 | fmov.w %d0,%fp0 # return exp in fp0 | ||
7521 | blt.b sgetexpn # it's negative (flags from subi.w; FP moves leave the integer CCR alone) | ||
7522 | rts | ||
7523 | |||
7524 | sgetexpn: | ||
7525 | mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit | ||
7526 | rts | ||
7527 | |||
7528 | global sgetexpd | ||
7529 | sgetexpd: | ||
7530 | bsr.l norm # normalize; the code below uses d0 as the shift amount | ||
7531 | neg.w %d0 # new exp = -(shft amt) | ||
7532 | subi.w &0x3fff,%d0 # subtract off the bias | ||
7533 | fmov.w %d0,%fp0 # return exp in fp0 | ||
7534 | mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit (set unconditionally on this path) | ||
7535 | rts | ||
7536 | |||
7537 | global sgetman | ||
7538 | sgetman: | ||
7539 | mov.w SRC_EX(%a0),%d0 # get the sign/exponent word | ||
7540 | ori.w &0x7fff,%d0 # set every exponent bit, keep the sign | ||
7541 | bclr &0xe,%d0 # make it the new exp +-3fff | ||
7542 | |||
7543 | # here, we build the result in a tmp location so as not to disturb the input | ||
7544 | mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy to tmp loc | ||
7545 | mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy to tmp loc | ||
7546 | mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent | ||
7547 | fmov.x FP_SCR0(%a6),%fp0 # put new value back in fp0 | ||
7548 | bmi.b sgetmann # it's negative (N flag from the mov.w of the sign/exponent word) | ||
7549 | rts | ||
7550 | |||
7551 | sgetmann: | ||
7552 | mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit | ||
7553 | rts | ||
7554 | |||
7555 | # | ||
7556 | # For denormalized numbers, shift the mantissa until the j-bit = 1, | ||
7557 | # then load the exponent with +/- $3fff. | ||
7558 | # | ||
7559 | global sgetmand | ||
7560 | sgetmand: | ||
7561 | bsr.l norm # normalize exponent | ||
7562 | bra.b sgetman # finish in the normalized-input path | ||
7563 | |||
7564 | ######################################################################### | ||
7565 | # scosh(): computes the hyperbolic cosine of a normalized input # | ||
7566 | # scoshd(): computes the hyperbolic cosine of a denormalized input # | ||
7567 | # # | ||
7568 | # INPUT *************************************************************** # | ||
7569 | # a0 = pointer to extended precision input # | ||
7570 | # d0 = round precision,mode # | ||
7571 | # # | ||
7572 | # OUTPUT ************************************************************** # | ||
7573 | # fp0 = cosh(X) # | ||
7574 | # # | ||
7575 | # ACCURACY and MONOTONICITY ******************************************* # | ||
7576 | # The returned result is within 3 ulps in 64 significant bits, # | ||
7577 | # i.e. within 0.5001 ulp to 53 bits if the result is subsequently # | ||
7578 | # rounded to double precision. The result is provably monotonic # | ||
7579 | # in double precision. # | ||
7580 | # # | ||
7581 | # ALGORITHM *********************************************************** # | ||
7582 | # # | ||
7583 | # COSH # | ||
7584 | # 1. If |X| > 16380 log2, go to 3. # | ||
7585 | # # | ||
7586 | # 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae # | ||
7587 | # y = |X|, z = exp(Y), and # | ||
7588 | # cosh(X) = (1/2)*( z + 1/z ). # | ||
7589 | # Exit. # | ||
7590 | # # | ||
7591 | # 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5. # | ||
7592 | # # | ||
7593 | # 4. (16380 log2 < |X| <= 16480 log2) # | ||
7594 | # cosh(X) = exp(|X|)/2. # | ||
7595 | # However, invoking exp(|X|) may cause premature # | ||
7596 | # overflow. Thus, we calculate cosh(X) as follows: # | ||
7597 | # Y := |X| # | ||
7598 | # Fact := 2**(16380) # | ||
7599 | # Y' := Y - 16381 log2 # | ||
7600 | # cosh(X) := Fact * exp(Y'). # | ||
7601 | # Exit. # | ||
7602 | # # | ||
7603 | # 5. (|X| > 16480 log2) cosh(X) must overflow. Return # | ||
7604 | # Huge*Huge to generate overflow and an infinity with # | ||
7605 | # the appropriate sign. Huge is the largest finite number # | ||
7606 | # in extended format. Exit. # | ||
7607 | # # | ||
7608 | ######################################################################### | ||
7609 | |||
7610 | TWO16380: | ||
7611 | long 0x7FFB0000,0x80000000,0x00000000,0x00000000 # 2^16380 in extended precision | ||
7612 | |||
7613 | global scosh | ||
7614 | scosh: | ||
7615 | fmov.x (%a0),%fp0 # LOAD INPUT | ||
7616 | |||
7617 | mov.l (%a0),%d1 # sign/exponent into the upper word of d1 | ||
7618 | mov.w 4(%a0),%d1 # top 16 significand bits into the lower word | ||
7619 | and.l &0x7FFFFFFF,%d1 # drop the sign: d1 is compacted |X| | ||
7620 | cmp.l %d1,&0x400CB167 # 16380 log2 | ||
7621 | bgt.b COSHBIG # |X| > 16380 log2 | ||
7622 | |||
7623 | #--THIS IS THE USUAL CASE, |X| <= 16380 LOG2 | ||
7624 | #--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) ) | ||
7625 | |||
7626 | fabs.x %fp0 # |X| | ||
7627 | |||
7628 | mov.l %d0,-(%sp) # save caller's d0 (round precision,mode) | ||
7629 | clr.l %d0 # call setox with d0 = 0 | ||
7630 | fmovm.x &0x01,-(%sp) # save |X| to stack | ||
7631 | lea (%sp),%a0 # pass ptr to |X| | ||
7632 | bsr setox # FP0 IS EXP(|X|) | ||
7633 | add.l &0xc,%sp # erase |X| from stack | ||
7634 | fmul.s &0x3F000000,%fp0 # (1/2)EXP(|X|) | ||
7635 | mov.l (%sp)+,%d0 # restore caller's d0 | ||
7636 | |||
7637 | fmov.s &0x3E800000,%fp1 # (1/4) | ||
7638 | fdiv.x %fp0,%fp1 # 1/(2 EXP(|X|)) | ||
7639 | |||
7640 | fmov.l %d0,%fpcr # load FPCR from d0 before the final add | ||
7641 | mov.b &FADD_OP,%d1 # last inst is ADD | ||
7642 | fadd.x %fp1,%fp0 # (1/2)EXP(|X|) + 1/(2 EXP(|X|)) | ||
7643 | bra t_catch | ||
7644 | |||
7645 | COSHBIG: | ||
7646 | cmp.l %d1,&0x400CB2B3 # NOTE(review): decodes to about 16500 log2; header says 16480 log2 | ||
7647 | bgt.b COSHHUGE | ||
7648 | |||
7649 | fabs.x %fp0 | ||
7650 | fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD) | ||
7651 | fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE | ||
7652 | |||
7653 | mov.l %d0,-(%sp) # save caller's d0 (round precision,mode) | ||
7654 | clr.l %d0 # call setox with d0 = 0 | ||
7655 | fmovm.x &0x01,-(%sp) # save fp0 to stack | ||
7656 | lea (%sp),%a0 # pass ptr to fp0 | ||
7657 | bsr setox # FP0 IS EXP(|X| - 16381 LOG2) | ||
7658 | add.l &0xc,%sp # clear fp0 from stack | ||
7659 | mov.l (%sp)+,%d0 # restore caller's d0 | ||
7660 | |||
7661 | fmov.l %d0,%fpcr # load FPCR from d0 before the final multiply | ||
7662 | mov.b &FMUL_OP,%d1 # last inst is MUL | ||
7663 | fmul.x TWO16380(%pc),%fp0 # multiply by 2^16380 | ||
7664 | bra t_catch | ||
7665 | |||
7666 | COSHHUGE: | ||
7667 | bra t_ovfl2 | ||
7668 | |||
7669 | global scoshd | ||
7670 | #--COSH(X) = 1 FOR DENORMALIZED X | ||
7671 | scoshd: | ||
7672 | fmov.s &0x3F800000,%fp0 # fp0 is 1.0 | ||
7673 | |||
7674 | fmov.l %d0,%fpcr # load FPCR from d0 before the final add | ||
7675 | fadd.s &0x00800000,%fp0 # 1 + 2^(-126) | ||
7676 | bra t_pinx2 | ||
7677 | |||
7678 | ######################################################################### | ||
7679 | # ssinh(): computes the hyperbolic sine of a normalized input # | ||
7680 | # ssinhd(): computes the hyperbolic sine of a denormalized input # | ||
7681 | # # | ||
7682 | # INPUT *************************************************************** # | ||
7683 | # a0 = pointer to extended precision input # | ||
7684 | # d0 = round precision,mode # | ||
7685 | # # | ||
7686 | # OUTPUT ************************************************************** # | ||
7687 | # fp0 = sinh(X) # | ||
7688 | # # | ||
7689 | # ACCURACY and MONOTONICITY ******************************************* # | ||
7690 | # The returned result is within 3 ulps in 64 significant bits, # | ||
7691 | # i.e. within 0.5001 ulp to 53 bits if the result is subsequently # | ||
7692 | # rounded to double precision. The result is provably monotonic # | ||
7693 | # in double precision. # | ||
7694 | # # | ||
7695 | # ALGORITHM *********************************************************** # | ||
7696 | # # | ||
7697 | # SINH # | ||
7698 | # 1. If |X| > 16380 log2, go to 3. # | ||
7699 | # # | ||
7700 | # 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula # | ||
7701 | # y = |X|, sgn = sign(X), and z = expm1(Y), # | ||
7702 | # sinh(X) = sgn*(1/2)*( z + z/(1+z) ). # | ||
7703 | # Exit. # | ||
7704 | # # | ||
7705 | # 3. If |X| > 16480 log2, go to 5. # | ||
7706 | # # | ||
7707 | # 4. (16380 log2 < |X| <= 16480 log2) # | ||
7708 | # sinh(X) = sign(X) * exp(|X|)/2. # | ||
7709 | # However, invoking exp(|X|) may cause premature overflow. # | ||
7710 | # Thus, we calculate sinh(X) as follows: # | ||
7711 | # Y := |X| # | ||
7712 | # sgn := sign(X) # | ||
7713 | # sgnFact := sgn * 2**(16380) # | ||
7714 | # Y' := Y - 16381 log2 # | ||
7715 | # sinh(X) := sgnFact * exp(Y'). # | ||
7716 | # Exit. # | ||
7717 | # # | ||
7718 | # 5. (|X| > 16480 log2) sinh(X) must overflow. Return # | ||
7719 | # sign(X)*Huge*Huge to generate overflow and an infinity with # | ||
7720 | # the appropriate sign. Huge is the largest finite number in # | ||
7721 | # extended format. Exit. # | ||
7722 | # # | ||
7723 | ######################################################################### | ||
7724 | |||
7725 | global ssinh | ||
7726 | ssinh: | ||
7727 | fmov.x (%a0),%fp0 # LOAD INPUT | ||
7728 | |||
7729 | mov.l (%a0),%d1 # sign/exponent into the upper word of d1 | ||
7730 | mov.w 4(%a0),%d1 # top 16 significand bits into the lower word | ||
7731 | mov.l %d1,%a1 # save (compacted) operand | ||
7732 | and.l &0x7FFFFFFF,%d1 # drop the sign: d1 is compacted |X| | ||
7733 | cmp.l %d1,&0x400CB167 # 16380 log2 | ||
7734 | bgt.b SINHBIG # |X| > 16380 log2 | ||
7735 | |||
7736 | #--THIS IS THE USUAL CASE, |X| <= 16380 LOG2 | ||
7737 | #--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) ) | ||
7738 | |||
7739 | fabs.x %fp0 # Y = |X| | ||
7740 | |||
7741 | movm.l &0x8040,-(%sp) # {a1/d0} | ||
7742 | fmovm.x &0x01,-(%sp) # save Y on stack | ||
7743 | lea (%sp),%a0 # pass ptr to Y | ||
7744 | clr.l %d0 # call setoxm1 with d0 = 0 | ||
7745 | bsr setoxm1 # FP0 IS Z = EXPM1(Y) | ||
7746 | add.l &0xc,%sp # clear Y from stack | ||
7747 | fmov.l &0,%fpcr # FPCR = 0 for the intermediate arithmetic | ||
7748 | movm.l (%sp)+,&0x0201 # {a1/d0} | ||
7749 | |||
7750 | fmov.x %fp0,%fp1 | ||
7751 | fadd.s &0x3F800000,%fp1 # 1+Z | ||
7752 | fmov.x %fp0,-(%sp) # save Z on stack | ||
7753 | fdiv.x %fp1,%fp0 # Z/(1+Z) | ||
7754 | mov.l %a1,%d1 # compacted operand, sign in bit 31 | ||
7755 | and.l &0x80000000,%d1 # keep the sign of X | ||
7756 | or.l &0x3F000000,%d1 # sign(X)*(1/2) in single format | ||
7757 | fadd.x (%sp)+,%fp0 # Z + Z/(1+Z); pops Z | ||
7758 | mov.l %d1,-(%sp) # push sign(X)*(1/2) | ||
7759 | |||
7760 | fmov.l %d0,%fpcr # load FPCR from d0 before the final multiply | ||
7761 | mov.b &FMUL_OP,%d1 # last inst is MUL | ||
7762 | fmul.s (%sp)+,%fp0 # last fp inst - possible exceptions set | ||
7763 | bra t_catch | ||
7764 | |||
7765 | SINHBIG: | ||
7766 | cmp.l %d1,&0x400CB2B3 # NOTE(review): decodes to about 16500 log2; header says 16480 log2 | ||
7767 | bgt t_ovfl | ||
7768 | fabs.x %fp0 | ||
7769 | fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD) | ||
7770 | mov.l &0,-(%sp) # low significand longword of sgnFact | ||
7771 | mov.l &0x80000000,-(%sp) # high significand longword of sgnFact | ||
7772 | mov.l %a1,%d1 # compacted operand, sign in bit 31 | ||
7773 | and.l &0x80000000,%d1 # keep the sign of X | ||
7774 | or.l &0x7FFB0000,%d1 # sign(X) with biased expo. of 2^16380 | ||
7775 | mov.l %d1,-(%sp) # EXTENDED FMT | ||
7776 | fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE | ||
7777 | |||
7778 | mov.l %d0,-(%sp) # save caller's d0 (round precision,mode) | ||
7779 | clr.l %d0 # call setox with d0 = 0 | ||
7780 | fmovm.x &0x01,-(%sp) # save fp0 on stack | ||
7781 | lea (%sp),%a0 # pass ptr to fp0 | ||
7782 | bsr setox # FP0 IS EXP(|X| - 16381 LOG2) | ||
7783 | add.l &0xc,%sp # clear fp0 from stack | ||
7784 | |||
7785 | mov.l (%sp)+,%d0 # restore caller's d0 | ||
7786 | fmov.l %d0,%fpcr # load FPCR from d0 before the final multiply | ||
7787 | mov.b &FMUL_OP,%d1 # last inst is MUL | ||
7788 | fmul.x (%sp)+,%fp0 # possible exception | ||
7789 | bra t_catch | ||
7790 | |||
7791 | global ssinhd | ||
7792 | #--SINH(X) = X FOR DENORMALIZED X | ||
7793 | ssinhd: | ||
7794 | bra t_extdnrm # all work is delegated to t_extdnrm | ||
7795 | |||
7796 | ######################################################################### | ||
7797 | # stanh(): computes the hyperbolic tangent of a normalized input # | ||
7798 | # stanhd(): computes the hyperbolic tangent of a denormalized input # | ||
7799 | # # | ||
7800 | # INPUT *************************************************************** # | ||
7801 | # a0 = pointer to extended precision input # | ||
7802 | # d0 = round precision,mode # | ||
7803 | # # | ||
7804 | # OUTPUT ************************************************************** # | ||
7805 | # fp0 = tanh(X) # | ||
7806 | # # | ||
7807 | # ACCURACY and MONOTONICITY ******************************************* # | ||
7808 | # The returned result is within 3 ulps in 64 significant bits, # | ||
7809 | # i.e. within 0.5001 ulp to 53 bits if the result is subsequently # | ||
7810 | # rounded to double precision. The result is provably monotonic # | ||
7811 | # in double precision. # | ||
7812 | # # | ||
7813 | # ALGORITHM *********************************************************** # | ||
7814 | # # | ||
7815 | # TANH # | ||
7816 | # 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3. # | ||
7817 | # # | ||
7818 | # 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by # | ||
7819 | # sgn := sign(X), y := 2|X|, z := expm1(Y), and # | ||
7820 | # tanh(X) = sgn*( z/(2+z) ). # | ||
7821 | # Exit. # | ||
7822 | # # | ||
7823 | # 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1, # | ||
7824 | # go to 7. # | ||
7825 | # # | ||
7826 | # 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6. # | ||
7827 | # # | ||
7828 | # 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by # | ||
7829 | # sgn := sign(X), y := 2|X|, z := exp(Y), # | ||
7830 | # tanh(X) = sgn - [ sgn*2/(1+z) ]. # | ||
7831 | # Exit. # | ||
7832 | # # | ||
7833 | # 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we # | ||
7834 | # calculate Tanh(X) by # | ||
7835 | # sgn := sign(X), Tiny := 2**(-126), # | ||
7836 | # tanh(X) := sgn - sgn*Tiny. # | ||
7837 | # Exit. # | ||
7838 | # # | ||
7839 | # 7. (|X| < 2**(-40)). Tanh(X) = X. Exit. # | ||
7840 | # # | ||
7841 | ######################################################################### | ||
7842 | |||
7843 | set X,FP_SCR0 # scratch copy of the input | ||
7844 | set XFRAC,X+4 # longword following the first longword of X | ||
7845 | |||
7846 | set SGN,L_SCR3 # holds the sign bit of X | ||
7847 | |||
7848 | set V,FP_SCR0 # V shares its storage with X | ||
7849 | |||
7850 | global stanh | ||
7851 | stanh: | ||
7852 | fmov.x (%a0),%fp0 # LOAD INPUT | ||
7853 | |||
7854 | fmov.x %fp0,X(%a6) # keep a scratch copy of X | ||
7855 | mov.l (%a0),%d1 # sign/exponent into the upper word of d1 | ||
7856 | mov.w 4(%a0),%d1 # top 16 significand bits into the lower word | ||
7857 | mov.l %d1,X(%a6) # first longword of X now holds the compacted form | ||
7858 | and.l &0x7FFFFFFF,%d1 # drop the sign: d1 is compacted |X| | ||
7859 | cmp.l %d1, &0x3fd78000 # is |X| < 2^(-40)? | ||
7860 | blt.w TANHBORS # yes | ||
7861 | cmp.l %d1, &0x3fffddce # is |X| > (5/2)LOG2? | ||
7862 | bgt.w TANHBORS # yes | ||
7863 | |||
7864 | #--THIS IS THE USUAL CASE | ||
7865 | #--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2). | ||
7866 | |||
7867 | mov.l X(%a6),%d1 | ||
7868 | mov.l %d1,SGN(%a6) | ||
7869 | and.l &0x7FFF0000,%d1 # biased expo. of |X| | ||
7870 | add.l &0x00010000,%d1 # EXPONENT OF 2|X| | ||
7871 | mov.l %d1,X(%a6) # X now holds Y = 2|X| | ||
7872 | and.l &0x80000000,SGN(%a6) # SGN keeps only the sign bit of X | ||
7873 | fmov.x X(%a6),%fp0 # FP0 IS Y = 2|X| | ||
7874 | |||
7875 | mov.l %d0,-(%sp) # save caller's d0 (round precision,mode) | ||
7876 | clr.l %d0 # call setoxm1 with d0 = 0 | ||
7877 | fmovm.x &0x1,-(%sp) # save Y on stack | ||
7878 | lea (%sp),%a0 # pass ptr to Y | ||
7879 | bsr setoxm1 # FP0 IS Z = EXPM1(Y) | ||
7880 | add.l &0xc,%sp # clear Y from stack | ||
7881 | mov.l (%sp)+,%d0 # restore caller's d0 | ||
7882 | |||
7883 | fmov.x %fp0,%fp1 | ||
7884 | fadd.s &0x40000000,%fp1 # Z+2 | ||
7885 | mov.l SGN(%a6),%d1 | ||
7886 | fmov.x %fp1,V(%a6) # V is Z+2 | ||
7887 | eor.l %d1,V(%a6) # V is SIGN(X)*(Z+2) | ||
7888 | |||
7889 | fmov.l %d0,%fpcr # restore users round prec,mode | ||
7890 | fdiv.x V(%a6),%fp0 # Z / (SIGN(X)*(Z+2)) | ||
7891 | bra t_inx2 | ||
7892 | |||
7893 | TANHBORS: | ||
7894 | cmp.l %d1,&0x3FFF8000 # is |X| < 1? | ||
7895 | blt.w TANHSM # yes | ||
7896 | |||
7897 | cmp.l %d1,&0x40048AA1 # is |X| > 50 LOG2? | ||
7898 | bgt.w TANHHUGE # yes | ||
7899 | |||
7900 | #-- (5/2) LOG2 < |X| < 50 LOG2, | ||
7901 | #--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X), | ||
7902 | #--TANH(X) = SGN - SGN*2/[EXP(Y)+1]. | ||
7903 | |||
7904 | mov.l X(%a6),%d1 | ||
7905 | mov.l %d1,SGN(%a6) | ||
7906 | and.l &0x7FFF0000,%d1 # biased expo. of |X| | ||
7907 | add.l &0x00010000,%d1 # EXPO OF 2|X| | ||
7908 | mov.l %d1,X(%a6) # Y = 2|X| | ||
7909 | and.l &0x80000000,SGN(%a6) # SGN keeps only the sign bit of X | ||
7910 | mov.l SGN(%a6),%d1 # NOTE(review): looks redundant; d1 is reloaded from SGN after the setox call | ||
7911 | fmov.x X(%a6),%fp0 # Y = 2|X| | ||
7912 | |||
7913 | mov.l %d0,-(%sp) # save caller's d0 (round precision,mode) | ||
7914 | clr.l %d0 # call setox with d0 = 0 | ||
7915 | fmovm.x &0x01,-(%sp) # save Y on stack | ||
7916 | lea (%sp),%a0 # pass ptr to Y | ||
7917 | bsr setox # FP0 IS EXP(Y) | ||
7918 | add.l &0xc,%sp # clear Y from stack | ||
7919 | mov.l (%sp)+,%d0 # restore caller's d0 | ||
7920 | mov.l SGN(%a6),%d1 | ||
7921 | fadd.s &0x3F800000,%fp0 # EXP(Y)+1 | ||
7922 | |||
7923 | eor.l &0xC0000000,%d1 # -SIGN(X)*2 | ||
7924 | fmov.s %d1,%fp1 # -SIGN(X)*2 IN SGL FMT | ||
7925 | fdiv.x %fp0,%fp1 # -SIGN(X)2 / [EXP(Y)+1 ] | ||
7926 | |||
7927 | mov.l SGN(%a6),%d1 | ||
7928 | or.l &0x3F800000,%d1 # SGN | ||
7929 | fmov.s %d1,%fp0 # SGN IN SGL FMT | ||
7930 | |||
7931 | fmov.l %d0,%fpcr # restore users round prec,mode | ||
7932 | mov.b &FADD_OP,%d1 # last inst is ADD | ||
7933 | fadd.x %fp1,%fp0 # SGN - SGN*2/[EXP(Y)+1] | ||
7934 | bra t_inx2 | ||
7935 | |||
7936 | TANHSM: | ||
7937 | fmov.l %d0,%fpcr # restore users round prec,mode | ||
7938 | mov.b &FMOV_OP,%d1 # last inst is MOVE | ||
7939 | fmov.x X(%a6),%fp0 # last inst - possible exception set | ||
7940 | bra t_catch | ||
7941 | |||
7942 | #---RETURN SGN(X) - SGN(X)EPS | ||
7943 | TANHHUGE: | ||
7944 | mov.l X(%a6),%d1 | ||
7945 | and.l &0x80000000,%d1 # keep the sign of X | ||
7946 | or.l &0x3F800000,%d1 # SIGN(X)*1.0 in single format | ||
7947 | fmov.s %d1,%fp0 # fp0 is SGN(X) | ||
7948 | and.l &0x80000000,%d1 # back to just the sign bit | ||
7949 | eor.l &0x80800000,%d1 # -SIGN(X)*EPS | ||
7950 | |||
7951 | fmov.l %d0,%fpcr # restore users round prec,mode | ||
7952 | fadd.s %d1,%fp0 # SGN(X) - SGN(X)*EPS | ||
7953 | bra t_inx2 | ||
7954 | |||
7955 | global stanhd | ||
7956 | #--TANH(X) = X FOR DENORMALIZED X | ||
7957 | stanhd: | ||
7958 | bra t_extdnrm # all work is delegated to t_extdnrm | ||
7959 | |||
7960 | ######################################################################### | ||
7961 | # slogn(): computes the natural logarithm of a normalized input # | ||
7962 | # slognd(): computes the natural logarithm of a denormalized input # | ||
7963 | # slognp1(): computes the log(1+X) of a normalized input # | ||
7964 | # slognp1d(): computes the log(1+X) of a denormalized input # | ||
7965 | # # | ||
7966 | # INPUT *************************************************************** # | ||
7967 | # a0 = pointer to extended precision input # | ||
7968 | # d0 = round precision,mode # | ||
7969 | # # | ||
7970 | # OUTPUT ************************************************************** # | ||
7971 | # fp0 = log(X) or log(1+X) # | ||
7972 | # # | ||
7973 | # ACCURACY and MONOTONICITY ******************************************* # | ||
7974 | # The returned result is within 2 ulps in 64 significant bits, # | ||
7975 | # i.e. within 0.5001 ulp to 53 bits if the result is subsequently # | ||
7976 | # rounded to double precision. The result is provably monotonic # | ||
7977 | # in double precision. # | ||
7978 | # # | ||
7979 | # ALGORITHM *********************************************************** # | ||
7980 | # LOGN: # | ||
7981 | # Step 1. If |X-1| < 1/16, approximate log(X) by an odd # | ||
7982 | # polynomial in u, where u = 2(X-1)/(X+1). Otherwise, # | ||
7983 | # move on to Step 2. # | ||
7984 | # # | ||
7985 | # Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first # | ||
7986 | # seven significant bits of Y plus 2**(-7), i.e. # | ||
7987 | # F = 1.xxxxxx1 in base 2 where the six "x" match those # | ||
7988 | # of Y. Note that |Y-F| <= 2**(-7). # | ||
7989 | # # | ||
7990 | # Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a # | ||
7991 | # polynomial in u, log(1+u) = poly. # | ||
7992 | # # | ||
7993 | # Step 4. Reconstruct # | ||
7994 | # log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) # | ||
7995 | # by k*log(2) + (log(F) + poly). The values of log(F) are # | ||
7996 | # calculated beforehand and stored in the program. # | ||
7997 | # # | ||
7998 | # lognp1: # | ||
7999 | # Step 1: If |X| < 1/16, approximate log(1+X) by an odd # | ||
8000 | # polynomial in u where u = 2X/(2+X). Otherwise, move on # | ||
8001 | # to Step 2. # | ||
8002 | # # | ||
8003 | # Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done # | ||
8004 | # in Step 2 of the algorithm for LOGN and compute # | ||
8005 | # log(1+X) as k*log(2) + log(F) + poly where poly # | ||
8006 | # approximates log(1+u), u = (Y-F)/F. # | ||
8007 | # # | ||
8008 | # Implementation Notes: # | ||
8009 | # Note 1. There are 64 different possible values for F, thus 64 # | ||
8010 | # log(F)'s need to be tabulated. Moreover, the values of # | ||
8011 | # 1/F are also tabulated so that the division in (Y-F)/F # | ||
8012 | # can be performed by a multiplication. # | ||
8013 | # # | ||
8014 | # Note 2. In Step 2 of lognp1, in order to preserve accuracy, # | ||
8015 | # the value Y-F has to be calculated carefully when # | ||
8016 | # 1/2 <= X < 3/2. # | ||
8017 | # # | ||
8018 | # Note 3. To fully exploit the pipeline, polynomials are usually # | ||
8019 | # separated into two parts evaluated independently before # | ||
8020 | # being added up. # | ||
8021 | # # | ||
8022 | ######################################################################### | ||
8023 | LOGOF2: | ||
8024 | long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 # log(2) in extended precision | ||
8025 | |||
8026 | one: | ||
8027 | long 0x3F800000 # 1.0 in single precision | ||
8028 | zero: | ||
8029 | long 0x00000000 # 0.0 in single precision | ||
8030 | infty: | ||
8031 | long 0x7F800000 # +infinity in single precision | ||
8032 | negone: | ||
8033 | long 0xBF800000 # -1.0 in single precision | ||
8034 | |||
8035 | LOGA6: | ||
8036 | long 0x3FC2499A,0xB5E4040B | ||
8037 | LOGA5: | ||
8038 | long 0xBFC555B5,0x848CB7DB | ||
8039 | |||
8040 | LOGA4: | ||
8041 | long 0x3FC99999,0x987D8730 | ||
8042 | LOGA3: | ||
8043 | long 0xBFCFFFFF,0xFF6F7E97 | ||
8044 | |||
8045 | LOGA2: | ||
8046 | long 0x3FD55555,0x555555A4 | ||
8047 | LOGA1: | ||
8048 | long 0xBFE00000,0x00000008 | ||
8049 | |||
8050 | LOGB5: | ||
8051 | long 0x3F175496,0xADD7DAD6 | ||
8052 | LOGB4: | ||
8053 | long 0x3F3C71C2,0xFE80C7E0 | ||
8054 | |||
8055 | LOGB3: | ||
8056 | long 0x3F624924,0x928BCCFF | ||
8057 | LOGB2: | ||
8058 | long 0x3F899999,0x999995EC | ||
8059 | |||
8060 | LOGB1: | ||
8061 | long 0x3FB55555,0x55555555 | ||
8062 | TWO: | ||
8063 | long 0x40000000,0x00000000 | ||
8064 | |||
8065 | LTHOLD: | ||
8066 | long 0x3f990000,0x80000000,0x00000000,0x00000000 | ||
8067 | |||
8068 | LOGTBL: | ||
8069 | long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000 | ||
8070 | long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000 | ||
8071 | long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000 | ||
8072 | long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000 | ||
8073 | long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000 | ||
8074 | long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000 | ||
8075 | long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000 | ||
8076 | long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000 | ||
8077 | long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000 | ||
8078 | long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000 | ||
8079 | long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000 | ||
8080 | long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000 | ||
8081 | long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000 | ||
8082 | long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000 | ||
8083 | long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000 | ||
8084 | long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000 | ||
8085 | long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000 | ||
8086 | long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000 | ||
8087 | long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000 | ||
8088 | long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000 | ||
8089 | long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000 | ||
8090 | long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000 | ||
8091 | long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000 | ||
8092 | long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000 | ||
8093 | long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000 | ||
8094 | long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000 | ||
8095 | long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000 | ||
8096 | long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000 | ||
8097 | long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000 | ||
8098 | long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000 | ||
8099 | long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000 | ||
8100 | long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000 | ||
8101 | long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000 | ||
8102 | long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000 | ||
8103 | long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000 | ||
8104 | long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000 | ||
8105 | long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000 | ||
8106 | long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000 | ||
8107 | long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000 | ||
8108 | long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000 | ||
8109 | long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000 | ||
8110 | long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000 | ||
8111 | long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000 | ||
8112 | long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000 | ||
8113 | long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000 | ||
8114 | long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000 | ||
8115 | long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000 | ||
8116 | long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000 | ||
8117 | long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000 | ||
8118 | long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000 | ||
8119 | long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000 | ||
8120 | long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000 | ||
8121 | long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000 | ||
8122 | long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000 | ||
8123 | long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000 | ||
8124 | long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000 | ||
8125 | long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000 | ||
8126 | long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000 | ||
8127 | long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000 | ||
8128 | long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000 | ||
8129 | long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000 | ||
8130 | long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000 | ||
8131 | long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000 | ||
8132 | long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000 | ||
8133 | long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000 | ||
8134 | long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000 | ||
8135 | long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000 | ||
8136 | long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000 | ||
8137 | long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000 | ||
8138 | long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000 | ||
8139 | long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000 | ||
8140 | long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000 | ||
8141 | long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000 | ||
8142 | long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000 | ||
8143 | long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000 | ||
8144 | long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000 | ||
8145 | long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000 | ||
8146 | long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000 | ||
8147 | long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000 | ||
8148 | long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000 | ||
8149 | long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000 | ||
8150 | long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000 | ||
8151 | long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000 | ||
8152 | long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000 | ||
8153 | long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000 | ||
8154 | long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000 | ||
8155 | long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000 | ||
8156 | long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000 | ||
8157 | long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000 | ||
8158 | long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000 | ||
8159 | long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000 | ||
8160 | long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000 | ||
8161 | long 0x3FFE0000,0x94458094,0x45809446,0x00000000 | ||
8162 | long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000 | ||
8163 | long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000 | ||
8164 | long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000 | ||
8165 | long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000 | ||
8166 | long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000 | ||
8167 | long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000 | ||
8168 | long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000 | ||
8169 | long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000 | ||
8170 | long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000 | ||
8171 | long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000 | ||
8172 | long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000 | ||
8173 | long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000 | ||
8174 | long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000 | ||
8175 | long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000 | ||
8176 | long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000 | ||
8177 | long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000 | ||
8178 | long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000 | ||
8179 | long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000 | ||
8180 | long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000 | ||
8181 | long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000 | ||
8182 | long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000 | ||
8183 | long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000 | ||
8184 | long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000 | ||
8185 | long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000 | ||
8186 | long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000 | ||
8187 | long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000 | ||
8188 | long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000 | ||
8189 | long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000 | ||
8190 | long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000 | ||
8191 | long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000 | ||
8192 | long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000 | ||
8193 | long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000 | ||
8194 | long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000 | ||
8195 | long 0x3FFE0000,0x80808080,0x80808081,0x00000000 | ||
8196 | long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000 | ||
8197 | |||
8198 | set ADJK,L_SCR1 | ||
8199 | |||
8200 | set X,FP_SCR0 | ||
8201 | set XDCARE,X+2 | ||
8202 | set XFRAC,X+4 | ||
8203 | |||
8204 | set F,FP_SCR1 | ||
8205 | set FFRAC,F+4 | ||
8206 | |||
8207 | set KLOG2,FP_SCR0 | ||
8208 | |||
8209 | set SAVEU,FP_SCR0 | ||
8210 | |||
8211 | global slogn | ||
8212 | #--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S. A0 POINTS TO THE EXTENDED-PRECISION INPUT; D0 IS MOVED INTO FPCR JUST BEFORE THE FINAL ADD. | ||
8213 | slogn: | ||
8214 | fmov.x (%a0),%fp0 # LOAD INPUT X FROM (A0) | ||
8215 | mov.l &0x00000000,ADJK(%a6) | ||
8216 | |||
8217 | LOGBGN: | ||
8218 | #--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS | ||
8219 | #--A FINITE, NON-ZERO, NORMALIZED NUMBER. (slognd ALSO BRANCHES HERE WITH A0 POINTING AT ITS NORMALIZED COPY OF X.) | ||
8220 | |||
8221 | mov.l (%a0),%d1 | ||
8222 | mov.w 4(%a0),%d1 | ||
8223 | |||
8224 | mov.l (%a0),X(%a6) | ||
8225 | mov.l 4(%a0),X+4(%a6) | ||
8226 | mov.l 8(%a0),X+8(%a6) | ||
8227 | |||
8228 | cmp.l %d1,&0 # D1 IS X IN COMPACT FORM (SIGN/EXPONENT WORD : TOP 16 FRACTION BITS); CHECK IF X IS NEGATIVE | ||
8229 | blt.w LOGNEG # LOG OF NEGATIVE ARGUMENT IS INVALID | ||
8230 | # X IS POSITIVE, CHECK IF X IS NEAR 1 | ||
8231 | cmp.l %d1,&0x3ffef07d # IS X < EXP(-1/16) (ROUGHLY 15/16)? | ||
8232 | blt.b LOGMAIN # YES | ||
8233 | cmp.l %d1,&0x3fff8841 # IS X > EXP(1/16) (ROUGHLY 17/16)? | ||
8234 | ble.w LOGNEAR1 # NO | ||
8235 | |||
8236 | LOGMAIN: | ||
8237 | #--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1 | ||
8238 | |||
8239 | #--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY. | ||
8240 | #--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1. | ||
8241 | #--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y) | ||
8242 | #-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F). | ||
8243 | #--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING | ||
8244 | #--LOG(1+U) CAN BE VERY EFFICIENT. | ||
8245 | #--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO | ||
8246 | #--DIVISION IS NEEDED TO CALCULATE (Y-F)/F. | ||
8247 | |||
8248 | #--GET K, Y, F, AND ADDRESS OF 1/F. | ||
8249 | asr.l &8,%d1 | ||
8250 | asr.l &8,%d1 # SHIFTED 16 BITS, BIASED EXPO. OF X | ||
8251 | sub.l &0x3FFF,%d1 # THIS IS K | ||
8252 | add.l ADJK(%a6),%d1 # ADJUST K, ORIGINAL INPUT MAY BE DENORM. | ||
8253 | lea LOGTBL(%pc),%a0 # BASE OF TABLE OF (1/F, LOG(F)) PAIRS, 32 BYTES PER F | ||
8254 | fmov.l %d1,%fp1 # CONVERT K TO FLOATING-POINT FORMAT | ||
8255 | |||
8256 | #--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F | ||
8257 | mov.l &0x3FFF0000,X(%a6) # X IS NOW Y, I.E. 2^(-K)*X | ||
8258 | mov.l XFRAC(%a6),FFRAC(%a6) | ||
8259 | and.l &0xFE000000,FFRAC(%a6) # FIRST 7 BITS OF Y | ||
8260 | or.l &0x01000000,FFRAC(%a6) # GET F: ATTACH A 1 AT THE EIGHTH BIT | ||
8261 | mov.l FFRAC(%a6),%d1 # READY TO GET ADDRESS OF 1/F | ||
8262 | and.l &0x7E000000,%d1 | ||
8263 | asr.l &8,%d1 | ||
8264 | asr.l &8,%d1 | ||
8265 | asr.l &4,%d1 # SHIFTED 20, D1 IS THE DISPLACEMENT (6-BIT INDEX * 32) | ||
8266 | add.l %d1,%a0 # A0 IS THE ADDRESS FOR 1/F | ||
8267 | |||
8268 | fmov.x X(%a6),%fp0 | ||
8269 | mov.l &0x3fff0000,F(%a6) | ||
8270 | clr.l F+8(%a6) | ||
8271 | fsub.x F(%a6),%fp0 # Y-F | ||
8272 | fmovm.x &0xc,-(%sp) # SAVE FP2-3 WHILE FP0 IS NOT READY | ||
8273 | #--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K | ||
8274 | #--REGISTERS SAVED: FP2-3 (PUSHED ABOVE); THE FPCR VALUE TO RESTORE IS IN D0 | ||
8275 | |||
8276 | LP1CONT1: | ||
8277 | #--A RE-ENTRY POINT FOR LOGNP1 | ||
8278 | fmul.x (%a0),%fp0 # FP0 IS U = (Y-F)/F | ||
8279 | fmul.x LOGOF2(%pc),%fp1 # GET K*LOG2 WHILE FP0 IS NOT READY | ||
8280 | fmov.x %fp0,%fp2 | ||
8281 | fmul.x %fp2,%fp2 # FP2 IS V=U*U | ||
8282 | fmov.x %fp1,KLOG2(%a6) # PUT K*LOG2 IN MEMORY, FREE FP1 | ||
8283 | |||
8284 | #--LOG(1+U) IS APPROXIMATED BY | ||
8285 | #--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS | ||
8286 | #--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))] | ||
8287 | |||
8288 | fmov.x %fp2,%fp3 | ||
8289 | fmov.x %fp2,%fp1 | ||
8290 | |||
8291 | fmul.d LOGA6(%pc),%fp1 # V*A6 | ||
8292 | fmul.d LOGA5(%pc),%fp2 # V*A5 | ||
8293 | |||
8294 | fadd.d LOGA4(%pc),%fp1 # A4+V*A6 | ||
8295 | fadd.d LOGA3(%pc),%fp2 # A3+V*A5 | ||
8296 | |||
8297 | fmul.x %fp3,%fp1 # V*(A4+V*A6) | ||
8298 | fmul.x %fp3,%fp2 # V*(A3+V*A5) | ||
8299 | |||
8300 | fadd.d LOGA2(%pc),%fp1 # A2+V*(A4+V*A6) | ||
8301 | fadd.d LOGA1(%pc),%fp2 # A1+V*(A3+V*A5) | ||
8302 | |||
8303 | fmul.x %fp3,%fp1 # V*(A2+V*(A4+V*A6)) | ||
8304 | add.l &16,%a0 # ADDRESS OF LOG(F), 16 BYTES PAST 1/F | ||
8305 | fmul.x %fp3,%fp2 # V*(A1+V*(A3+V*A5)) | ||
8306 | |||
8307 | fmul.x %fp0,%fp1 # U*V*(A2+V*(A4+V*A6)) | ||
8308 | fadd.x %fp2,%fp0 # U+V*(A1+V*(A3+V*A5)) | ||
8309 | |||
8310 | fadd.x (%a0),%fp1 # LOG(F)+U*V*(A2+V*(A4+V*A6)) | ||
8311 | fmovm.x (%sp)+,&0x30 # RESTORE FP2-3 | ||
8312 | fadd.x %fp1,%fp0 # FP0 IS LOG(F) + LOG(1+U) | ||
8313 | |||
8314 | fmov.l %d0,%fpcr | ||
8315 | fadd.x KLOG2(%a6),%fp0 # FINAL ADD | ||
8316 | bra t_inx2 | ||
8317 | |||
8318 | |||
8319 | LOGNEAR1: | ||
8320 | |||
8321 | # if the input is exactly equal to one, then exit through ld_pzero. | ||
8322 | # if these 2 lines weren't here, the correct answer would be returned | ||
8323 | # but the INEX2 bit would be set. | ||
8324 | fcmp.b %fp0,&0x1 # is it equal to one? | ||
8325 | fbeq.l ld_pzero # yes | ||
8326 | |||
8327 | #--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT. | ||
8328 | fmov.x %fp0,%fp1 | ||
8329 | fsub.s one(%pc),%fp1 # FP1 IS X-1 | ||
8330 | fadd.s one(%pc),%fp0 # FP0 IS X+1 | ||
8331 | fadd.x %fp1,%fp1 # FP1 IS 2(X-1) | ||
8332 | #--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL | ||
8333 | #--IN U, U = 2(X-1)/(X+1) = FP1/FP0 | ||
8334 | |||
8335 | LP1CONT2: | ||
8336 | #--THIS IS A RE-ENTRY POINT FOR LOGNP1; ON ENTRY FP1 IS THE NUMERATOR AND FP0 THE DENOMINATOR OF U | ||
8337 | fdiv.x %fp0,%fp1 # FP1 IS U | ||
8338 | fmovm.x &0xc,-(%sp) # SAVE FP2-3 | ||
8339 | #--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3 | ||
8340 | #--LET V=U*U, W=V*V, CALCULATE | ||
8341 | #--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY | ||
8342 | #--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] ) | ||
8343 | fmov.x %fp1,%fp0 | ||
8344 | fmul.x %fp0,%fp0 # FP0 IS V | ||
8345 | fmov.x %fp1,SAVEU(%a6) # STORE U IN MEMORY, FREE FP1 | ||
8346 | fmov.x %fp0,%fp1 | ||
8347 | fmul.x %fp1,%fp1 # FP1 IS W | ||
8348 | |||
8349 | fmov.d LOGB5(%pc),%fp3 | ||
8350 | fmov.d LOGB4(%pc),%fp2 | ||
8351 | |||
8352 | fmul.x %fp1,%fp3 # W*B5 | ||
8353 | fmul.x %fp1,%fp2 # W*B4 | ||
8354 | |||
8355 | fadd.d LOGB3(%pc),%fp3 # B3+W*B5 | ||
8356 | fadd.d LOGB2(%pc),%fp2 # B2+W*B4 | ||
8357 | |||
8358 | fmul.x %fp3,%fp1 # W*(B3+W*B5), FP3 RELEASED | ||
8359 | |||
8360 | fmul.x %fp0,%fp2 # V*(B2+W*B4) | ||
8361 | |||
8362 | fadd.d LOGB1(%pc),%fp1 # B1+W*(B3+W*B5) | ||
8363 | fmul.x SAVEU(%a6),%fp0 # FP0 IS U*V | ||
8364 | |||
8365 | fadd.x %fp2,%fp1 # B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED | ||
8366 | fmovm.x (%sp)+,&0x30 # FP2-3 RESTORED | ||
8367 | |||
8368 | fmul.x %fp1,%fp0 # U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] ) | ||
8369 | |||
8370 | fmov.l %d0,%fpcr | ||
8371 | fadd.x SAVEU(%a6),%fp0 | ||
8372 | bra t_inx2 | ||
8373 | |||
8374 | #--REGISTERS SAVED FPCR. LOG OF A NEGATIVE ARGUMENT IS INVALID; REACHED FROM THE SIGN CHECK AFTER LOGBGN, TAIL-BRANCHES TO t_operr | ||
8375 | LOGNEG: | ||
8376 | bra t_operr | ||
8377 | |||
8378 | global slognd | ||
8379 | slognd: | ||
8380 | #--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT. X IS NORMALIZED INTO THE X SCRATCH AREA AND CONTROL JOINS slogn AT LOGBGN. | ||
8381 | |||
8382 | mov.l &-100,ADJK(%a6) # PLACEHOLDER ONLY: ADJK IS OVERWRITTEN WITH -K ON BOTH PATHS BELOW BEFORE LOGBGN READS IT | ||
8383 | |||
8384 | #----normalize the input value by left shifting k bits (k to be determined | ||
8385 | #----below), adjusting exponent and storing -k to ADJK | ||
8386 | #----the value TWOTO100 is no longer needed. | ||
8387 | #----Note that this code assumes the denormalized input is NON-ZERO. | ||
8388 | |||
8389 | movm.l &0x3f00,-(%sp) # save some registers {d2-d7} | ||
8390 | mov.l (%a0),%d3 # D3 is the sign/exponent longword of X, stored back unchanged as the exponent of the normalized value | ||
8391 | mov.l 4(%a0),%d4 | ||
8392 | mov.l 8(%a0),%d5 # (D4,D5) is (Hi_X,Lo_X) | ||
8393 | clr.l %d2 # D2 used for holding K | ||
8394 | |||
8395 | tst.l %d4 | ||
8396 | bne.b Hi_not0 | ||
8397 | |||
8398 | Hi_0: | ||
8399 | mov.l %d5,%d4 | ||
8400 | clr.l %d5 | ||
8401 | mov.l &32,%d2 | ||
8402 | clr.l %d6 | ||
8403 | bfffo %d4{&0:&32},%d6 | ||
8404 | lsl.l %d6,%d4 | ||
8405 | add.l %d6,%d2 # K = 32 + bit offset of the first 1 in Lo_X; (D3,D4,D5) is normalized | ||
8406 | |||
8407 | mov.l %d3,X(%a6) | ||
8408 | mov.l %d4,XFRAC(%a6) | ||
8409 | mov.l %d5,XFRAC+4(%a6) | ||
8410 | neg.l %d2 | ||
8411 | mov.l %d2,ADJK(%a6) | ||
8412 | fmov.x X(%a6),%fp0 | ||
8413 | movm.l (%sp)+,&0xfc # restore registers {d2-d7} | ||
8414 | lea X(%a6),%a0 | ||
8415 | bra.w LOGBGN # begin regular log(X) on the normalized copy, with ADJK = -K | ||
8416 | |||
8417 | Hi_not0: | ||
8418 | clr.l %d6 | ||
8419 | bfffo %d4{&0:&32},%d6 # find first 1 | ||
8420 | mov.l %d6,%d2 # get k | ||
8421 | lsl.l %d6,%d4 | ||
8422 | mov.l %d5,%d7 # a copy of D5 | ||
8423 | lsl.l %d6,%d5 | ||
8424 | neg.l %d6 | ||
8425 | add.l &32,%d6 | ||
8426 | lsr.l %d6,%d7 | ||
8427 | or.l %d7,%d4 # bits shifted out of Lo_X are merged into Hi_X; (D3,D4,D5) normalized | ||
8428 | |||
8429 | mov.l %d3,X(%a6) | ||
8430 | mov.l %d4,XFRAC(%a6) | ||
8431 | mov.l %d5,XFRAC+4(%a6) | ||
8432 | neg.l %d2 | ||
8433 | mov.l %d2,ADJK(%a6) | ||
8434 | fmov.x X(%a6),%fp0 | ||
8435 | movm.l (%sp)+,&0xfc # restore registers {d2-d7} | ||
8436 | lea X(%a6),%a0 | ||
8437 | bra.w LOGBGN # begin regular log(X) on the normalized copy, with ADJK = -K | ||
8438 | |||
8439 | global slognp1 | ||
8440 | #--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S. A0 POINTS TO THE INPUT; D0 IS MOVED INTO FPCR BEFORE THE RESULT IS PRODUCED. | ||
8441 | slognp1: | ||
8442 | fmov.x (%a0),%fp0 # LOAD INPUT | ||
8443 | fabs.x %fp0 # test magnitude | ||
8444 | fcmp.x %fp0,LTHOLD(%pc) # compare with min threshold | ||
8445 | fbgt.w LP1REAL # if greater, continue | ||
8446 | fmov.l %d0,%fpcr | ||
8447 | mov.b &FMOV_OP,%d1 # last inst is MOVE | ||
8448 | fmov.x (%a0),%fp0 # return signed argument (|X| is at or below the threshold) | ||
8449 | bra t_catch | ||
8450 | |||
8451 | LP1REAL: | ||
8452 | fmov.x (%a0),%fp0 # LOAD INPUT | ||
8453 | mov.l &0x00000000,ADJK(%a6) | ||
8454 | fmov.x %fp0,%fp1 # FP1 IS INPUT Z | ||
8455 | fadd.s one(%pc),%fp0 # X := ROUND(1+Z) | ||
8456 | fmov.x %fp0,X(%a6) | ||
8457 | mov.w XFRAC(%a6),XDCARE(%a6) | ||
8458 | mov.l X(%a6),%d1 | ||
8459 | cmp.l %d1,&0 | ||
8460 | ble.w LP1NEG0 # LOG OF ZERO OR -VE | ||
8461 | cmp.l %d1,&0x3ffe8000 # IS X WITHIN THE BOUNDS [1/2,3/2]? | ||
8462 | blt.w LOGMAIN | ||
8463 | cmp.l %d1,&0x3fffc000 | ||
8464 | bgt.w LOGMAIN | ||
8465 | #--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z, | ||
8466 | #--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE, | ||
8467 | #--SIMPLY INVOKE LOG(X) FOR LOG(1+Z). | ||
8468 | |||
8469 | LP1NEAR1: | ||
8470 | #--NEXT SEE IF EXP(-1/16) < X < EXP(1/16) | ||
8471 | cmp.l %d1,&0x3ffef07d | ||
8472 | blt.w LP1CARE | ||
8473 | cmp.l %d1,&0x3fff8841 | ||
8474 | bgt.w LP1CARE | ||
8475 | |||
8476 | LP1ONE16: | ||
8477 | #--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2) | ||
8478 | #--WHERE U = 2Z/(2+Z) = 2Z/(1+X). | ||
8479 | fadd.x %fp1,%fp1 # FP1 IS 2Z | ||
8480 | fadd.s one(%pc),%fp0 # FP0 IS 1+X | ||
8481 | #--U = FP1/FP0 | ||
8482 | bra.w LP1CONT2 | ||
8483 | |||
8484 | LP1CARE: | ||
8485 | #--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE | ||
8486 | #--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST | ||
8487 | #--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2], | ||
8488 | #--THERE ARE ONLY TWO CASES. | ||
8489 | #--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z | ||
8490 | #--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z | ||
8491 | #--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF | ||
8492 | #--(1/F) IN A0, Y-F IN FP0, AND FP2-3 SAVED. | ||
8493 | |||
8494 | mov.l XFRAC(%a6),FFRAC(%a6) | ||
8495 | and.l &0xFE000000,FFRAC(%a6) | ||
8496 | or.l &0x01000000,FFRAC(%a6) # F OBTAINED | ||
8497 | cmp.l %d1,&0x3FFF8000 # SEE IF 1+Z > 1 | ||
8498 | bge.b KISZERO | ||
8499 | |||
8500 | KISNEG1: | ||
8501 | fmov.s TWO(%pc),%fp0 | ||
8502 | mov.l &0x3fff0000,F(%a6) | ||
8503 | clr.l F+8(%a6) | ||
8504 | fsub.x F(%a6),%fp0 # 2-F | ||
8505 | mov.l FFRAC(%a6),%d1 | ||
8506 | and.l &0x7E000000,%d1 | ||
8507 | asr.l &8,%d1 | ||
8508 | asr.l &8,%d1 | ||
8509 | asr.l &4,%d1 # D1 CONTAINS DISPLACEMENT FOR 1/F | ||
8510 | fadd.x %fp1,%fp1 # GET 2Z | ||
8511 | fmovm.x &0xc,-(%sp) # SAVE FP2 {%fp2/%fp3} | ||
8512 | fadd.x %fp1,%fp0 # FP0 IS Y-F = (2-F)+2Z | ||
8513 | lea LOGTBL(%pc),%a0 # A0 IS ADDRESS OF 1/F | ||
8514 | add.l %d1,%a0 | ||
8515 | fmov.s negone(%pc),%fp1 # FP1 IS K = -1 | ||
8516 | bra.w LP1CONT1 | ||
8517 | |||
8518 | KISZERO: | ||
8519 | fmov.s one(%pc),%fp0 | ||
8520 | mov.l &0x3fff0000,F(%a6) | ||
8521 | clr.l F+8(%a6) | ||
8522 | fsub.x F(%a6),%fp0 # 1-F | ||
8523 | mov.l FFRAC(%a6),%d1 | ||
8524 | and.l &0x7E000000,%d1 | ||
8525 | asr.l &8,%d1 | ||
8526 | asr.l &8,%d1 | ||
8527 | asr.l &4,%d1 | ||
8528 | fadd.x %fp1,%fp0 # FP0 IS Y-F = (1-F)+Z | ||
8529 | fmovm.x &0xc,-(%sp) # FP2 SAVED {%fp2/%fp3} | ||
8530 | lea LOGTBL(%pc),%a0 | ||
8531 | add.l %d1,%a0 # A0 IS ADDRESS OF 1/F | ||
8532 | fmov.s zero(%pc),%fp1 # FP1 IS K = 0 | ||
8533 | bra.w LP1CONT1 | ||
8534 | |||
8535 | LP1NEG0: | ||
8536 | #--FPCR SAVED. D1 IS X = ROUND(1+Z) IN COMPACT FORM; D0 HOLDS THE FPCR VALUE TO RESTORE. | ||
8537 | cmp.l %d1,&0 | ||
8538 | blt.b LP1NEG | ||
8539 | LP1ZERO: | ||
8540 | fmov.s negone(%pc),%fp0 | ||
8541 | |||
8542 | fmov.l %d0,%fpcr | ||
8543 | bra t_dz | ||
8544 | |||
8545 | LP1NEG: | ||
8546 | fmov.s zero(%pc),%fp0 | ||
8547 | |||
8548 | fmov.l %d0,%fpcr | ||
8549 | bra t_operr | ||
8550 | |||
8551 | global slognp1d | ||
8552 | #--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT | ||
8553 | # Simply return the denorm: tail-branches to t_extdnrm, which is defined outside this section (presumably it returns the operand and handles underflow -- confirm) | ||
8554 | slognp1d: | ||
8555 | bra t_extdnrm | ||
8556 | |||
8557 | ######################################################################### | ||
8558 | # satanh(): computes the inverse hyperbolic tangent of a norm input # | ||
8559 | # satanhd(): computes the inverse hyperbolic tangent of a denorm input # | ||
8560 | # # | ||
8561 | # INPUT *************************************************************** # | ||
8562 | # a0 = pointer to extended precision input # | ||
8563 | # d0 = round precision,mode # | ||
8564 | # # | ||
8565 | # OUTPUT ************************************************************** # | ||
8566 | # fp0 = arctanh(X) # | ||
8567 | # # | ||
8568 | # ACCURACY and MONOTONICITY ******************************************* # | ||
8569 | # The returned result is within 3 ulps in 64 significant bits, # | ||
8570 | # i.e. within 0.5001 ulp to 53 bits if the result is subsequently # | ||
8571 | # rounded to double precision. The result is provably monotonic # | ||
8572 | # in double precision. # | ||
8573 | # # | ||
8574 | # ALGORITHM *********************************************************** # | ||
8575 | # # | ||
8576 | # ATANH # | ||
8577 | # 1. If |X| >= 1, go to 3. # | ||
8578 | # # | ||
8579 | # 2. (|X| < 1) Calculate atanh(X) by # | ||
8580 | # sgn := sign(X) # | ||
8581 | # y := |X| # | ||
8582 | # z := 2y/(1-y) # | ||
8583 | # atanh(X) := sgn * (1/2) * logp1(z) # | ||
8584 | # Exit. # | ||
8585 | # # | ||
8586 | # 3. If |X| > 1, go to 5. # | ||
8587 | # # | ||
8588 | # 4. (|X| = 1) Generate infinity with an appropriate sign and # | ||
8589 | # divide-by-zero by # | ||
8590 | # sgn := sign(X) # | ||
8591 | # atanh(X) := sgn / (+0). # | ||
8592 | # Exit. # | ||
8593 | # # | ||
8594 | # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. # | ||
8595 | # Exit. # | ||
8596 | # # | ||
8597 | ######################################################################### | ||
8598 | |||
8599 | global satanh | ||
8600 | satanh: | ||
8601 | mov.l (%a0),%d1 | ||
8602 | mov.w 4(%a0),%d1 | ||
8603 | and.l &0x7FFFFFFF,%d1 | ||
8604 | cmp.l %d1,&0x3FFF8000 | ||
8605 | bge.b ATANHBIG | ||
8606 | |||
8607 | #--THIS IS THE USUAL CASE, |X| < 1 | ||
8608 | #--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z). | ||
8609 | |||
8610 | fabs.x (%a0),%fp0 # Y = |X| | ||
8611 | fmov.x %fp0,%fp1 | ||
8612 | fneg.x %fp1 # -Y | ||
8613 | fadd.x %fp0,%fp0 # 2Y | ||
8614 | fadd.s &0x3F800000,%fp1 # 1-Y | ||
8615 | fdiv.x %fp1,%fp0 # 2Y/(1-Y) | ||
8616 | mov.l (%a0),%d1 | ||
8617 | and.l &0x80000000,%d1 | ||
8618 | or.l &0x3F000000,%d1 # SIGN(X)*HALF AS A SINGLE-PRECISION CONSTANT | ||
8619 | mov.l %d1,-(%sp) | ||
8620 | |||
8621 | mov.l %d0,-(%sp) # save rnd prec,mode | ||
8622 | clr.l %d0 # pass ext prec,RN | ||
8623 | fmovm.x &0x01,-(%sp) # save Z on stack | ||
8624 | lea (%sp),%a0 # pass ptr to Z | ||
8625 | bsr slognp1 # LOG1P(Z) | ||
8626 | add.l &0xc,%sp # clear Z (12 bytes) from stack | ||
8627 | |||
8628 | mov.l (%sp)+,%d0 # fetch old prec,mode | ||
8629 | fmov.l %d0,%fpcr # load it | ||
8630 | mov.b &FMUL_OP,%d1 # last inst is MUL | ||
8631 | fmul.s (%sp)+,%fp0 | ||
8632 | bra t_catch | ||
8633 | |||
8634 | ATANHBIG: | ||
8635 | fabs.x (%a0),%fp0 # |X|, KNOWN TO BE >= 1 HERE | ||
8636 | fcmp.s %fp0,&0x3F800000 | ||
8637 | fbgt t_operr | ||
8638 | bra t_dz | ||
8639 | |||
8640 | global satanhd | ||
8641 | #--ATANH(X) = X FOR DENORMALIZED X; HANDLED BY A TAIL-BRANCH TO t_extdnrm | ||
8642 | satanhd: | ||
8643 | bra t_extdnrm | ||
8644 | |||
8645 | ######################################################################### | ||
8646 | # slog10(): computes the base-10 logarithm of a normalized input # | ||
8647 | # slog10d(): computes the base-10 logarithm of a denormalized input # | ||
8648 | # slog2(): computes the base-2 logarithm of a normalized input # | ||
8649 | # slog2d(): computes the base-2 logarithm of a denormalized input # | ||
8650 | # # | ||
8651 | # INPUT *************************************************************** # | ||
8652 | # a0 = pointer to extended precision input # | ||
8653 | # d0 = round precision,mode # | ||
8654 | # # | ||
8655 | # OUTPUT ************************************************************** # | ||
8656 | # fp0 = log_10(X) or log_2(X) # | ||
8657 | # # | ||
8658 | # ACCURACY and MONOTONICITY ******************************************* # | ||
8659 | # The returned result is within 1.7 ulps in 64 significant bits, # | ||
8660 | # i.e. within 0.5003 ulp to 53 bits if the result is subsequently # | ||
8661 | # rounded to double precision. The result is provably monotonic # | ||
8662 | # in double precision. # | ||
8663 | # # | ||
8664 | # ALGORITHM *********************************************************** # | ||
8665 | # # | ||
8666 | # slog10d: # | ||
8667 | # # | ||
8668 | # Step 0. If X < 0, create a NaN and raise the invalid operation # | ||
8669 | # flag. Otherwise, save FPCR in D1; set FpCR to default. # | ||
8670 | # Notes: Default means round-to-nearest mode, no floating-point # | ||
8671 | # traps, and precision control = double extended. # | ||
8672 | # # | ||
8673 | # Step 1. Call slognd to obtain Y = log(X), the natural log of X. # | ||
8674 | # Notes: Even if X is denormalized, log(X) is always normalized. # | ||
8675 | # # | ||
8676 | # Step 2. Compute log_10(X) = log(X) * (1/log(10)). # | ||
8677 | # 2.1 Restore the user FPCR # | ||
8678 | # 2.2 Return ans := Y * INV_L10. # | ||
8679 | # # | ||
8680 | # slog10: # | ||
8681 | # # | ||
8682 | # Step 0. If X < 0, create a NaN and raise the invalid operation # | ||
8683 | # flag. Otherwise, save FPCR in D1; set FpCR to default. # | ||
8684 | # Notes: Default means round-to-nearest mode, no floating-point # | ||
8685 | # traps, and precision control = double extended. # | ||
8686 | # # | ||
8687 | # Step 1. Call sLogN to obtain Y = log(X), the natural log of X. # | ||
8688 | # # | ||
8689 | # Step 2. Compute log_10(X) = log(X) * (1/log(10)). # | ||
8690 | # 2.1 Restore the user FPCR # | ||
8691 | # 2.2 Return ans := Y * INV_L10. # | ||
8692 | # # | ||
8693 | # sLog2d: # | ||
8694 | # # | ||
8695 | # Step 0. If X < 0, create a NaN and raise the invalid operation # | ||
8696 | # flag. Otherwise, save FPCR in D1; set FpCR to default. # | ||
8697 | # Notes: Default means round-to-nearest mode, no floating-point # | ||
8698 | # traps, and precision control = double extended. # | ||
8699 | # # | ||
8700 | # Step 1. Call slognd to obtain Y = log(X), the natural log of X. # | ||
8701 | # Notes: Even if X is denormalized, log(X) is always normalized. # | ||
8702 | # # | ||
8703 | # Step 2. Compute log_2(X) = log(X) * (1/log(2)). # | ||
8704 | # 2.1 Restore the user FPCR # | ||
8705 | # 2.2 Return ans := Y * INV_L2. # | ||
8706 | # # | ||
8707 | # sLog2: # | ||
8708 | # # | ||
8709 | # Step 0. If X < 0, create a NaN and raise the invalid operation # | ||
8710 | # flag. Otherwise, save FPCR in D1; set FpCR to default. # | ||
8711 | # Notes: Default means round-to-nearest mode, no floating-point # | ||
8712 | # traps, and precision control = double extended. # | ||
8713 | # # | ||
8714 | # Step 1. If X is not an integer power of two, i.e., X != 2^k, # | ||
8715 | # go to Step 3. # | ||
8716 | # # | ||
8717 | # Step 2. Return k. # | ||
8718 | # 2.1 Get integer k, X = 2^k. # | ||
8719 | # 2.2 Restore the user FPCR. # | ||
8720 | # 2.3 Return ans := convert-to-double-extended(k). # | ||
8721 | # # | ||
8722 | # Step 3. Call sLogN to obtain Y = log(X), the natural log of X. # | ||
8723 | # # | ||
8724 | # Step 4. Compute log_2(X) = log(X) * (1/log(2)). # | ||
8725 | # 4.1 Restore the user FPCR # | ||
8726 | # 4.2 Return ans := Y * INV_L2. # | ||
8727 | # # | ||
8728 | ######################################################################### | ||
8729 | |||
8730 | INV_L10: | ||
8731 | long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000 | ||
8732 | |||
8733 | INV_L2: | ||
8734 | long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000 | ||
8735 | |||
8736 | global slog10 | ||
8737 | #--entry point for Log10(X), X is normalized; computes slogn(X) * INV_L10 with d0 reloaded into FPCR before the multiply | ||
8738 | slog10: | ||
8739 | fmov.b &0x1,%fp0 | ||
8740 | fcmp.x %fp0,(%a0) # if operand == 1, | ||
8741 | fbeq.l ld_pzero # return an EXACT zero | ||
8742 | |||
8743 | mov.l (%a0),%d1 | ||
8744 | blt.w invalid | ||
8745 | mov.l %d0,-(%sp) | ||
8746 | clr.l %d0 | ||
8747 | bsr slogn # fp0 = log(X), X normal. | ||
8748 | fmov.l (%sp)+,%fpcr | ||
8749 | fmul.x INV_L10(%pc),%fp0 | ||
8750 | bra t_inx2 | ||
8751 | |||
8752 | global slog10d | ||
8753 | #--entry point for Log10(X), X is denormalized; computes slognd(X) * INV_L10 with d0 reloaded into FPCR before the multiply | ||
8754 | slog10d: | ||
8755 | mov.l (%a0),%d1 | ||
8756 | blt.w invalid | ||
8757 | mov.l %d0,-(%sp) | ||
8758 | clr.l %d0 | ||
8759 | bsr slognd # fp0 = log(X), X denorm. | ||
8760 | fmov.l (%sp)+,%fpcr | ||
8761 | fmul.x INV_L10(%pc),%fp0 | ||
8762 | bra t_minx2 | ||
8763 | |||
8764 | global slog2 | ||
8765 | #--entry point for Log2(X), X is normalized; an exact power of two returns its integer exponent k without calling slogn | ||
8766 | slog2: | ||
8767 | mov.l (%a0),%d1 | ||
8768 | blt.w invalid | ||
8769 | |||
8770 | mov.l 8(%a0),%d1 | ||
8771 | bne.b continue # low mantissa longword non-zero: X is not 2^k | ||
8772 | |||
8773 | mov.l 4(%a0),%d1 | ||
8774 | and.l &0x7FFFFFFF,%d1 | ||
8775 | bne.b continue | ||
8776 | |||
8777 | #--X = 2^k. k = biased exponent - 0x3FFF; k == 0 (X == 1) exits through ld_pzero | ||
8778 | mov.w (%a0),%d1 | ||
8779 | and.l &0x00007FFF,%d1 | ||
8780 | sub.l &0x3FFF,%d1 | ||
8781 | beq.l ld_pzero | ||
8782 | fmov.l %d0,%fpcr | ||
8783 | fmov.l %d1,%fp0 | ||
8784 | bra t_inx2 | ||
8785 | |||
8786 | continue: | ||
8787 | mov.l %d0,-(%sp) | ||
8788 | clr.l %d0 | ||
8789 | bsr slogn # fp0 = log(X), X normal. | ||
8790 | fmov.l (%sp)+,%fpcr | ||
8791 | fmul.x INV_L2(%pc),%fp0 | ||
8792 | bra t_inx2 | ||
8793 | |||
8794 | invalid: | ||
8795 | bra t_operr | ||
8796 | |||
8797 | global slog2d | ||
8798 | #--entry point for Log2(X), X is denormalized; computes slognd(X) * INV_L2 with d0 reloaded into FPCR before the multiply | ||
8799 | slog2d: | ||
8800 | mov.l (%a0),%d1 | ||
8801 | blt.w invalid | ||
8802 | mov.l %d0,-(%sp) | ||
8803 | clr.l %d0 | ||
8804 | bsr slognd # fp0 = log(X), X denorm. | ||
8805 | fmov.l (%sp)+,%fpcr | ||
8806 | fmul.x INV_L2(%pc),%fp0 | ||
8807 | bra t_minx2 | ||
8808 | |||
8809 | ######################################################################### | ||
8810 | # stwotox(): computes 2**X for a normalized input # | ||
8811 | # stwotoxd(): computes 2**X for a denormalized input # | ||
8812 | # stentox(): computes 10**X for a normalized input # | ||
8813 | # stentoxd(): computes 10**X for a denormalized input # | ||
8814 | # # | ||
8815 | # INPUT *************************************************************** # | ||
8816 | # a0 = pointer to extended precision input # | ||
8817 | # d0 = round precision,mode # | ||
8818 | # # | ||
8819 | # OUTPUT ************************************************************** # | ||
8820 | # fp0 = 2**X or 10**X # | ||
8821 | # # | ||
8822 | # ACCURACY and MONOTONICITY ******************************************* # | ||
8823 | # The returned result is within 2 ulps in 64 significant bits, # | ||
8824 | # i.e. within 0.5001 ulp to 53 bits if the result is subsequently # | ||
8825 | # rounded to double precision. The result is provably monotonic # | ||
8826 | # in double precision. # | ||
8827 | # # | ||
8828 | # ALGORITHM *********************************************************** # | ||
8829 | # # | ||
8830 | # twotox # | ||
8831 | # 1. If |X| > 16480, go to ExpBig. # | ||
8832 | # # | ||
8833 | # 2. If |X| < 2**(-70), go to ExpSm. # | ||
8834 | # # | ||
8835 | # 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore # | ||
8836 | # decompose N as # | ||
8837 | # N = 64(M + M') + j, j = 0,1,2,...,63. # | ||
8838 | # # | ||
8839 | # 4. Overwrite r := r * log2. Then # | ||
8840 | # 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). # | ||
8841 | # Go to expr to compute that expression. # | ||
8842 | # # | ||
8843 | # tentox # | ||
8844 | # 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. # | ||
8845 | # # | ||
8846 | # 2. If |X| < 2**(-70), go to ExpSm. # | ||
8847 | # # | ||
8848 | # 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set # | ||
8849 | # N := round-to-int(y). Decompose N as # | ||
8850 | # N = 64(M + M') + j, j = 0,1,2,...,63. # | ||
8851 | # # | ||
8852 | # 4. Define r as # | ||
8853 | # r := ((X - N*L1)-N*L2) * L10 # | ||
8854 | # where L1, L2 are the leading and trailing parts of # | ||
8855 | # log_10(2)/64 and L10 is the natural log of 10. Then # | ||
8856 | # 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). # | ||
8857 | # Go to expr to compute that expression. # | ||
8858 | # # | ||
8859 | # expr # | ||
8860 | # 1. Fetch 2**(j/64) from table as Fact1 and Fact2. # | ||
8861 | # # | ||
8862 | # 2. Overwrite Fact1 and Fact2 by # | ||
8863 | # Fact1 := 2**(M) * Fact1 # | ||
8864 | # Fact2 := 2**(M) * Fact2 # | ||
8865 | # Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). # | ||
8866 | # # | ||
8867 | # 3. Calculate P where 1 + P approximates exp(r): # | ||
8868 | # P = r + r*r*(A1+r*(A2+...+r*A5)). # | ||
8869 | # # | ||
8870 | # 4. Let AdjFact := 2**(M'). Return # | ||
8871 | # AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). # | ||
8872 | # Exit. # | ||
8873 | # # | ||
8874 | # ExpBig # | ||
8875 | # 1. Generate overflow by Huge * Huge if X > 0; otherwise, # | ||
8876 | # generate underflow by Tiny * Tiny. # | ||
8877 | # # | ||
8878 | # ExpSm # | ||
8879 | # 1. Return 1 + X. # | ||
8880 | # # | ||
8881 | ######################################################################### | ||
8882 | |||
#--Constants for stwotox/stentox/expr.
#--L2TEN64 and L10TWO1 are read with fmul.d (double precision) in stentox;
#--L10TWO2, LOG10 and LOG2 are read with fmul.x (extended precision);
#--EXPA1..EXPA5 are read with fmov.d/fadd.d (double precision) in expr.
8883 | L2TEN64: | ||
8884 | long 0x406A934F,0x0979A371 # 64LOG10/LOG2 | ||
8885 | L10TWO1: | ||
8886 | long 0x3F734413,0x509F8000 # LOG2/64LOG10 | ||
8887 | |||
#--trailing part paired with L10TWO1 (used as N*(LOG2/64LOG10)_TRAIL)
8888 | L10TWO2: | ||
8889 | long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000 | ||
8890 | |||
#--multiplier that turns the reduced argument into R in stentox
8891 | LOG10: long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000 | ||
8892 | |||
#--multiplier that turns the reduced argument into R in stwotox
8893 | LOG2: long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 | ||
8894 | |||
#--polynomial coefficients A5..A1 of the exp(r)-1 approximation in expr.
#--EXPA1 is declared with four longwords but is only read as a double.
8895 | EXPA5: long 0x3F56C16D,0x6F7BD0B2 | ||
8896 | EXPA4: long 0x3F811112,0x302C712C | ||
8897 | EXPA3: long 0x3FA55555,0x55554CC1 | ||
8898 | EXPA2: long 0x3FC55555,0x55554A54 | ||
8899 | EXPA1: long 0x3FE00000,0x00000000,0x00000000,0x00000000 | ||
8900 | |||
#--Table of 2^(j/64), j = 0..63, one 16-byte entry per j (the code indexes
#--it with j shifted left by 4).  As read by stwotox/stentox, the first
#--three longwords of an entry are copied verbatim into FACT1 (leading
#--part, extended precision).  The fourth longword is split into two words:
#--the upper word becomes the sign/exponent word of FACT2 and the lower
#--word becomes the top 16 mantissa bits of FACT2 (trailing part); the rest
#--of the FACT2 mantissa is cleared.
8901 | TEXPTBL: | ||
8902 | long 0x3FFF0000,0x80000000,0x00000000,0x3F738000 | ||
8903 | long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA | ||
8904 | long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9 | ||
8905 | long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9 | ||
8906 | long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA | ||
8907 | long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C | ||
8908 | long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1 | ||
8909 | long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA | ||
8910 | long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373 | ||
8911 | long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670 | ||
8912 | long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700 | ||
8913 | long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0 | ||
8914 | long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D | ||
8915 | long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319 | ||
8916 | long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B | ||
8917 | long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5 | ||
8918 | long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A | ||
8919 | long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B | ||
8920 | long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF | ||
8921 | long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA | ||
8922 | long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD | ||
8923 | long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E | ||
8924 | long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B | ||
8925 | long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB | ||
8926 | long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB | ||
8927 | long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274 | ||
8928 | long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C | ||
8929 | long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00 | ||
8930 | long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301 | ||
8931 | long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367 | ||
8932 | long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F | ||
8933 | long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C | ||
8934 | long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB | ||
8935 | long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB | ||
8936 | long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C | ||
8937 | long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA | ||
8938 | long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD | ||
8939 | long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51 | ||
8940 | long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A | ||
8941 | long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2 | ||
8942 | long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB | ||
8943 | long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17 | ||
8944 | long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C | ||
8945 | long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8 | ||
8946 | long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53 | ||
8947 | long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE | ||
8948 | long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124 | ||
8949 | long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243 | ||
8950 | long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A | ||
8951 | long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61 | ||
8952 | long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610 | ||
8953 | long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1 | ||
8954 | long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12 | ||
8955 | long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE | ||
8956 | long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4 | ||
8957 | long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F | ||
8958 | long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A | ||
8959 | long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A | ||
8960 | long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC | ||
8961 | long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F | ||
8962 | long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A | ||
8963 | long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795 | ||
8964 | long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B | ||
8965 | long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581 | ||
8966 | |||
#--Scratch-area aliases used by stwotox/stentox/expr (all a6-relative).
#--X, ADJFACT and FACT1 all name the same storage (FP_SCR0): X is written
#--at entry, FACT1 overwrites it in the main path, and ADJFACT is written
#--only after the last read of FACT1 in expr.
#--INT holds N, the rounded integer multiple used for argument reduction
8967 | set INT,L_SCR1 | ||
8968 | |||
8969 | set X,FP_SCR0 | ||
8970 | set XDCARE,X+2 | ||
8971 | set XFRAC,X+4 | ||
8972 | |||
8973 | set ADJFACT,FP_SCR0 | ||
8974 | |||
#--FACT1/FACT2 are 12-byte extended-precision slots: +0 sign/exponent,
#--+4 high mantissa longword, +8 low mantissa longword
8975 | set FACT1,FP_SCR0 | ||
8976 | set FACT1HI,FACT1+4 | ||
8977 | set FACT1LOW,FACT1+8 | ||
8978 | |||
8979 | set FACT2,FP_SCR1 | ||
8980 | set FACT2HI,FACT2+4 | ||
8981 | set FACT2LOW,FACT2+8 | ||
8982 | |||
8983 | global stwotox | ||
8984 | #--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S | ||
#--a0 = pointer to X, d0 = round precision,mode (left untouched here and
#--loaded into the FPCR at the very end).  The main path ends by branching
#--to expr with d2 and fp2/fp3 pushed on the stack, in that order.
8985 | stwotox: | ||
8986 | fmovm.x (%a0),&0x80 # LOAD INPUT | ||
8987 | |||
#--build a compact integer key for |X| in d1: sign/exponent word in the
#--upper half, top 16 mantissa bits in the lower half, sign bit cleared
8988 | mov.l (%a0),%d1 | ||
8989 | mov.w 4(%a0),%d1 | ||
8990 | fmov.x %fp0,X(%a6) | ||
8991 | and.l &0x7FFFFFFF,%d1 | ||
8992 | |||
8993 | cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)? | ||
8994 | bge.b TWOOK1 | ||
8995 | bra.w EXPBORS | ||
8996 | |||
8997 | TWOOK1: | ||
8998 | cmp.l %d1,&0x400D80C0 # |X| <= 16480 (judged on the d1 key)? | ||
8999 | ble.b TWOMAIN | ||
9000 | bra.w EXPBORS | ||
9001 | |||
9002 | TWOMAIN: | ||
9003 | #--USUAL CASE, 2^(-70) <= |X| <= 16480 | ||
9004 | |||
9005 | fmov.x %fp0,%fp1 | ||
9006 | fmul.s &0x42800000,%fp1 # 64 * X | ||
9007 | fmov.l %fp1,INT(%a6) # N = ROUND-TO-INT(64 X) | ||
#--d2 is used as a work register below; it is popped again in expr
9008 | mov.l %d2,-(%sp) | ||
9009 | lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64) | ||
9010 | fmov.l INT(%a6),%fp1 # N --> FLOATING FMT | ||
9011 | mov.l INT(%a6),%d1 | ||
9012 | mov.l %d1,%d2 | ||
9013 | and.l &0x3F,%d1 # d1 IS J | ||
9014 | asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64) | ||
9015 | add.l %d1,%a1 # ADDRESS FOR 2^(J/64) | ||
9016 | asr.l &6,%d2 # d2 IS L, N = 64L + J | ||
9017 | mov.l %d2,%d1 | ||
9018 | asr.l &1,%d1 # d1 IS M | ||
9019 | sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J | ||
#--bias M' so that d2 can be stored later as the exponent word of ADJFACT
9020 | add.l &0x3FFF,%d2 | ||
9021 | |||
9022 | #--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64), | ||
9023 | #--d1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN. | ||
9024 | #--ADJFACT = 2^(M'). | ||
9025 | #--d2 HAS BEEN PUSHED ON THE STACK; d0 STILL HOLDS THE ROUND PRECISION,MODE. | ||
9026 | |||
9027 | fmovm.x &0x0c,-(%sp) # save fp2/fp3 | ||
9028 | |||
#--copy the table entry: 12 bytes into FACT1, then the two words of the
#--fourth longword into the exponent word and the top mantissa word of FACT2
9029 | fmul.s &0x3C800000,%fp1 # (1/64)*N | ||
9030 | mov.l (%a1)+,FACT1(%a6) | ||
9031 | mov.l (%a1)+,FACT1HI(%a6) | ||
9032 | mov.l (%a1)+,FACT1LOW(%a6) | ||
9033 | mov.w (%a1)+,FACT2(%a6) | ||
9034 | |||
9035 | fsub.x %fp1,%fp0 # X - (1/64)*INT(64 X) | ||
9036 | |||
9037 | mov.w (%a1)+,FACT2HI(%a6) | ||
9038 | clr.w FACT2HI+2(%a6) | ||
9039 | clr.l FACT2LOW(%a6) | ||
#--adding M to both exponent words scales FACT1 and FACT2 by 2^M
9040 | add.w %d1,FACT1(%a6) | ||
9041 | fmul.x LOG2(%pc),%fp0 # FP0 IS R | ||
9042 | add.w %d1,FACT2(%a6) | ||
9043 | |||
9044 | bra.w expr | ||
9045 | |||
#--Out-of-range handling shared by stwotox and stentox.  On the paths
#--visible here nothing has been pushed on the stack yet: d1 is the |X|
#--key (exponent word : top 16 mantissa bits), d0 the round precision,mode,
#--fp0 = X, and X has been stored at X(a6).
9046 | EXPBORS: | ||
9047 | #--d1 KEY ABOVE 0x3FFF8000 (|X| > 1) MEANS THE "TOO BIG" CASE | ||
9048 | cmp.l %d1,&0x3FFF8000 | ||
9049 | bgt.b TEXPBIG | ||
9050 | |||
9051 | #--|X| IS SMALL, RETURN 1 + X | ||
9052 | |||
9053 | fmov.l %d0,%fpcr # restore users round prec,mode | ||
9054 | fadd.s &0x3F800000,%fp0 # RETURN 1 + X | ||
9055 | bra t_pinx2 | ||
9056 | |||
9057 | TEXPBIG: | ||
9058 | #--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW | ||
9059 | #--THE SIGN IS TAKEN FROM THE FIRST LONGWORD OF THE STORED COPY OF X | ||
9060 | mov.l X(%a6),%d1 | ||
9061 | cmp.l %d1,&0 | ||
9062 | blt.b EXPNEG | ||
9063 | |||
9064 | bra t_ovfl2 # t_ovfl expects positive value | ||
9065 | |||
9066 | EXPNEG: | ||
9067 | bra t_unfl2 # t_unfl expects positive value | ||
9068 | |||
9069 | global stwotoxd | ||
9070 | stwotoxd: | ||
9071 | #--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT | ||
#--The result is 1 plus a tiny single-precision value: the first longword
#--of X (which carries its sign bit) is OR-ed with 0x00800001 and added to
#--1.0 under the user's FPCR.  This assumes the remaining bits of that
#--longword are zero for a denormalized operand -- TODO confirm.
9072 | |||
9073 | fmov.l %d0,%fpcr # set user's rounding mode/precision | ||
9074 | fmov.s &0x3F800000,%fp0 # RETURN 1 + X | ||
9075 | mov.l (%a0),%d1 | ||
9076 | or.l &0x00800001,%d1 | ||
9077 | fadd.s %d1,%fp0 | ||
9078 | bra t_pinx2 | ||
9079 | |||
9080 | global stentox | ||
9081 | #--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S | ||
#--a0 = pointer to X, d0 = round precision,mode (left untouched here and
#--loaded into the FPCR at the end of expr).  The main path falls through
#--into expr with d2 and fp2/fp3 pushed on the stack, in that order.
9082 | stentox: | ||
9083 | fmovm.x (%a0),&0x80 # LOAD INPUT | ||
9084 | |||
#--build a compact integer key for |X| in d1: sign/exponent word in the
#--upper half, top 16 mantissa bits in the lower half, sign bit cleared
9085 | mov.l (%a0),%d1 | ||
9086 | mov.w 4(%a0),%d1 | ||
9087 | fmov.x %fp0,X(%a6) | ||
9088 | and.l &0x7FFFFFFF,%d1 | ||
9089 | |||
9090 | cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)? | ||
9091 | bge.b TENOK1 | ||
9092 | bra.w EXPBORS | ||
9093 | |||
9094 | TENOK1: | ||
9095 | cmp.l %d1,&0x400B9B07 # |X| <= 16480*log2/log10 ? | ||
9096 | ble.b TENMAIN | ||
9097 | bra.w EXPBORS | ||
9098 | |||
9099 | TENMAIN: | ||
9100 | #--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 | ||
9101 | |||
9102 | fmov.x %fp0,%fp1 | ||
9103 | fmul.d L2TEN64(%pc),%fp1 # X*64*LOG10/LOG2 | ||
9104 | fmov.l %fp1,INT(%a6) # N=INT(X*64*LOG10/LOG2) | ||
#--d2 is used as a work register below; it is popped again in expr
9105 | mov.l %d2,-(%sp) | ||
9106 | lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64) | ||
9107 | fmov.l INT(%a6),%fp1 # N --> FLOATING FMT | ||
9108 | mov.l INT(%a6),%d1 | ||
9109 | mov.l %d1,%d2 | ||
9110 | and.l &0x3F,%d1 # d1 IS J | ||
9111 | asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64) | ||
9112 | add.l %d1,%a1 # ADDRESS FOR 2^(J/64) | ||
9113 | asr.l &6,%d2 # d2 IS L, N = 64L + J | ||
9114 | mov.l %d2,%d1 | ||
9115 | asr.l &1,%d1 # d1 IS M | ||
9116 | sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J | ||
#--bias M' so that d2 can be stored later as the exponent word of ADJFACT
9117 | add.l &0x3FFF,%d2 | ||
9118 | |||
9119 | #--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64), | ||
9120 | #--d1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN. | ||
9121 | #--ADJFACT = 2^(M'). | ||
9122 | #--d2 HAS BEEN PUSHED ON THE STACK; d0 STILL HOLDS THE ROUND PRECISION,MODE. | ||
9123 | fmovm.x &0x0c,-(%sp) # save fp2/fp3 | ||
9124 | |||
9125 | fmov.x %fp1,%fp2 | ||
9126 | |||
#--the table entry is copied between the FP operations: 12 bytes into
#--FACT1, then the two words of the fourth longword into the exponent word
#--and the top mantissa word of FACT2
9127 | fmul.d L10TWO1(%pc),%fp1 # N*(LOG2/64LOG10)_LEAD | ||
9128 | mov.l (%a1)+,FACT1(%a6) | ||
9129 | |||
9130 | fmul.x L10TWO2(%pc),%fp2 # N*(LOG2/64LOG10)_TRAIL | ||
9131 | |||
9132 | mov.l (%a1)+,FACT1HI(%a6) | ||
9133 | mov.l (%a1)+,FACT1LOW(%a6) | ||
9134 | fsub.x %fp1,%fp0 # X - N L_LEAD | ||
9135 | mov.w (%a1)+,FACT2(%a6) | ||
9136 | |||
9137 | fsub.x %fp2,%fp0 # (X - N L_LEAD) - N L_TRAIL | ||
9138 | |||
9139 | mov.w (%a1)+,FACT2HI(%a6) | ||
9140 | clr.w FACT2HI+2(%a6) | ||
9141 | clr.l FACT2LOW(%a6) | ||
9142 | |||
9143 | fmul.x LOG10(%pc),%fp0 # FP0 IS R | ||
#--adding M to both exponent words scales FACT1 and FACT2 by 2^M
9144 | add.w %d1,FACT1(%a6) | ||
9145 | add.w %d1,FACT2(%a6) | ||
9146 | |||
#--Common tail of stwotox (via bra) and stentox (via fall-through).
9147 | expr: | ||
9148 | #--ON THE STACK (PUSHED IN THIS ORDER): d2, THEN FP2/FP3. | ||
#--d0 HOLDS THE ROUND PRECISION,MODE; d2 HOLDS THE BIASED EXPONENT OF 2**(M').
9149 | #--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64). | ||
9150 | #--FP0 IS R. THE FOLLOWING CODE COMPUTES | ||
9151 | #-- 2**(M'+M) * 2**(J/64) * EXP(R) | ||
9152 | |||
#--the polynomial is evaluated as two interleaved chains, the odd
#--coefficients in fp2 and the even coefficients in fp3
9153 | fmov.x %fp0,%fp1 | ||
9154 | fmul.x %fp1,%fp1 # FP1 IS S = R*R | ||
9155 | |||
9156 | fmov.d EXPA5(%pc),%fp2 # FP2 IS A5 | ||
9157 | fmov.d EXPA4(%pc),%fp3 # FP3 IS A4 | ||
9158 | |||
9159 | fmul.x %fp1,%fp2 # FP2 IS S*A5 | ||
9160 | fmul.x %fp1,%fp3 # FP3 IS S*A4 | ||
9161 | |||
9162 | fadd.d EXPA3(%pc),%fp2 # FP2 IS A3+S*A5 | ||
9163 | fadd.d EXPA2(%pc),%fp3 # FP3 IS A2+S*A4 | ||
9164 | |||
9165 | fmul.x %fp1,%fp2 # FP2 IS S*(A3+S*A5) | ||
9166 | fmul.x %fp1,%fp3 # FP3 IS S*(A2+S*A4) | ||
9167 | |||
9168 | fadd.d EXPA1(%pc),%fp2 # FP2 IS A1+S*(A3+S*A5) | ||
9169 | fmul.x %fp0,%fp3 # FP3 IS R*S*(A2+S*A4) | ||
9170 | |||
9171 | fmul.x %fp1,%fp2 # FP2 IS S*(A1+S*(A3+S*A5)) | ||
9172 | fadd.x %fp3,%fp0 # FP0 IS R+R*S*(A2+S*A4) | ||
9173 | fadd.x %fp2,%fp0 # FP0 IS EXP(R) - 1 | ||
9174 | |||
9175 | fmovm.x (%sp)+,&0x30 # restore fp2/fp3 | ||
9176 | |||
9177 | #--FINAL RECONSTRUCTION PROCESS | ||
9178 | #--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0) | ||
9179 | |||
#--fp0 := FACT1 + (FACT1*(EXP(R)-1) + FACT2)
9180 | fmul.x FACT1(%a6),%fp0 | ||
9181 | fadd.x FACT2(%a6),%fp0 | ||
9182 | fadd.x FACT1(%a6),%fp0 | ||
9183 | |||
#--ADJFACT shares its storage with FACT1, so it is only built now that
#--FACT1 is no longer needed: exponent word from d2, mantissa 0x80000000:0
9184 | fmov.l %d0,%fpcr # restore users round prec,mode | ||
9185 | mov.w %d2,ADJFACT(%a6) # INSERT EXPONENT | ||
9186 | mov.l (%sp)+,%d2 | ||
9187 | mov.l &0x80000000,ADJFACT+4(%a6) | ||
9188 | clr.l ADJFACT+8(%a6) | ||
9189 | mov.b &FMUL_OP,%d1 # last inst is MUL | ||
9190 | fmul.x ADJFACT(%a6),%fp0 # FINAL ADJUSTMENT | ||
9191 | bra t_catch | ||
9192 | |||
9193 | global stentoxd | ||
9194 | stentoxd: | ||
9195 | #--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT | ||
#--Same instruction sequence as stwotoxd: the result is 1 plus a tiny
#--single-precision value built by OR-ing 0x00800001 into the first
#--longword of X (which carries its sign bit), added under the user's FPCR.
#--This assumes the remaining bits of that longword are zero for a
#--denormalized operand -- TODO confirm.
9196 | |||
9197 | fmov.l %d0,%fpcr # set user's rounding mode/precision | ||
9198 | fmov.s &0x3F800000,%fp0 # RETURN 1 + X | ||
9199 | mov.l (%a0),%d1 | ||
9200 | or.l &0x00800001,%d1 | ||
9201 | fadd.s %d1,%fp0 | ||
9202 | bra t_pinx2 | ||
9203 | |||
9204 | ######################################################################### | ||
9205 | # sscale(): computes the destination operand scaled by the source # | ||
9206 | # operand. If the absolute value of the source operand is # | ||
9207 | # >= 2^14, an overflow or underflow is returned. # | ||
9208 | # # | ||
9209 | # INPUT *************************************************************** # | ||
9210 | # a0 = pointer to double-extended source operand X # | ||
9211 | # a1 = pointer to double-extended destination operand Y # | ||
9212 | # # | ||
9213 | # OUTPUT ************************************************************** # | ||
9214 | # fp0 = scale(X,Y) # | ||
9215 | # # | ||
9216 | ######################################################################### | ||
9217 | |||
9218 | set SIGN, L_SCR1 | ||
9219 | |||
9220 | global sscale | ||
#--a0 = pointer to the source operand (scale amount), a1 = pointer to the
#--destination operand (value being scaled), d0 = control bits, which are
#--pushed here and popped into the FPCR just before the final multiply.
#--The scaling is done by multiplying dst by a power of two built on the stack.
9221 | sscale: | ||
9222 | mov.l %d0,-(%sp) # store off ctrl bits for now | ||
9223 | |||
9224 | mov.w DST_EX(%a1),%d1 # get dst exponent | ||
9225 | smi.b SIGN(%a6) # use SIGN to hold dst sign | ||
9226 | andi.l &0x00007fff,%d1 # strip sign from dst exp | ||
9227 | |||
9228 | mov.w SRC_EX(%a0),%d0 # check src bounds | ||
9229 | andi.w &0x7fff,%d0 # clr src sign bit | ||
9230 | cmpi.w %d0,&0x3fff # is |src| < 1 (integer part ZERO)? | ||
9231 | blt.w src_small # yes | ||
9232 | cmpi.w %d0,&0x400c # no; is src too big? | ||
9233 | bgt.w src_out # yes | ||
9234 | |||
9235 | # | ||
9236 | # Source is within 2^14 range. | ||
9237 | # | ||
9238 | src_ok: | ||
9239 | fintrz.x SRC(%a0),%fp0 # calc int of src | ||
9240 | fmov.l %fp0,%d0 # int src to d0 | ||
9241 | # don't want any accrued bits from the fintrz showing up later since | ||
9242 | # we may need to read the fpsr for the last fp op in t_catch2(). | ||
9243 | fmov.l &0x0,%fpsr | ||
9244 | |||
9245 | tst.b DST_HI(%a1) # is dst denormalized? | ||
9246 | bmi.b sok_norm | ||
9247 | |||
9248 | # the dst is a DENORM. normalize the DENORM and add the adjustment to | ||
9249 | # the src value. then, jump to the norm part of the routine. | ||
9250 | sok_dnrm: | ||
9251 | mov.l %d0,-(%sp) # save src for now | ||
9252 | |||
9253 | mov.w DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy | ||
9254 | mov.l DST_HI(%a1),FP_SCR0_HI(%a6) | ||
9255 | mov.l DST_LO(%a1),FP_SCR0_LO(%a6) | ||
9256 | |||
9257 | lea FP_SCR0(%a6),%a0 # pass ptr to DENORM | ||
9258 | bsr.l norm # normalize the DENORM | ||
# d0 returned by norm is negated and added to the saved integer src
# (presumably d0 is the normalization shift count -- confirm against norm)
9259 | neg.l %d0 | ||
9260 | add.l (%sp)+,%d0 # add adjustment to src | ||
9261 | |||
9262 | fmovm.x FP_SCR0(%a6),&0x80 # load normalized DENORM | ||
9263 | |||
9264 | cmpi.w %d0,&-0x3fff # is the shft amt really low? | ||
9265 | bge.b sok_norm2 # thank goodness no | ||
9266 | |||
9267 | # the multiply factor that we're trying to create should be a denorm | ||
9268 | # for the multiply to work. therefore, we're going to actually do a | ||
9269 | # multiply with a denorm which will cause an unimplemented data type | ||
9270 | # exception to be put into the machine which will be caught and corrected | ||
9271 | # later. we don't do this with the DENORMs above because this method | ||
9272 | # is slower. but, don't fret, I don't see it being used much either. | ||
9273 | fmov.l (%sp)+,%fpcr # restore user fpcr | ||
9274 | mov.l &0x80000000,%d1 # load normalized mantissa | ||
9275 | subi.l &-0x3fff,%d0 # how many should we shift? | ||
9276 | neg.l %d0 # make it positive | ||
9277 | cmpi.b %d0,&0x20 # is it >= 32? | ||
9278 | bge.b sok_dnrm_32 # yes | ||
9279 | lsr.l %d0,%d1 # no; bit stays in upper lw | ||
9280 | clr.l -(%sp) # insert zero low mantissa | ||
9281 | mov.l %d1,-(%sp) # insert new high mantissa | ||
9282 | clr.l -(%sp) # make zero exponent | ||
9283 | bra.b sok_norm_cont | ||
9284 | sok_dnrm_32: | ||
9285 | subi.b &0x20,%d0 # get shift count | ||
9286 | lsr.l %d0,%d1 # make low mantissa longword | ||
9287 | mov.l %d1,-(%sp) # insert new low mantissa | ||
9288 | clr.l -(%sp) # insert zero high mantissa | ||
9289 | clr.l -(%sp) # make zero exponent | ||
9290 | bra.b sok_norm_cont | ||
9291 | |||
9292 | # the src will force the dst to a DENORM value or worse. so, let's | ||
9293 | # create an fp multiply that will create the result. | ||
9294 | sok_norm: | ||
9295 | fmovm.x DST(%a1),&0x80 # load fp0 with normalized dst | ||
9296 | sok_norm2: | ||
9297 | fmov.l (%sp)+,%fpcr # restore user fpcr | ||
9298 | |||
# build the factor 2^(int src) on the stack; the longwords are pushed in
# the order low mantissa, high mantissa, exponent (same as sok_dnrm above)
9299 | addi.w &0x3fff,%d0 # turn src amt into exp value | ||
9300 | swap %d0 # put exponent in high word | ||
9301 | clr.l -(%sp) # insert zero lo mantissa | ||
9302 | mov.l &0x80000000,-(%sp) # insert new high mantissa | ||
9303 | mov.l %d0,-(%sp) # insert new exponent (in high word) | ||
9304 | |||
9305 | sok_norm_cont: | ||
9306 | fmov.l %fpcr,%d0 # d0 needs fpcr for t_catch2 | ||
9307 | mov.b &FMUL_OP,%d1 # last inst is MUL | ||
9308 | fmul.x (%sp)+,%fp0 # do the multiply | ||
9309 | bra t_catch2 # catch any exceptions | ||
9310 | |||
9311 | # | ||
9312 | # Source is outside of 2^14 range. Test the sign and branch | ||
9313 | # to the appropriate exception handler. | ||
9314 | # | ||
# Reached from sscale with the control bits still on the stack.  After the
# exg, a1 points to the source operand and a0 to the destination operand.
9315 | src_out: | ||
9316 | mov.l (%sp)+,%d0 # restore ctrl bits | ||
9317 | exg %a0,%a1 # swap src,dst ptrs | ||
9318 | tst.b SRC_EX(%a1) # is src negative? | ||
9319 | bmi t_unfl # yes; underflow | ||
9320 | bra t_ovfl_sc # no; overflow | ||
9321 | |||
9322 | # | ||
9323 | # The source input is below 1, so we check for denormalized numbers | ||
9324 | # and set unfl. | ||
9325 | # | ||
# Reached from sscale when |src| < 1, with the control bits still on the
# stack.  The destination is returned as is: a normalized dst is simply
# moved into fp0 under the user's FPCR, a denormalized dst is handed to
# t_resdnrm with a0 pointing at it.
9326 | src_small: | ||
9327 | tst.b DST_HI(%a1) # is dst denormalized? | ||
9328 | bpl.b ssmall_done # yes | ||
9329 | |||
9330 | mov.l (%sp)+,%d0 | ||
9331 | fmov.l %d0,%fpcr # no; load control bits | ||
9332 | mov.b &FMOV_OP,%d1 # last inst is MOVE | ||
9333 | fmov.x DST(%a1),%fp0 # simply return dest | ||
9334 | bra t_catch2 | ||
9335 | ssmall_done: | ||
9336 | mov.l (%sp)+,%d0 # load control bits into d0 | ||
9337 | mov.l %a1,%a0 # pass ptr to dst | ||
9338 | bra t_resdnrm | ||
9339 | |||
9340 | ######################################################################### | ||
9341 | # smod(): computes the fp MOD of the input values X,Y. # | ||
9342 | # srem(): computes the fp (IEEE) REM of the input values X,Y. # | ||
9343 | # # | ||
9344 | # INPUT *************************************************************** # | ||
9345 | # a0 = pointer to extended precision input X # | ||
9346 | # a1 = pointer to extended precision input Y # | ||
9347 | # d0 = round precision,mode # | ||
9348 | # # | ||
9349 | # The input operands X and Y can be either normalized or # | ||
9350 | # denormalized. # | ||
9351 | # # | ||
9352 | # OUTPUT ************************************************************** # | ||
9353 | # fp0 = FREM(X,Y) or FMOD(X,Y) # | ||
9354 | # # | ||
9355 | # ALGORITHM *********************************************************** # | ||
9356 | # # | ||
9357 | # Step 1. Save and strip signs of X and Y: signX := sign(X), # | ||
9358 | # signY := sign(Y), X := |X|, Y := |Y|, # | ||
9359 | # signQ := signX EOR signY. Record whether MOD or REM # | ||
9360 | # is requested. # | ||
9361 | # # | ||
9362 | # Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0. # | ||
9363 | # If (L < 0) then # | ||
9364 | # R := X, go to Step 4. # | ||
9365 | # else # | ||
9366 | # R := 2^(-L)X, j := L. # | ||
9367 | # endif # | ||
9368 | # # | ||
9369 | # Step 3. Perform MOD(X,Y) # | ||
9370 | # 3.1 If R = Y, go to Step 9. # | ||
9371 | # 3.2 If R > Y, then { R := R - Y, Q := Q + 1} # | ||
9372 | # 3.3 If j = 0, go to Step 4. # | ||
9373 | # 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to # | ||
9374 | # Step 3.1. # | ||
9375 | # # | ||
9376 | # Step 4. At this point, R = X - QY = MOD(X,Y). Set # | ||
9377 | # Last_Subtract := false (used in Step 7 below). If # | ||
9378 | # MOD is requested, go to Step 6. # | ||
9379 | # # | ||
9380 | # Step 5. R = MOD(X,Y), but REM(X,Y) is requested. # | ||
9381 | # 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to # | ||
9382 | # Step 6. # | ||
9383 | # 5.2 If R > Y/2, then { set Last_Subtract := true, # | ||
9384 | # Q := Q + 1, Y := signY*Y }. Go to Step 6. # | ||
9385 | # 5.3 This is the tricky case of R = Y/2. If Q is odd, # | ||
9386 | # then { Q := Q + 1, signX := -signX }. # | ||
9387 | # # | ||
9388 | # Step 6. R := signX*R. # | ||
9389 | # # | ||
9390 | # Step 7. If Last_Subtract = true, R := R - Y. # | ||
9391 | # # | ||
9392 | # Step 8. Return signQ, last 7 bits of Q, and R as required. # | ||
9393 | # # | ||
9394 | # Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus, # | ||
9395 | # X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1), # | ||
9396 | # R := 0. Return signQ, last 7 bits of Q, and R. # | ||
9397 | # # | ||
9398 | ######################################################################### | ||
9399 | |||
#--Scratch-area aliases used by smod/srem (all a6-relative).
#--Mod_Flag: 0 when entered through smod, 1 when entered through srem.
#--Sc_Flag:  non-zero when the result in fp0 still has to be multiplied by
#--          the Scale constant before returning.
9400 | set Mod_Flag,L_SCR3 | ||
9401 | set Sc_Flag,L_SCR3+1 | ||
9402 | |||
#--SignY/SignX hold the raw sign/exponent words of the two operands;
#--SignQ holds only the sign bit (0x8000 or 0) of the quotient.
9403 | set SignY,L_SCR2 | ||
9404 | set SignX,L_SCR2+2 | ||
9405 | set SignQ,L_SCR3+2 | ||
9406 | |||
#--Y and R are 12-byte extended-precision slots: +0 exponent word,
#--+4 high mantissa longword, +8 low mantissa longword
9407 | set Y,FP_SCR0 | ||
9408 | set Y_Hi,Y+4 | ||
9409 | set Y_Lo,Y+8 | ||
9410 | |||
9411 | set R,FP_SCR1 | ||
9412 | set R_Hi,R+4 | ||
9413 | set R_Lo,R+8 | ||
9414 | |||
#--extended-precision constant with exponent field 1 and mantissa
#--0x80000000:0; multiplying by it undoes the 0x3FFE exponent offset that
#--the Do_Scale path leaves in R
9415 | Scale: | ||
9416 | long 0x00010000,0x80000000,0x00000000,0x00000000 | ||
9417 | |||
9418 | global smod | ||
#--FMOD entry: clear the quotient byte, save the control bits on the stack
#--and record "MOD requested" (Mod_Flag = 0) before joining the common code
9419 | smod: | ||
9420 | clr.b FPSR_QBYTE(%a6) | ||
9421 | mov.l %d0,-(%sp) # save ctrl bits | ||
9422 | clr.b Mod_Flag(%a6) | ||
9423 | bra.b Mod_Rem | ||
9424 | |||
9425 | global srem | ||
#--FREM entry: clear the quotient byte, save the control bits on the stack
#--and record "REM requested" (Mod_Flag = 1); execution then falls through
#--into the common code at Mod_Rem
9426 | srem: | ||
9427 | clr.b FPSR_QBYTE(%a6) | ||
9428 | mov.l %d0,-(%sp) # save ctrl bits | ||
9429 | mov.b &0x1,Mod_Flag(%a6) | ||
9430 | |||
#--Common code for smod/srem.  This first part unpacks the operand that a0
#--points to into (D3,D4,D5) = (exponent, hi mantissa, lo mantissa) of |Y|
#--and normalizes it if its exponent field is zero.
9431 | Mod_Rem: | ||
9432 | #..Save sign of X and Y | ||
9433 | movm.l &0x3f00,-(%sp) # save data registers | ||
9434 | mov.w SRC_EX(%a0),%d3 | ||
9435 | mov.w %d3,SignY(%a6) | ||
9436 | and.l &0x00007FFF,%d3 # Y := |Y| | ||
9437 | |||
9438 | # | ||
9439 | mov.l SRC_HI(%a0),%d4 | ||
9440 | mov.l SRC_LO(%a0),%d5 # (D3,D4,D5) is |Y| | ||
9441 | |||
9442 | tst.l %d3 | ||
9443 | bne.b Y_Normal | ||
9444 | |||
#..exponent field of Y is zero: start from 0x3FFE and subtract the number
#..of bit positions the mantissa has to be shifted left
9445 | mov.l &0x00003FFE,%d3 # $3FFD + 1 | ||
9446 | tst.l %d4 | ||
9447 | bne.b HiY_not0 | ||
9448 | |||
#..high mantissa longword is zero: move the low longword up (32 positions)
#..and then shift out its leading zeros, counted by bfffo
9449 | HiY_0: | ||
9450 | mov.l %d5,%d4 | ||
9451 | clr.l %d5 | ||
9452 | sub.l &32,%d3 | ||
9453 | clr.l %d6 | ||
9454 | bfffo %d4{&0:&32},%d6 | ||
9455 | lsl.l %d6,%d4 | ||
9456 | sub.l %d6,%d3 # (D3,D4,D5) is normalized | ||
9457 | # ...with bias $7FFD | ||
9458 | bra.b Chk_X | ||
9459 | |||
#..high mantissa longword is non-zero: 64-bit left shift of (D4,D5) by the
#..leading-zero count in D6; D7 carries the bits that cross from D5 into D4
9460 | HiY_not0: | ||
9461 | clr.l %d6 | ||
9462 | bfffo %d4{&0:&32},%d6 | ||
9463 | sub.l %d6,%d3 | ||
9464 | lsl.l %d6,%d4 | ||
9465 | mov.l %d5,%d7 # a copy of D5 | ||
9466 | lsl.l %d6,%d5 | ||
9467 | neg.l %d6 | ||
9468 | add.l &32,%d6 | ||
9469 | lsr.l %d6,%d7 | ||
9470 | or.l %d7,%d4 # (D3,D4,D5) normalized | ||
9471 | # ...with bias $7FFD | ||
9472 | bra.b Chk_X | ||
9473 | |||
9474 | Y_Normal: | ||
9475 | add.l &0x00003FFE,%d3 # (D3,D4,D5) normalized | ||
9476 | # ...with bias $7FFD | ||
9477 | |||
#--Unpack the operand that a1 points to into (D0,D1,D2) = (exponent,
#--hi mantissa, lo mantissa) of |X|, record sign(Q) = sign(X) EOR sign(Y),
#--and normalize X the same way Y was normalized above.
9478 | Chk_X: | ||
9479 | mov.w DST_EX(%a1),%d0 | ||
9480 | mov.w %d0,SignX(%a6) | ||
9481 | mov.w SignY(%a6),%d1 | ||
9482 | eor.l %d0,%d1 | ||
9483 | and.l &0x00008000,%d1 | ||
9484 | mov.w %d1,SignQ(%a6) # sign(Q) obtained | ||
9485 | and.l &0x00007FFF,%d0 | ||
9486 | mov.l DST_HI(%a1),%d1 | ||
9487 | mov.l DST_LO(%a1),%d2 # (D0,D1,D2) is |X| | ||
9488 | tst.l %d0 | ||
9489 | bne.b X_Normal | ||
#..exponent field of X is zero: start from 0x3FFE and subtract the number
#..of bit positions the mantissa has to be shifted left
9490 | mov.l &0x00003FFE,%d0 | ||
9491 | tst.l %d1 | ||
9492 | bne.b HiX_not0 | ||
9493 | |||
#..high mantissa longword is zero: move the low longword up (32 positions)
#..and then shift out its leading zeros, counted by bfffo
9494 | HiX_0: | ||
9495 | mov.l %d2,%d1 | ||
9496 | clr.l %d2 | ||
9497 | sub.l &32,%d0 | ||
9498 | clr.l %d6 | ||
9499 | bfffo %d1{&0:&32},%d6 | ||
9500 | lsl.l %d6,%d1 | ||
9501 | sub.l %d6,%d0 # (D0,D1,D2) is normalized | ||
9502 | # ...with bias $7FFD | ||
9503 | bra.b Init | ||
9504 | |||
#..high mantissa longword is non-zero: 64-bit left shift of (D1,D2) by the
#..leading-zero count in D6; D7 carries the bits that cross from D2 into D1
9505 | HiX_not0: | ||
9506 | clr.l %d6 | ||
9507 | bfffo %d1{&0:&32},%d6 | ||
9508 | sub.l %d6,%d0 | ||
9509 | lsl.l %d6,%d1 | ||
9510 | mov.l %d2,%d7 # a copy of D2 | ||
9511 | lsl.l %d6,%d2 | ||
9512 | neg.l %d6 | ||
9513 | add.l &32,%d6 | ||
9514 | lsr.l %d6,%d7 | ||
9515 | or.l %d7,%d1 # (D0,D1,D2) normalized | ||
9516 | # ...with bias $7FFD | ||
9517 | bra.b Init | ||
9518 | |||
9519 | X_Normal: | ||
9520 | add.l &0x00003FFE,%d0 # (D0,D1,D2) normalized | ||
9521 | # ...with bias $7FFD | ||
9522 | |||
#--Set up the shift-and-subtract loop: D0 := L = expo(X)-expo(Y) (this is
#--the loop counter j), D3 := Q = 0, D6 := carry = 0, A1 := k = 0.
#--The biased exponent of Y is kept in L_SCR1 and the biased exponent of X
#--is kept on the stack until it is known whether the loop is needed.
9523 | Init: | ||
9524 | # | ||
9525 | mov.l %d3,L_SCR1(%a6) # save biased exp(Y) | ||
9526 | mov.l %d0,-(%sp) # save biased exp(X) | ||
9527 | sub.l %d3,%d0 # L := expo(X)-expo(Y) | ||
9528 | |||
9529 | clr.l %d6 # D6 := carry <- 0 | ||
9530 | clr.l %d3 # D3 is Q | ||
9531 | mov.l &0,%a1 # A1 is k; j+k=L, Q=0 | ||
9532 | |||
9533 | #..(Carry,D1,D2) is R | ||
9534 | tst.l %d0 | ||
9535 | bge.b Mod_Loop_pre | ||
9536 | |||
9537 | #..expo(X) < expo(Y). Thus X = mod(X,Y) | ||
9538 | # | ||
9539 | mov.l (%sp)+,%d0 # restore d0 | ||
9540 | bra.w Get_Mod | ||
9541 | |||
9542 | Mod_Loop_pre: | ||
9543 | addq.l &0x4,%sp # erase exp(X) | ||
9544 | #..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L | ||
#--One iteration per quotient bit: subtract Y from R when R >= Y (bumping
#--Q), then, unless j = 0, double Q and R and count down j.  D6 is non-zero
#--when the doubling of R carried out of the 64-bit (D1,D2) pair.
9545 | Mod_Loop: | ||
9546 | tst.l %d6 # test carry bit | ||
9547 | bgt.b R_GT_Y | ||
9548 | |||
9549 | #..At this point carry = 0, R = (D1,D2), Y = (D4,D5) | ||
9550 | cmp.l %d1,%d4 # compare hi(R) and hi(Y) | ||
9551 | bne.b R_NE_Y | ||
9552 | cmp.l %d2,%d5 # compare lo(R) and lo(Y) | ||
9553 | bne.b R_NE_Y | ||
9554 | |||
9555 | #..At this point, R = Y | ||
9556 | bra.w Rem_is_0 | ||
9557 | |||
9558 | R_NE_Y: | ||
9559 | #..use the borrow of the previous compare | ||
9560 | bcs.b R_LT_Y # borrow is set iff R < Y | ||
9561 | |||
9562 | R_GT_Y: | ||
9563 | #..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0 | ||
9564 | #..and Y < (D1,D2) < 2Y. Either way, perform R - Y | ||
9565 | sub.l %d5,%d2 # lo(R) - lo(Y) | ||
9566 | subx.l %d4,%d1 # hi(R) - hi(Y) | ||
9567 | clr.l %d6 # clear carry | ||
9568 | addq.l &1,%d3 # Q := Q + 1 | ||
9569 | |||
9570 | R_LT_Y: | ||
9571 | #..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0. | ||
9572 | tst.l %d0 # see if j = 0. | ||
9573 | beq.b PostLoop | ||
9574 | |||
#..the add/roxl pair is a 64-bit left shift by one; scs records the bit
#..shifted out of D1 in the low byte of D6
9575 | add.l %d3,%d3 # Q := 2Q | ||
9576 | add.l %d2,%d2 # lo(R) = 2lo(R) | ||
9577 | roxl.l &1,%d1 # hi(R) = 2hi(R) + carry | ||
9578 | scs %d6 # set Carry if 2(R) overflows | ||
9579 | addq.l &1,%a1 # k := k+1 | ||
9580 | subq.l &1,%d0 # j := j - 1 | ||
9581 | #..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y. | ||
9582 | |||
9583 | bra.b Mod_Loop | ||
9584 | |||
#--Loop finished with a non-zero remainder in (D1,D2).  Give R the biased
#--exponent of Y and renormalize the mantissa, adjusting the exponent in D0.
9585 | PostLoop: | ||
9586 | #..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y. | ||
9587 | |||
9588 | #..normalize R. | ||
9589 | mov.l L_SCR1(%a6),%d0 # new biased expo of R | ||
9590 | tst.l %d1 | ||
9591 | bne.b HiR_not0 | ||
9592 | |||
#..high mantissa longword is zero: move the low longword up (32 positions)
#..and then shift out its leading zeros, counted by bfffo
9593 | HiR_0: | ||
9594 | mov.l %d2,%d1 | ||
9595 | clr.l %d2 | ||
9596 | sub.l &32,%d0 | ||
9597 | clr.l %d6 | ||
9598 | bfffo %d1{&0:&32},%d6 | ||
9599 | lsl.l %d6,%d1 | ||
9600 | sub.l %d6,%d0 # (D0,D1,D2) is normalized | ||
9601 | # ...with bias $7FFD | ||
9602 | bra.b Get_Mod | ||
9603 | |||
#..high mantissa longword is non-zero: 64-bit left shift of (D1,D2) by the
#..leading-zero count in D6; D7 carries the bits that cross from D2 into D1
9604 | HiR_not0: | ||
9605 | clr.l %d6 | ||
9606 | bfffo %d1{&0:&32},%d6 | ||
9607 | bmi.b Get_Mod # already normalized | ||
9608 | sub.l %d6,%d0 | ||
9609 | lsl.l %d6,%d1 | ||
9610 | mov.l %d2,%d7 # a copy of D2 | ||
9611 | lsl.l %d6,%d2 | ||
9612 | neg.l %d6 | ||
9613 | add.l &32,%d6 | ||
9614 | lsr.l %d6,%d7 | ||
9615 | or.l %d7,%d1 # (D0,D1,D2) normalized | ||
9616 | |||
9617 | # | ||
#--Write R and Y back to memory as extended-precision values and load R
#--into fp0.  (D0,D1,D2) is R and L_SCR1 the exponent of Y, both still
#--carrying the extra 0x3FFE exponent offset added during unpacking.
#--NOTE(review): cmp is written register-first in this source; the bge is
#--assumed to be taken when D0 >= 0x41FE -- confirm against the assembler.
9618 | Get_Mod: | ||
9619 | cmp.l %d0,&0x000041FE | ||
9620 | bge.b No_Scale | ||
#..small exponent: store R and Y with the offset left in place and set
#..Sc_Flag so that the result is multiplied by Scale before returning
9621 | Do_Scale: | ||
9622 | mov.w %d0,R(%a6) | ||
9623 | mov.l %d1,R_Hi(%a6) | ||
9624 | mov.l %d2,R_Lo(%a6) | ||
9625 | mov.l L_SCR1(%a6),%d6 | ||
9626 | mov.w %d6,Y(%a6) | ||
9627 | mov.l %d4,Y_Hi(%a6) | ||
9628 | mov.l %d5,Y_Lo(%a6) | ||
9629 | fmov.x R(%a6),%fp0 # no exception | ||
9630 | mov.b &1,Sc_Flag(%a6) | ||
9631 | bra.b ModOrRem | ||
#..otherwise remove the 0x3FFE offset from both exponents (D0 and L_SCR1
#..are updated too, since ModOrRem compares them) and clear Sc_Flag
9632 | No_Scale: | ||
9633 | mov.l %d1,R_Hi(%a6) | ||
9634 | mov.l %d2,R_Lo(%a6) | ||
9635 | sub.l &0x3FFE,%d0 | ||
9636 | mov.w %d0,R(%a6) | ||
9637 | mov.l L_SCR1(%a6),%d6 | ||
9638 | sub.l &0x3FFE,%d6 | ||
9639 | mov.l %d6,L_SCR1(%a6) | ||
9640 | fmov.x R(%a6),%fp0 | ||
9641 | mov.w %d6,Y(%a6) | ||
9642 | mov.l %d4,Y_Hi(%a6) | ||
9643 | mov.l %d5,Y_Lo(%a6) | ||
9644 | clr.b Sc_Flag(%a6) | ||
9645 | |||
9646 | # | ||
#--fp0 = R = MOD(|X|,|Y|).  For MOD (Mod_Flag = 0) nothing more is needed.
#--For REM, R is compared with Y/2: first by exponent, then by mantissa.
#--R < Y/2 keeps R, R > Y/2 takes one more subtraction of Y, and R = Y/2
#--goes to Tie_Case.
9647 | ModOrRem: | ||
9648 | tst.b Mod_Flag(%a6) | ||
9649 | beq.b Fix_Sign | ||
9650 | |||
9651 | mov.l L_SCR1(%a6),%d6 # new biased expo(Y) | ||
9652 | subq.l &1,%d6 # biased expo(Y/2) | ||
9653 | cmp.l %d0,%d6 | ||
9654 | blt.b Fix_Sign | ||
9655 | bgt.b Last_Sub | ||
9656 | |||
#..exponents equal: Y/2 has the same mantissa as Y, so compare (D1,D2)
#..with (D4,D5)
9657 | cmp.l %d1,%d4 | ||
9658 | bne.b Not_EQ | ||
9659 | cmp.l %d2,%d5 | ||
9660 | bne.b Not_EQ | ||
9661 | bra.w Tie_Case | ||
9662 | |||
#..use the borrow of the previous compare: set means R < Y/2
9663 | Not_EQ: | ||
9664 | bcs.b Fix_Sign | ||
9665 | |||
9666 | Last_Sub: | ||
9667 | # | ||
9668 | fsub.x Y(%a6),%fp0 # no exceptions | ||
9669 | addq.l &1,%d3 # Q := Q + 1 | ||
9670 | |||
9671 | # | ||
#--Give the result the sign of X, then build the quotient byte: bit 7 is
#--sign(Q), bits 6-0 are the low seven bits of Q (D3).
9672 | Fix_Sign: | ||
9673 | #..Get sign of X | ||
#..the word move sets N from bit 15 of SignX; negate fp0 if X was negative
9674 | mov.w SignX(%a6),%d6 | ||
9675 | bge.b Get_Q | ||
9676 | fneg.x %fp0 | ||
9677 | |||
9678 | #..Get Q | ||
9679 | # | ||
9680 | Get_Q: | ||
9681 | clr.l %d6 | ||
9682 | mov.w SignQ(%a6),%d6 # D6 is sign(Q) | ||
#..shift the sign from bit 15 down to bit 7
9683 | mov.l &8,%d7 | ||
9684 | lsr.l %d7,%d6 | ||
9685 | and.l &0x0000007F,%d3 # 7 bits of Q | ||
9686 | or.l %d6,%d3 # sign and bits of Q | ||
9687 | # swap %d3 | ||
9688 | # fmov.l %fpsr,%d6 | ||
9689 | # and.l &0xFF00FFFF,%d6 | ||
9690 | # or.l %d3,%d6 | ||
9691 | # fmov.l %d6,%fpsr # put Q in fpsr | ||
9692 | mov.b %d3,FPSR_QBYTE(%a6) # put Q in fpsr | ||
9693 | |||
9694 | # | ||
9695 | Restore: | ||
9696 | movm.l (%sp)+,&0xfc # {%d2-%d7} | ||
9697 | mov.l (%sp)+,%d0 | ||
9698 | fmov.l %d0,%fpcr | ||
9699 | tst.b Sc_Flag(%a6) | ||
9700 | beq.b Finish | ||
9701 | mov.b &FMUL_OP,%d1 # last inst is MUL | ||
9702 | fmul.x Scale(%pc),%fp0 # may cause underflow | ||
9703 | bra t_catch2 | ||
9704 | # the '040 package did this apparently to see if the dst operand for the | ||
9705 | # preceding fmul was a denorm. but, it better not have been since the | ||
9706 | # algorithm just got done playing with fp0 and expected no exceptions | ||
9707 | # as a result. trust me... | ||
9708 | # bra t_avoid_unsupp # check for denorm as a | ||
9709 | # ;result of the scaling | ||
9710 | |||
9711 | Finish: | ||
9712 | mov.b &FMOV_OP,%d1 # last inst is MOVE | ||
9713 | fmov.x %fp0,%fp0 # capture exceptions & round | ||
9714 | bra t_catch2 | ||
9715 | |||
9716 | Rem_is_0: | ||
9717 | #..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1) | ||
9718 | addq.l &1,%d3 | ||
9719 | cmp.l %d0,&8 # D0 is j | ||
9720 | bge.b Q_Big | ||
9721 | |||
9722 | lsl.l %d0,%d3 | ||
9723 | bra.b Set_R_0 | ||
9724 | |||
9725 | Q_Big: | ||
9726 | clr.l %d3 | ||
9727 | |||
9728 | Set_R_0: | ||
9729 | fmov.s &0x00000000,%fp0 | ||
9730 | clr.b Sc_Flag(%a6) | ||
9731 | bra.w Fix_Sign | ||
9732 | |||
9733 | Tie_Case: | ||
9734 | #..Check parity of Q | ||
9735 | mov.l %d3,%d6 | ||
9736 | and.l &0x00000001,%d6 | ||
9737 | tst.l %d6 | ||
9738 | beq.w Fix_Sign # Q is even | ||
9739 | |||
9740 | #..Q is odd, Q := Q + 1, signX := -signX | ||
9741 | addq.l &1,%d3 | ||
9742 | mov.w SignX(%a6),%d6 | ||
9743 | eor.l &0x00008000,%d6 | ||
9744 | mov.w %d6,SignX(%a6) | ||
9745 | bra.w Fix_Sign | ||
9746 | |||
9747 | ######################################################################### | ||
9748 | # XDEF **************************************************************** # | ||
9749 | # tag(): return the optype of the input ext fp number # | ||
9750 | # # | ||
9751 | # This routine is used by the 060FPLSP. # | ||
9752 | # # | ||
9753 | # XREF **************************************************************** # | ||
9754 | # None # | ||
9755 | # # | ||
9756 | # INPUT *************************************************************** # | ||
9757 | # a0 = pointer to extended precision operand # | ||
9758 | # # | ||
9759 | # OUTPUT ************************************************************** # | ||
9760 | # d0 = value of type tag # | ||
9761 | # one of: NORM, INF, QNAN, SNAN, DENORM, ZERO # | ||
9762 | # # | ||
9763 | # ALGORITHM *********************************************************** # | ||
9764 | # Simply test the exponent, j-bit, and mantissa values to # | ||
9765 | # determine the type of operand. # | ||
9766 | # If it's an unnormalized zero, alter the operand and force it # | ||
9767 | # to be a normal zero. # | ||
9768 | # # | ||
9769 | ######################################################################### | ||
9770 | |||
9771 | global tag | ||
9772 | tag: | ||
# Classify the extended-precision operand at (a0). Checks run in this order:
# maximum exponent (INF/NaN), then the j-bit (NORM), then a zero exponent
# (ZERO/DENORM); whatever is left is handed to unnorm_fix.
# The tag is written with mov.b, so only the low byte of d0 holds it.
9773 | mov.w FTEMP_EX(%a0), %d0 # d0.w = sign + exponent word of the operand | ||
9774 | andi.w &0x7fff, %d0 # drop the sign bit, keep the exponent field | ||
9775 | cmpi.w %d0, &0x7fff # is (EXP == MAX)? | ||
9776 | beq.b inf_or_nan_x | ||
9777 | not_inf_or_nan_x: | ||
# Bit 7 of the first mantissa byte is the j-bit named in the routine header.
9778 | btst &0x7,FTEMP_HI(%a0) | ||
9779 | beq.b not_norm_x | ||
9780 | is_norm_x: | ||
9781 | mov.b &NORM, %d0 | ||
9782 | rts | ||
9783 | not_norm_x: | ||
9784 | tst.w %d0 # j-bit clear: is the exponent also 0? | ||
9785 | bne.b is_unnorm_x | ||
9786 | not_unnorm_x: | ||
# Exponent is zero and j-bit is clear: any non-zero mantissa bit means DENORM.
9787 | tst.l FTEMP_HI(%a0) | ||
9788 | bne.b is_denorm_x | ||
9789 | tst.l FTEMP_LO(%a0) | ||
9790 | bne.b is_denorm_x | ||
9791 | is_zero_x: | ||
9792 | mov.b &ZERO, %d0 | ||
9793 | rts | ||
9794 | is_denorm_x: | ||
9795 | mov.b &DENORM, %d0 | ||
9796 | rts | ||
9797 | is_unnorm_x: | ||
# Non-zero exponent with the j-bit clear. d0 is not written here after the
# call; presumably unnorm_fix leaves the resulting tag in d0 -- TODO confirm.
9798 | bsr.l unnorm_fix # convert to norm,denorm,or zero | ||
9799 | rts | ||
9800 | is_unnorm_reg_x: | ||
# NOTE(review): no branch in this routine targets is_unnorm_reg_x; it looks
# unreachable from here -- confirm there are no references elsewhere.
9801 | mov.b &UNNORM, %d0 | ||
9802 | rts | ||
9803 | inf_or_nan_x: | ||
# Maximum exponent: INF only if every mantissa bit other than the msb is zero.
9804 | tst.l FTEMP_LO(%a0) | ||
9805 | bne.b is_nan_x | ||
9806 | mov.l FTEMP_HI(%a0), %d0 | ||
9807 | and.l &0x7fffffff, %d0 # ignore the msb of the high mantissa longword | ||
9808 | bne.b is_nan_x | ||
9809 | is_inf_x: | ||
9810 | mov.b &INF, %d0 | ||
9811 | rts | ||
9812 | is_nan_x: | ||
# Every NaN is tagged QNAN on this path; no SNAN/QNAN distinction is made here.
9813 | mov.b &QNAN, %d0 | ||
9814 | rts | ||
9815 | |||
9816 | ############################################################# | ||
9817 | |||
# Three-longword extended-precision constant (exponent field 0x7fff, all
# mantissa bits set). t_operr loads it into fp0 as its default result.
9818 | qnan: long 0x7fff0000, 0xffffffff, 0xffffffff | ||
9819 | |||
9820 | ######################################################################### | ||
9821 | # XDEF **************************************************************** # | ||
9822 | # t_dz(): Handle 060FPLSP dz exception for "flogn" emulation. # | ||
9823 | # t_dz2(): Handle 060FPLSP dz exception for "fatanh" emulation. # | ||
9824 | # # | ||
9825 | # These routines are used by the 060FPLSP package. # | ||
9826 | # # | ||
9827 | # XREF **************************************************************** # | ||
9828 | # None # | ||
9829 | # # | ||
9830 | # INPUT *************************************************************** # | ||
9831 | # a0 = pointer to extended precision source operand. # | ||
9832 | # # | ||
9833 | # OUTPUT ************************************************************** # | ||
9834 | # fp0 = default DZ result. # | ||
9835 | # # | ||
9836 | # ALGORITHM *********************************************************** # | ||
9837 | # Transcendental emulation for the 060FPLSP has detected that # | ||
9838 | # a DZ exception should occur for the instruction. If DZ is disabled, # | ||
9839 | # return the default result. # | ||
9840 | # If DZ is enabled, the dst operand should be returned unscathed # | ||
9841 | # in fp0 while fp1 is used to create a DZ exception so that the # | ||
9842 | # operating system can log that such an event occurred. # | ||
9843 | # # | ||
9844 | ######################################################################### | ||
9845 | |||
9846 | global t_dz | ||
9847 | t_dz: | ||
9848 | tst.b SRC_EX(%a0) # sign of the source operand selects +INF or -INF | ||
9849 | bpl.b dz_pinf # positive source: take the +INF path | ||
9850 | |||
9851 | global t_dz2 | ||
9852 | t_dz2: | ||
# Entered directly, t_dz2 always takes the negative (-INF) path; the sign of
# the source operand is not examined.
9853 | ori.l &dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ | ||
9854 | |||
# Is the DZ exception enabled in the saved FPCR enable byte?
9855 | btst &dz_bit,FPCR_ENABLE(%a6) | ||
9856 | bne.b dz_minf_ena | ||
9857 | |||
9858 | # dz is disabled. return a -INF. | ||
9859 | fmov.s &0xff800000,%fp0 # fp0 = -INF (single-precision encoding) | ||
9860 | rts | ||
9861 | |||
9862 | # dz is enabled. create a dz exception so the user can record it | ||
9863 | # but use fp1 instead. return the dst operand unscathed in fp0. | ||
9864 | dz_minf_ena: | ||
9865 | fmovm.x EXC_FP0(%a6),&0x80 # reload fp0 from the saved EXC_FP0 image | ||
# Install the caller's FPCR before forcing the divide. fp1 is overwritten
# below and is not restored by this routine.
9866 | fmov.l USER_FPCR(%a6),%fpcr | ||
9867 | fmov.s &0xbf800000,%fp1 # load -1 | ||
9868 | fdiv.s &0x00000000,%fp1 # -1 / 0 | ||
9869 | rts | ||
9870 | |||
9871 | dz_pinf: | ||
9872 | ori.l &dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ | ||
9873 | |||
# Is the DZ exception enabled in the saved FPCR enable byte?
9874 | btst &dz_bit,FPCR_ENABLE(%a6) | ||
9875 | bne.b dz_pinf_ena | ||
9876 | |||
9877 | # dz is disabled. return a +INF. | ||
9878 | fmov.s &0x7f800000,%fp0 # fp0 = +INF (single-precision encoding) | ||
9879 | rts | ||
9880 | |||
9881 | # dz is enabled. create a dz exception so the user can record it | ||
9882 | # but use fp1 instead. return the dst operand unscathed in fp0. | ||
9883 | dz_pinf_ena: | ||
9884 | fmovm.x EXC_FP0(%a6),&0x80 # reload fp0 from the saved EXC_FP0 image | ||
# Install the caller's FPCR before forcing the divide. fp1 is overwritten
# below and is not restored by this routine.
9885 | fmov.l USER_FPCR(%a6),%fpcr | ||
9886 | fmov.s &0x3f800000,%fp1 # load +1 | ||
9887 | fdiv.s &0x00000000,%fp1 # +1 / 0 | ||
9888 | rts | ||
9889 | |||
9890 | ######################################################################### | ||
9891 | # XDEF **************************************************************** # | ||
9892 | # t_operr(): Handle 060FPLSP OPERR exception during emulation. # | ||
9893 | # # | ||
9894 | # This routine is used by the 060FPLSP package. # | ||
9895 | # # | ||
9896 | # XREF **************************************************************** # | ||
9897 | # None. # | ||
9898 | # # | ||
9899 | # INPUT *************************************************************** # | ||
9900 | # fp1 = source operand # | ||
9901 | # # | ||
9902 | # OUTPUT ************************************************************** # | ||
9903 | # fp0 = default result # | ||
9904 | # fp1 = unchanged # | ||
9905 | # # | ||
9906 | # ALGORITHM *********************************************************** # | ||
9907 | # An operand error should occur as the result of transcendental # | ||
9908 | # emulation in the 060FPLSP. If OPERR is disabled, just return a NAN # | ||
9909 | # in fp0. If OPERR is enabled, return the dst operand unscathed in fp0 # | ||
9910 | # and the source operand in fp1. Use fp2 to create an OPERR exception # | ||
9911 | # so that the operating system can log the event. # | ||
9912 | # # | ||
9913 | ######################################################################### | ||
9914 | |||
9915 | global t_operr | ||
9916 | t_operr: | ||
9917 | ori.l &opnan_mask,USER_FPSR(%a6) # set NAN/OPERR/AIOP | ||
9918 | |||
# Is the OPERR exception enabled in the saved FPCR enable byte?
9919 | btst &operr_bit,FPCR_ENABLE(%a6) | ||
9920 | bne.b operr_ena | ||
9921 | |||
9922 | # operr is disabled. return a QNAN in fp0 | ||
9923 | fmovm.x qnan(%pc),&0x80 # fp0 = the 'qnan' constant defined in this file | ||
9924 | rts | ||
9925 | |||
9926 | # operr is enabled. create an operr exception so the user can record it | ||
9927 | # but use fp2 instead. return the dst operand unscathed in fp0. | ||
9928 | operr_ena: | ||
9929 | fmovm.x EXC_FP0(%a6),&0x80 # reload fp0 from the saved EXC_FP0 image | ||
# Install the caller's FPCR before forcing the multiply below.
9930 | fmov.l USER_FPCR(%a6),%fpcr | ||
# fp2 is only a scratch register here: it is pushed, used for INF x 0, and
# popped again. Both register masks (0x04 with -(sp), 0x20 with (sp)+) are
# commented as naming fp2.
9931 | fmovm.x &0x04,-(%sp) # save fp2 | ||
9932 | fmov.s &0x7f800000,%fp2 # load +INF | ||
9933 | fmul.s &0x00000000,%fp2 # +INF x 0 | ||
9934 | fmovm.x (%sp)+,&0x20 # restore fp2 | ||
9935 | rts | ||
9936 | |||
# Extended-precision constants (three longwords each) that the t_unfl*,
# t_ovfl* and inx2_work routines multiply or add to force an exception.
# huge: exponent field 0x7ffe with an all-ones mantissa, sign clear / set.
9937 | pls_huge: | ||
9938 | long 0x7ffe0000,0xffffffff,0xffffffff | ||
9939 | mns_huge: | ||
9940 | long 0xfffe0000,0xffffffff,0xffffffff | ||
# tiny: exponent field 0 with only the top mantissa bit set, sign clear / set.
9941 | pls_tiny: | ||
9942 | long 0x00000000,0x80000000,0x00000000 | ||
9943 | mns_tiny: | ||
9944 | long 0x80000000,0x80000000,0x00000000 | ||
9945 | |||
9946 | ######################################################################### | ||
9947 | # XDEF **************************************************************** # | ||
9948 | # t_unfl(): Handle 060FPLSP underflow exception during emulation. # | ||
9949 | # t_unfl2(): Handle 060FPLSP underflow exception during # | ||
9950 | # emulation. result always positive. # | ||
9951 | # # | ||
9952 | # This routine is used by the 060FPLSP package. # | ||
9953 | # # | ||
9954 | # XREF **************************************************************** # | ||
9955 | # None. # | ||
9956 | # # | ||
9957 | # INPUT *************************************************************** # | ||
9958 | # a0 = pointer to extended precision source operand # | ||
9959 | # # | ||
9960 | # OUTPUT ************************************************************** # | ||
9961 | # fp0 = default underflow result # | ||
9962 | # # | ||
9963 | # ALGORITHM *********************************************************** # | ||
9964 | # An underflow should occur as the result of transcendental # | ||
9965 | # emulation in the 060FPLSP. Create an underflow by using "fmul" # | ||
9966 | # and two very small numbers of appropriate sign so the operating # | ||
9967 | # system can log the event. # | ||
9968 | # # | ||
9969 | ######################################################################### | ||
9970 | |||
9971 | global t_unfl | ||
9972 | t_unfl: | ||
# The sign of the source operand at (a0) picks the negative or positive path.
9973 | tst.b SRC_EX(%a0) | ||
9974 | bpl.b unf_pos | ||
9975 | |||
9976 | global t_unfl2 | ||
9977 | t_unfl2: | ||
# NOTE(review): the routine header describes t_unfl2 as "result always
# positive", but this entry point sets neg_mask and multiplies mns_tiny by
# pls_tiny, i.e. it is the negative path -- confirm against the callers.
9978 | ori.l &unfinx_mask+neg_mask,USER_FPSR(%a6) # set N/UNFL/INEX2/AUNFL/AINEX | ||
9979 | |||
# Under the caller's FPCR, compute (-tiny) x (+tiny) in fp0.
9980 | fmov.l USER_FPCR(%a6),%fpcr | ||
9981 | fmovm.x mns_tiny(%pc),&0x80 | ||
9982 | fmul.x pls_tiny(%pc),%fp0 | ||
9983 | |||
# Rotate the top byte of the live FPSR into the low byte of d0 and store it
# as the saved condition-code byte.
9984 | fmov.l %fpsr,%d0 | ||
9985 | rol.l &0x8,%d0 | ||
9986 | mov.b %d0,FPSR_CC(%a6) | ||
9987 | rts | ||
9988 | unf_pos: | ||
9989 | ori.w &unfinx_mask,FPSR_EXCEPT(%a6) # set UNFL/INEX2/AUNFL/AINEX | ||
9990 | |||
# Under the caller's FPCR, compute (+tiny) x (+tiny) in fp0.
9991 | fmov.l USER_FPCR(%a6),%fpcr | ||
9992 | fmovm.x pls_tiny(%pc),&0x80 | ||
9993 | fmul.x %fp0,%fp0 | ||
9994 | |||
# Same condition-code capture as on the negative path.
9995 | fmov.l %fpsr,%d0 | ||
9996 | rol.l &0x8,%d0 | ||
9997 | mov.b %d0,FPSR_CC(%a6) | ||
9998 | rts | ||
9999 | |||
10000 | ######################################################################### | ||
10001 | # XDEF **************************************************************** # | ||
10002 | # t_ovfl(): Handle 060FPLSP overflow exception during emulation. # | ||
10003 | # (monadic) # | ||
10004 | # t_ovfl2(): Handle 060FPLSP overflow exception during # | ||
10005 | # emulation. result always positive. (dyadic) # | ||
10006 | # t_ovfl_sc(): Handle 060FPLSP overflow exception during # | ||
10007 | # emulation for "fscale". # | ||
10008 | # # | ||
10009 | # This routine is used by the 060FPLSP package. # | ||
10010 | # # | ||
10011 | # XREF **************************************************************** # | ||
10012 | # None. # | ||
10013 | # # | ||
10014 | # INPUT *************************************************************** # | ||
10015 | # a0 = pointer to extended precision source operand # | ||
10016 | # # | ||
10017 | # OUTPUT ************************************************************** # | ||
10018 | # fp0 = default overflow result # | ||
10019 | # # | ||
10020 | # ALGORITHM *********************************************************** # | ||
10021 | # An overflow should occur as the result of transcendental # | ||
10022 | # emulation in the 060FPLSP. Create an overflow by using "fmul" # | ||
10023 | # and two very large numbers of appropriate sign so the operating # | ||
10024 | # system can log the event. # | ||
10025 | # For t_ovfl_sc() we take special care not to lose the INEX2 bit. # | ||
10026 | # # | ||
10027 | ######################################################################### | ||
10028 | |||
10029 | global t_ovfl_sc | ||
10030 | t_ovfl_sc: | ||
# INEX2 is deliberately not set up front; it is added below only when the
# normalized mantissa has bits beyond the selected precision.
10031 | ori.l &ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX | ||
10032 | |||
10033 | mov.b %d0,%d1 # d0 carries the rounding precision/mode on entry | ||
10034 | andi.b &0xc0,%d1 # keep only the precision field (top two bits) | ||
# Precision field of zero: skip the inexact check entirely.
10035 | beq.w ovfl_work | ||
10036 | |||
10037 | # dst op is a DENORM. we have to normalize the mantissa to see if the | ||
10038 | # result would be inexact for the given precision. make a copy of the | ||
10039 | # dst so we don't screw up the version passed to us. | ||
10040 | mov.w LOCAL_EX(%a0),FP_SCR0_EX(%a6) | ||
10041 | mov.l LOCAL_HI(%a0),FP_SCR0_HI(%a6) | ||
10042 | mov.l LOCAL_LO(%a0),FP_SCR0_LO(%a6) | ||
10043 | lea FP_SCR0(%a6),%a0 # a0 now points at the scratch copy | ||
10044 | movm.l &0xc080,-(%sp) # save d0-d1/a0 | ||
10045 | bsr.l norm # normalize mantissa | ||
10046 | movm.l (%sp)+,&0x0103 # restore d0-d1/a0 | ||
10047 | |||
10048 | cmpi.b %d1,&0x40 # is precision sgl? | ||
10049 | bne.b ovfl_sc_dbl # no; dbl | ||
10050 | ovfl_sc_sgl: | ||
10051 | tst.l LOCAL_LO(%a0) # any bit set in the low mantissa longword? | ||
10052 | bne.b ovfl_sc_inx # yes | ||
10053 | tst.b 3+LOCAL_HI(%a0) # any bit set in the low byte of the high longword? | ||
10054 | bne.b ovfl_sc_inx # yes | ||
10055 | bra.w ovfl_work # don't set INEX2 | ||
10056 | ovfl_sc_dbl: | ||
10057 | mov.l LOCAL_LO(%a0),%d1 # are any of lo 11 bits of | ||
10058 | andi.l &0x7ff,%d1 # dbl mantissa set? | ||
10059 | beq.w ovfl_work # no; don't set INEX2 | ||
10060 | ovfl_sc_inx: | ||
10061 | ori.l &inex2_mask,USER_FPSR(%a6) # set INEX2 | ||
10062 | bra.b ovfl_work # join the common overflow code with a0 -> scratch copy | ||
10063 | |||
10064 | global t_ovfl | ||
10065 | t_ovfl: | ||
10066 | ori.w &ovfinx_mask,FPSR_EXCEPT(%a6) # set OVFL/INEX2/AOVFL/AINEX | ||
10067 | ovfl_work: | ||
# Common overflow code (also entered from t_ovfl_sc). The sign of the operand
# at (a0) selects the negative or positive default result.
10068 | tst.b SRC_EX(%a0) | ||
10069 | bpl.b ovfl_p | ||
10070 | ovfl_m: | ||
# Under the caller's FPCR, compute (-huge) x (+huge) in fp0.
10071 | fmov.l USER_FPCR(%a6),%fpcr | ||
10072 | fmovm.x mns_huge(%pc),&0x80 | ||
10073 | fmul.x pls_huge(%pc),%fp0 | ||
10074 | |||
# Rotate the top byte of the live FPSR into the low byte of d0, force the
# 'N' ccode bit, and store it as the saved condition-code byte.
10075 | fmov.l %fpsr,%d0 | ||
10076 | rol.l &0x8,%d0 | ||
# Byte-sized operation on the ccode byte, so use the byte mask (neg_bmask)
# as the other FPSR_CC writers in this file do, not the long-sized neg_mask.
10077 | ori.b &neg_bmask,%d0 | ||
10078 | mov.b %d0,FPSR_CC(%a6) | ||
10079 | rts | ||
10080 | ovfl_p: | ||
# Under the caller's FPCR, compute (+huge) x (+huge) in fp0.
10081 | fmov.l USER_FPCR(%a6),%fpcr | ||
10082 | fmovm.x pls_huge(%pc),&0x80 | ||
10083 | fmul.x pls_huge(%pc),%fp0 | ||
10084 | |||
# Capture the resulting condition-code byte as on the negative path.
10085 | fmov.l %fpsr,%d0 | ||
10086 | rol.l &0x8,%d0 | ||
10087 | mov.b %d0,FPSR_CC(%a6) | ||
10088 | rts | ||
10089 | |||
10090 | global t_ovfl2 | ||
10091 | t_ovfl2: | ||
# Unlike t_ovfl, no operand sign is tested: the result is always built from
# (+huge) x (+huge).
10092 | ori.w &ovfinx_mask,FPSR_EXCEPT(%a6) # set OVFL/INEX2/AOVFL/AINEX | ||
10093 | fmov.l USER_FPCR(%a6),%fpcr | ||
10094 | fmovm.x pls_huge(%pc),&0x80 | ||
10095 | fmul.x pls_huge(%pc),%fp0 | ||
10096 | |||
# Rotate the top byte of the live FPSR into the low byte of d0 and store it
# as the saved condition-code byte.
10097 | fmov.l %fpsr,%d0 | ||
10098 | rol.l &0x8,%d0 | ||
10099 | mov.b %d0,FPSR_CC(%a6) | ||
10100 | rts | ||
10101 | |||
10102 | ######################################################################### | ||
10103 | # XDEF **************************************************************** # | ||
10104 | # t_catch(): Handle 060FPLSP OVFL,UNFL,or INEX2 exception during # | ||
10105 | # emulation. # | ||
10106 | # t_catch2(): Handle 060FPLSP OVFL,UNFL,or INEX2 exception during # | ||
10107 | # emulation. # | ||
10108 | # # | ||
10109 | # These routines are used by the 060FPLSP package. # | ||
10110 | # # | ||
10111 | # XREF **************************************************************** # | ||
10112 | # None. # | ||
10113 | # # | ||
10114 | # INPUT *************************************************************** # | ||
10115 | # fp0 = default underflow or overflow result # | ||
10116 | # # | ||
10117 | # OUTPUT ************************************************************** # | ||
10118 | # fp0 = default result # | ||
10119 | # # | ||
10120 | # ALGORITHM *********************************************************** # | ||
10121 | # If an overflow or underflow occurred during the last # | ||
10122 | # instruction of transcendental 060FPLSP emulation, then it has already # | ||
10123 | # occurred and has been logged. Now we need to see if an inexact # | ||
10124 | # exception should occur. # | ||
10125 | # # | ||
10126 | ######################################################################### | ||
10127 | |||
10128 | global t_catch2 | ||
10129 | t_catch2: | ||
# Merge the live FPSR into the saved USER_FPSR, then go straight to the
# "is inexact enabled?" check, skipping the sign/zero tests of t_inx2.
10130 | fmov.l %fpsr,%d0 | ||
10131 | or.l %d0,USER_FPSR(%a6) | ||
10132 | bra.b inx2_work | ||
10133 | |||
10134 | global t_catch | ||
10135 | t_catch: | ||
# Merge the live FPSR into the saved USER_FPSR. There is no rts or branch
# after this: execution falls through into the code that follows the next
# comment banner (t_inx2).
10136 | fmov.l %fpsr,%d0 | ||
10137 | or.l %d0,USER_FPSR(%a6) | ||
10138 | |||
10139 | ######################################################################### | ||
10140 | # XDEF **************************************************************** # | ||
10141 | # t_inx2(): Handle inexact 060FPLSP exception during emulation. # | ||
10142 | # t_pinx2(): Handle inexact 060FPLSP exception for "+" results. # | ||
10143 | # t_minx2(): Handle inexact 060FPLSP exception for "-" results. # | ||
10144 | # # | ||
10145 | # XREF **************************************************************** # | ||
10146 | # None. # | ||
10147 | # # | ||
10148 | # INPUT *************************************************************** # | ||
10149 | # fp0 = default result # | ||
10150 | # # | ||
10151 | # OUTPUT ************************************************************** # | ||
10152 | # fp0 = default result # | ||
10153 | # # | ||
10154 | # ALGORITHM *********************************************************** # | ||
10155 | # The last instruction of transcendental emulation for the # | ||
10156 | # 060FPLSP should be inexact. So, if inexact is enabled, then we create # | ||
10157 | # the event here by adding a large and very small number together # | ||
10158 | # so that the operating system can log the event. # | ||
10159 | # Must check, too, if the result was zero, in which case we just # | ||
10160 | # set the FPSR bits and return. # | ||
10161 | # # | ||
10162 | ######################################################################### | ||
10163 | |||
10164 | global t_inx2 | ||
10165 | t_inx2: | ||
# Dispatch on the FPU condition codes currently in effect: negative results
# go to t_minx2, zero results to inx2_zero, everything else falls into
# t_pinx2.
10166 | fblt.w t_minx2 | ||
10167 | fbeq.w inx2_zero | ||
10168 | |||
10169 | global t_pinx2 | ||
10170 | t_pinx2: | ||
10171 | ori.w &inx2a_mask,FPSR_EXCEPT(%a6) # set INEX2/AINEX | ||
10172 | bra.b inx2_work | ||
10173 | |||
10174 | global t_minx2 | ||
10175 | t_minx2: | ||
# Same as t_pinx2 but also records the negative ccode bit.
10176 | ori.l &inx2a_mask+neg_mask,USER_FPSR(%a6) | ||
10177 | |||
10178 | inx2_work: | ||
10179 | btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled? | ||
10180 | bne.b inx2_work_ena # yes | ||
10181 | rts | ||
10182 | inx2_work_ena: | ||
# fp1 is used as scratch for the forcing add and is not restored.
10183 | fmov.l USER_FPCR(%a6),%fpcr # install the caller's FPCR (exception enables) | ||
10184 | fmov.s &0x3f800000,%fp1 # load +1 | ||
10185 | fadd.x pls_tiny(%pc),%fp1 # 1 + tiny: forces the inexact event | ||
10186 | rts | ||
10187 | |||
10188 | inx2_zero: | ||
# Zero result: only the saved status is updated (Z ccode, INEX/AINEX); this
# path returns without going through inx2_work.
10189 | mov.b &z_bmask,FPSR_CC(%a6) | ||
10190 | ori.w &inx2a_mask,2+USER_FPSR(%a6) # set INEX/AINEX | ||
10191 | rts | ||
10192 | |||
10193 | ######################################################################### | ||
10194 | # XDEF **************************************************************** # | ||
10195 | # t_extdnrm(): Handle DENORM inputs in 060FPLSP. # | ||
10196 | # t_resdnrm(): Handle DENORM inputs in 060FPLSP for "fscale". # | ||
10197 | # # | ||
10198 | # This routine is used by the 060FPLSP package. # | ||
10199 | # # | ||
10200 | # XREF **************************************************************** # | ||
10201 | # None. # | ||
10202 | # # | ||
10203 | # INPUT *************************************************************** # | ||
10204 | # a0 = pointer to extended precision input operand # | ||
10205 | # # | ||
10206 | # OUTPUT ************************************************************** # | ||
10207 | # fp0 = default result # | ||
10208 | # # | ||
10209 | # ALGORITHM *********************************************************** # | ||
10210 | # For all functions that have a denormalized input and that # | ||
10211 | # f(x)=x, this is the entry point. # | ||
10212 | # DENORM value is moved using "fmove" which triggers an exception # | ||
10213 | # if enabled so the operating system can log the event. # | ||
10214 | # # | ||
10215 | ######################################################################### | ||
10216 | |||
10217 | global t_extdnrm | ||
10218 | t_extdnrm: | ||
# Load the operand at (a0) into fp0 under the caller's FPCR, then merge the
# resulting FPSR -- with the unfinx_mask bits forced on -- into USER_FPSR.
10219 | fmov.l USER_FPCR(%a6),%fpcr | ||
10220 | fmov.x SRC_EX(%a0),%fp0 | ||
10221 | fmov.l %fpsr,%d0 | ||
10222 | ori.l &unfinx_mask,%d0 | ||
10223 | or.l %d0,USER_FPSR(%a6) | ||
10224 | rts | ||
10225 | |||
10226 | global t_resdnrm | ||
10227 | t_resdnrm: | ||
# Same sequence as t_extdnrm, except that no extra status bits are forced:
# only the FPSR produced by the move is merged into USER_FPSR.
10228 | fmov.l USER_FPCR(%a6),%fpcr | ||
10229 | fmov.x SRC_EX(%a0),%fp0 | ||
10230 | fmov.l %fpsr,%d0 | ||
10231 | or.l %d0,USER_FPSR(%a6) | ||
10232 | rts | ||
10233 | |||
10234 | ########################################## | ||
10235 | |||
10236 | # | ||
10237 | # sto_cos: | ||
10238 | # This is used by fsincos library emulation. The correct | ||
10239 | # values are already in fp0 and fp1 so we do nothing here. | ||
10240 | # | ||
10241 | global sto_cos | ||
10242 | sto_cos: | ||
# Intentional no-op: returns immediately without touching any register.
10243 | rts | ||
10244 | |||
10245 | ########################################## | ||
10246 | |||
10247 | # | ||
10248 | # dst_qnan --- force result when destination is a NaN | ||
10249 | # | ||
10250 | global dst_qnan | ||
10251 | dst_qnan: | ||
# Return the destination operand at (a1) in fp0 and set the saved ccode byte
# to NAN, plus N when the operand's sign bit is set.
10252 | fmov.x DST(%a1),%fp0 | ||
10253 | tst.b DST_EX(%a1) | ||
10254 | bmi.b dst_qnan_m | ||
10255 | dst_qnan_p: | ||
10256 | mov.b &nan_bmask,FPSR_CC(%a6) | ||
10257 | rts | ||
10258 | dst_qnan_m: | ||
10259 | mov.b &nan_bmask+neg_bmask,FPSR_CC(%a6) | ||
10260 | rts | ||
10261 | |||
10262 | # | ||
10263 | # src_qnan --- force result when source is a NaN | ||
10264 | # | ||
10265 | global src_qnan | ||
10266 | src_qnan: | ||
# Return the source operand at (a0) in fp0 and set the saved ccode byte to
# NAN, plus N when the operand's sign bit is set.
10267 | fmov.x SRC(%a0),%fp0 | ||
10268 | tst.b SRC_EX(%a0) | ||
10269 | bmi.b src_qnan_m | ||
10270 | src_qnan_p: | ||
10271 | mov.b &nan_bmask,FPSR_CC(%a6) | ||
10272 | rts | ||
10273 | src_qnan_m: | ||
10274 | mov.b &nan_bmask+neg_bmask,FPSR_CC(%a6) | ||
10275 | rts | ||
10276 | |||
10277 | ########################################## | ||
10278 | |||
10279 | # | ||
10280 | # Native instruction support | ||
10281 | # | ||
10282 | # Some systems may need entry points even for 68060 native | ||
10283 | # instructions. These routines are provided for | ||
10284 | # convenience. | ||
10285 | # | ||
10286 | global _fadds_ | ||
10287 | _fadds_: | ||
# fp0 = dst + src for two single-precision stack arguments (dst first).
# The dst load runs with fpcr zeroed; the add runs under the caller's fpcr.
10288 | fmov.l %fpcr,-(%sp) # push the caller's fpcr (sp drops by 4) | ||
10289 | fmov.l &0x00000000,%fpcr # zero fpcr while the dst operand is loaded | ||
10290 | fmov.s 0x8(%sp),%fp0 # dst: 8 = pushed fpcr + return address | ||
10291 | fmov.l (%sp)+,%fpcr # pop the caller's fpcr back | ||
10292 | fadd.s 0x8(%sp),%fp0 # src: 8 = return address + 4-byte dst | ||
10293 | rts | ||
10294 | |||
10295 | global _faddd_ | ||
10296 | _faddd_: | ||
# fp0 = dst + src for two double-precision stack arguments (dst first).
# The dst load runs with fpcr zeroed; the add runs under the caller's fpcr.
10297 | fmov.l %fpcr,-(%sp) # push the caller's fpcr (sp drops by 4) | ||
10298 | fmov.l &0x00000000,%fpcr # zero fpcr while the dst operand is loaded | ||
10299 | fmov.d 0x8(%sp),%fp0 # dst: 8 = pushed fpcr + return address | ||
10300 | fmov.l (%sp)+,%fpcr # pop the caller's fpcr back | ||
10301 | fadd.d 0xc(%sp),%fp0 # src: 0xc = return address + 8-byte dst | ||
10302 | rts | ||
10303 | |||
10304 | global _faddx_ | ||
10305 | _faddx_: | ||
# fp0 = dst + src for two extended-precision stack arguments (dst first).
# The dst is loaded with fmovm, so there is no fpcr save/zero/restore here.
10306 | fmovm.x 0x4(%sp),&0x80 # dst: just above the return address | ||
10307 | fadd.x 0x10(%sp),%fp0 # src: 0x10 = return address + 12-byte dst | ||
10308 | rts | ||
10309 | |||
10310 | global _fsubs_ | ||
10311 | _fsubs_: | ||
# fp0 = dst - src for two single-precision stack arguments (dst first).
# The dst load runs with fpcr zeroed; the subtract runs under the caller's fpcr.
10312 | fmov.l %fpcr,-(%sp) # push the caller's fpcr (sp drops by 4) | ||
10313 | fmov.l &0x00000000,%fpcr # zero fpcr while the dst operand is loaded | ||
10314 | fmov.s 0x8(%sp),%fp0 # dst: 8 = pushed fpcr + return address | ||
10315 | fmov.l (%sp)+,%fpcr # pop the caller's fpcr back | ||
10316 | fsub.s 0x8(%sp),%fp0 # src: 8 = return address + 4-byte dst | ||
10317 | rts | ||
10318 | |||
10319 | global _fsubd_ | ||
10320 | _fsubd_: | ||
# fp0 = dst - src for two double-precision stack arguments (dst first).
# The dst load runs with fpcr zeroed; the subtract runs under the caller's fpcr.
10321 | fmov.l %fpcr,-(%sp) # push the caller's fpcr (sp drops by 4) | ||
10322 | fmov.l &0x00000000,%fpcr # zero fpcr while the dst operand is loaded | ||
10323 | fmov.d 0x8(%sp),%fp0 # dst: 8 = pushed fpcr + return address | ||
10324 | fmov.l (%sp)+,%fpcr # pop the caller's fpcr back | ||
10325 | fsub.d 0xc(%sp),%fp0 # src: 0xc = return address + 8-byte dst | ||
10326 | rts | ||
10327 | |||
10328 | global _fsubx_ | ||
10329 | _fsubx_: | ||
# fp0 = dst - src for two extended-precision stack arguments (dst first).
# The dst is loaded with fmovm, so there is no fpcr save/zero/restore here.
10330 | fmovm.x 0x4(%sp),&0x80 # dst: just above the return address | ||
10331 | fsub.x 0x10(%sp),%fp0 # src: 0x10 = return address + 12-byte dst | ||
10332 | rts | ||
10333 | |||
10334 | global _fmuls_ | ||
10335 | _fmuls_: | ||
# fp0 = dst * src for two single-precision stack arguments (dst first).
# The dst load runs with fpcr zeroed; the multiply runs under the caller's fpcr.
10336 | fmov.l %fpcr,-(%sp) # push the caller's fpcr (sp drops by 4) | ||
10337 | fmov.l &0x00000000,%fpcr # zero fpcr while the dst operand is loaded | ||
10338 | fmov.s 0x8(%sp),%fp0 # dst: 8 = pushed fpcr + return address | ||
10339 | fmov.l (%sp)+,%fpcr # pop the caller's fpcr back | ||
10340 | fmul.s 0x8(%sp),%fp0 # src: 8 = return address + 4-byte dst | ||
10341 | rts | ||
10342 | |||
10343 | global _fmuld_ | ||
10344 | _fmuld_: | ||
# fp0 = dst * src for two double-precision stack arguments (dst first).
# The dst load runs with fpcr zeroed; the multiply runs under the caller's fpcr.
10345 | fmov.l %fpcr,-(%sp) # push the caller's fpcr (sp drops by 4) | ||
10346 | fmov.l &0x00000000,%fpcr # zero fpcr while the dst operand is loaded | ||
10347 | fmov.d 0x8(%sp),%fp0 # dst: 8 = pushed fpcr + return address | ||
10348 | fmov.l (%sp)+,%fpcr # pop the caller's fpcr back | ||
10349 | fmul.d 0xc(%sp),%fp0 # src: 0xc = return address + 8-byte dst | ||
10350 | rts | ||
10351 | |||
10352 | global _fmulx_ | ||
10353 | _fmulx_: | ||
# fp0 = dst * src for two extended-precision stack arguments (dst first).
# The dst is loaded with fmovm, so there is no fpcr save/zero/restore here.
10354 | fmovm.x 0x4(%sp),&0x80 # dst: just above the return address | ||
10355 | fmul.x 0x10(%sp),%fp0 # src: 0x10 = return address + 12-byte dst | ||
10356 | rts | ||
10357 | |||
10358 | global _fdivs_ | ||
10359 | _fdivs_: | ||
# fp0 = dst / src for two single-precision stack arguments (dst first).
# The dst load runs with fpcr zeroed; the divide runs under the caller's fpcr.
10360 | fmov.l %fpcr,-(%sp) # push the caller's fpcr (sp drops by 4) | ||
10361 | fmov.l &0x00000000,%fpcr # zero fpcr while the dst operand is loaded | ||
10362 | fmov.s 0x8(%sp),%fp0 # dst: 8 = pushed fpcr + return address | ||
10363 | fmov.l (%sp)+,%fpcr # pop the caller's fpcr back | ||
10364 | fdiv.s 0x8(%sp),%fp0 # src: 8 = return address + 4-byte dst | ||
10365 | rts | ||
10366 | |||
10367 | global _fdivd_ | ||
10368 | _fdivd_: | ||
# fp0 = dst / src for two double-precision stack arguments (dst first).
# The dst load runs with fpcr zeroed; the divide runs under the caller's fpcr.
10369 | fmov.l %fpcr,-(%sp) # push the caller's fpcr (sp drops by 4) | ||
10370 | fmov.l &0x00000000,%fpcr # zero fpcr while the dst operand is loaded | ||
10371 | fmov.d 0x8(%sp),%fp0 # dst: 8 = pushed fpcr + return address | ||
10372 | fmov.l (%sp)+,%fpcr # pop the caller's fpcr back | ||
10373 | fdiv.d 0xc(%sp),%fp0 # src: 0xc = return address + 8-byte dst | ||
10374 | rts | ||
10375 | |||
10376 | global _fdivx_ | ||
10377 | _fdivx_: | ||
# fp0 = dst / src for two extended-precision stack arguments (dst first).
# The dst is loaded with fmovm, so there is no fpcr save/zero/restore here.
10378 | fmovm.x 0x4(%sp),&0x80 # dst: just above the return address | ||
10379 | fdiv.x 0x10(%sp),%fp0 # src: 0x10 = return address + 12-byte dst | ||
10380 | rts | ||
10381 | |||
10382 | global _fabss_ | ||
10383 | _fabss_: | ||
10384 | fabs.s 0x4(%sp),%fp0 # fp0 = fabs of the sgl arg just above the return address | ||
10385 | rts | ||
10386 | |||
10387 | global _fabsd_ | ||
10388 | _fabsd_: | ||
10389 | fabs.d 0x4(%sp),%fp0 # fp0 = fabs of the dbl arg just above the return address | ||
10390 | rts | ||
10391 | |||
10392 | global _fabsx_ | ||
10393 | _fabsx_: | ||
10394 | fabs.x 0x4(%sp),%fp0 # fp0 = fabs of the ext arg just above the return address | ||
10395 | rts | ||
10396 | |||
10397 | global _fnegs_ | ||
10398 | _fnegs_: | ||
10399 | fneg.s 0x4(%sp),%fp0 # fp0 = fneg of the sgl arg just above the return address | ||
10400 | rts | ||
10401 | |||
10402 | global _fnegd_ | ||
10403 | _fnegd_: | ||
10404 | fneg.d 0x4(%sp),%fp0 # fp0 = fneg of the dbl arg just above the return address | ||
10405 | rts | ||
10406 | |||
10407 | global _fnegx_ | ||
10408 | _fnegx_: | ||
10409 | fneg.x 0x4(%sp),%fp0 # fp0 = fneg of the ext arg just above the return address | ||
10410 | rts | ||
10411 | |||
10412 | global _fsqrts_ | ||
10413 | _fsqrts_: | ||
10414 | fsqrt.s 0x4(%sp),%fp0 # fp0 = fsqrt of the sgl arg just above the return address | ||
10415 | rts | ||
10416 | |||
10417 | global _fsqrtd_ | ||
10418 | _fsqrtd_: | ||
10419 | fsqrt.d 0x4(%sp),%fp0 # fp0 = fsqrt of the dbl arg just above the return address | ||
10420 | rts | ||
10421 | |||
10422 | global _fsqrtx_ | ||
10423 | _fsqrtx_: | ||
10424 | fsqrt.x 0x4(%sp),%fp0 # fp0 = fsqrt of the ext arg just above the return address | ||
10425 | rts | ||
10426 | |||
10427 | global _fints_ | ||
10428 | _fints_: | ||
10429 | fint.s 0x4(%sp),%fp0 # fp0 = fint of the sgl arg just above the return address | ||
10430 | rts | ||
10431 | |||
10432 | global _fintd_ | ||
10433 | _fintd_: | ||
10434 | fint.d 0x4(%sp),%fp0 # fp0 = fint of the dbl arg just above the return address | ||
10435 | rts | ||
10436 | |||
10437 | global _fintx_ | ||
10438 | _fintx_: | ||
10439 | fint.x 0x4(%sp),%fp0 # fp0 = fint of the ext arg just above the return address | ||
10440 | rts | ||
10441 | |||
10442 | global _fintrzs_ | ||
10443 | _fintrzs_: | ||
10444 | fintrz.s 0x4(%sp),%fp0 # fp0 = fintrz of the sgl arg just above the return address | ||
10445 | rts | ||
10446 | |||
10447 | global _fintrzd_ | ||
10448 | _fintrzd_: | ||
10449 | fintrz.d 0x4(%sp),%fp0 # fp0 = fintrz of the dbl arg just above the return address | ||
10450 | rts | ||
10451 | |||
10452 | global _fintrzx_ | ||
10453 | _fintrzx_: | ||
10454 | fintrz.x 0x4(%sp),%fp0 # fp0 = fintrz of the ext arg just above the return address | ||
10455 | rts | ||
10456 | |||
10457 | ######################################################################## | ||
10458 | |||
10459 | ######################################################################### | ||
10460 | # src_zero(): Return signed zero according to sign of src operand. # | ||
10461 | ######################################################################### | ||
10462 | global src_zero | ||
10463 | src_zero: | ||
10464 | tst.b SRC_EX(%a0) # test the sign bit of the source operand | ||
10465 | bmi.b ld_mzero # negative: return -0; otherwise fall into ld_pzero | ||
10466 | |||
10467 | # | ||
10468 | # ld_pzero(): return +0 in fp0 and record 'Z' in the saved ccode byte. | ||
10469 | # | ||
10470 | global ld_pzero | ||
10471 | ld_pzero: | ||
10472 | fmov.s &0x00000000,%fp0 # load +0 | ||
10473 | mov.b &z_bmask,FPSR_CC(%a6) # ccode byte is overwritten with 'Z' only | ||
10474 | rts | ||
10475 | |||
10476 | # ld_mzero(): return -0 in fp0 and record 'N','Z' in the saved ccode byte. | ||
10477 | global ld_mzero | ||
10478 | ld_mzero: | ||
10479 | fmov.s &0x80000000,%fp0 # load -0 | ||
10480 | mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # ccode byte is overwritten with 'N' and 'Z' | ||
10481 | rts | ||
10482 | |||
10483 | ######################################################################### | ||
10484 | # dst_zero(): Return signed zero according to sign of dst operand. # | ||
10485 | ######################################################################### | ||
10486 | global dst_zero | ||
10487 | dst_zero: | ||
10488 | tst.b DST_EX(%a1) # N flag reflects the sign bit of the dst operand | ||
10489 | bpl.b ld_pzero # sign clear: return +0 | ||
10490 | bra.b ld_mzero # sign set: return -0 | ||
10491 | |||
10492 | ######################################################################### | ||
10493 | # src_inf(): Return signed inf according to sign of src operand. # | ||
10494 | ######################################################################### | ||
# In:  a0 -> src operand; only the byte at SRC_EX(a0) is tested.
# Out: fp0 = +INF or -INF; FPSR_CC(a6) = inf_bmask or neg_bmask+inf_bmask
#      (both are written by the loader that is reached).
# A non-negative byte falls straight through into ld_pinf below, so the
# two routines have to stay adjacent.
10495 | global src_inf | ||
10496 | src_inf: | ||
10497 | tst.b SRC_EX(%a0) # N <- top bit of the src sign byte | ||
10498 | bmi.b ld_minf # negative: return -INF instead | ||
10499 | |||
10500 | # | ||
10501 | # ld_pinf(): return a positive infinity. | ||
10502 | # | ||
# Out: fp0 = +INF; the whole FPSR_CC(a6) byte is replaced by inf_bmask.
10503 | global ld_pinf | ||
10504 | ld_pinf: | ||
10505 | fmov.s &0x7f800000,%fp0 # single-precision immediate +INF | ||
10506 | mov.b &inf_bmask,FPSR_CC(%a6) # ccode byte := 'INF' only | ||
10507 | rts | ||
10508 | |||
10509 | # | ||
10510 | # ld_minf(): return a negative infinity. | ||
10511 | # | ||
# Out: fp0 = -INF; the whole FPSR_CC(a6) byte is replaced by
#      neg_bmask+inf_bmask.
10512 | global ld_minf | ||
10513 | ld_minf: | ||
10514 | fmov.s &0xff800000,%fp0 # single-precision immediate -INF | ||
10515 | mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # ccode byte := 'N' and 'INF' | ||
10516 | rts | ||
10517 | |||
10518 | ######################################################################### | ||
10519 | # dst_inf(): Hand back an infinity whose sign matches the dst operand. # | ||
10520 | ######################################################################### | ||
# In:  a1 -> dst operand; only the byte at DST_EX(a1) is tested.
# Out: via ld_pinf / ld_minf (fp0 = +/-INF, FPSR_CC written there).
10521 | global dst_inf | ||
10522 | dst_inf: | ||
10523 | tst.b DST_EX(%a1) # N <- top bit of the dst sign byte | ||
10524 | bpl.b ld_pinf # sign clear: +INF loader finishes the job | ||
10525 | bra.b ld_minf # sign set: -INF loader finishes the job | ||
10526 | |||
10527 | global szr_inf | ||
10528 | ################################################################# | ||
10529 | # szr_inf(): src sign set -> result is +ZERO, # | ||
10530 | # src sign clear -> result is +INF. # | ||
10531 | # Routine used for fetox, ftwotox, and ftentox. # | ||
10532 | ################################################################# | ||
10533 | szr_inf: | ||
10534 | tst.b SRC_EX(%a0) # N <- top bit of the src sign byte | ||
10535 | bpl.b ld_pinf # non-negative src: return +INF | ||
10536 | bra.b ld_pzero # negative src: return +0 | ||
10537 | |||
10538 | ######################################################################### | ||
10539 | # sopr_inf(): src sign clear -> result is +INF, # | ||
10540 | # src sign set -> leave through the operand error routine. # | ||
10541 | # Routine used for flogn, flognp1, flog10, and flog2. # | ||
10542 | ######################################################################### | ||
10543 | global sopr_inf | ||
10544 | sopr_inf: | ||
10545 | tst.b SRC_EX(%a0) # N <- top bit of the src sign byte | ||
10546 | bpl.b ld_pinf # non-negative src: return +INF | ||
10547 | bra.w t_operr # negative src: operand error | ||
10548 | |||
10549 | ################################################################# | ||
10550 | # setoxm1i(): src sign set -> result is minus one, # | ||
10551 | # src sign clear -> result is positive infinity. # | ||
10552 | # Routine used for fetoxm1. # | ||
10553 | ################################################################# | ||
10554 | global setoxm1i | ||
10555 | setoxm1i: | ||
10556 | tst.b SRC_EX(%a0) # N <- top bit of the src sign byte | ||
10557 | bpl.b ld_pinf # non-negative src: return +INF | ||
10558 | bra.b ld_mone # negative src: return -1 | ||
10559 | |||
10560 | ######################################################################### | ||
10561 | # src_one(): Return signed one according to sign of src operand. # | ||
10562 | ######################################################################### | ||
# In:  a0 -> src operand; only the byte at SRC_EX(a0) is tested.
# Out: fp0 = +1 or -1; FPSR_CC(a6) = 0 or neg_bmask (written by the
#      loader that is reached).
# A non-negative byte falls straight through into ld_pone below, so the
# two routines have to stay adjacent.
10563 | global src_one | ||
10564 | src_one: | ||
10565 | tst.b SRC_EX(%a0) # N <- top bit of the src sign byte | ||
10566 | bmi.b ld_mone | ||
10567 | |||
10568 | # | ||
10569 | # ld_pone(): return positive one. | ||
10570 | # | ||
# Out: fp0 = +1; the whole FPSR_CC(a6) byte is cleared (no ccode bit set).
10571 | global ld_pone | ||
10572 | ld_pone: | ||
10573 | fmov.s &0x3f800000,%fp0 # single-precision immediate +1 | ||
10574 | clr.b FPSR_CC(%a6) | ||
10575 | rts | ||
10576 | |||
10577 | # | ||
10578 | # ld_mone(): return negative one. | ||
10579 | # | ||
# Out: fp0 = -1; the whole FPSR_CC(a6) byte is replaced by neg_bmask.
# Branched to by src_one and setoxm1i in this file.
10580 | global ld_mone | ||
10581 | ld_mone: | ||
10582 | fmov.s &0xbf800000,%fp0 # single-precision immediate -1 | ||
10583 | mov.b &neg_bmask,FPSR_CC(%a6) # ccode byte := 'N' only | ||
10584 | rts | ||
10585 | |||
# Three-longword images of +pi/2 and -pi/2, read below with fmov.x.
# The two tables differ only in the top bit of the first longword.
10586 | ppiby2: long 0x3fff0000, 0xc90fdaa2, 0x2168c235 | ||
10587 | mpiby2: long 0xbfff0000, 0xc90fdaa2, 0x2168c235 | ||
10588 | |||
10589 | ################################################################# | ||
10590 | # spi_2(): Return signed PI/2 according to sign of src operand. # | ||
10591 | ################################################################# | ||
# In:  a0 -> src operand; only the byte at SRC_EX(a0) is tested.
#      d0 = value written to FPCR before the constant is loaded
#           (presumably the rounding mode/precision chosen by the caller;
#           confirm against the callers).
# Out: fp0 = +pi/2 or -pi/2; exit is through t_pinx2 or t_minx2, which are
#      defined outside this section.
# A non-negative byte falls straight through into ld_ppi2 below.
10592 | global spi_2 | ||
10593 | spi_2: | ||
10594 | tst.b SRC_EX(%a0) # N <- top bit of the src sign byte | ||
10595 | bmi.b ld_mpi2 | ||
10596 | |||
10597 | # | ||
10598 | # ld_ppi2(): return positive PI/2. | ||
10599 | # | ||
10600 | global ld_ppi2 | ||
10601 | ld_ppi2: | ||
# FPCR is set first so that the load below is performed under it.
10602 | fmov.l %d0,%fpcr | ||
10603 | fmov.x ppiby2(%pc),%fp0 # load +pi/2 (pc-relative table read) | ||
10604 | bra.w t_pinx2 # set INEX2 | ||
10605 | |||
10606 | # | ||
10607 | # ld_mpi2(): return negative PI/2. | ||
10608 | # | ||
# In:  d0 = value written to FPCR before the constant is loaded
#      (presumably the rounding mode/precision chosen by the caller;
#      confirm against the callers).
# Out: fp0 = -pi/2 read from mpiby2; exit is through t_minx2.
10609 | global ld_mpi2 | ||
10610 | ld_mpi2: | ||
10611 | fmov.l %d0,%fpcr | ||
10612 | fmov.x mpiby2(%pc),%fp0 # load -pi/2 (pc-relative table read) | ||
10613 | bra.w t_minx2 # set INEX2 | ||
10614 | |||
10615 | #################################################### | ||
10616 | # The following routines give support for fsincos. # | ||
10617 | #################################################### | ||
10618 | |||
10619 | # | ||
10620 | # ssincosz(): When the src operand is ZERO, store a one in the | ||
10621 | # cosine register and return a ZERO in fp0 w/ the same sign | ||
10622 | # as the src operand. | ||
10623 | # | ||
# In:  a0 -> src operand; only the byte at SRC_EX(a0) is tested.
# Out: fp1 = +1 (the cosine result is always written to fp1 here);
#      fp0 = +0 or -0 (sine result);
#      FPSR_CC(a6) = z_bmask, plus neg_bmask for a negative src.
10624 | global ssincosz | ||
10625 | ssincosz: | ||
# single-precision immediate +1 goes to fp1 before the sign is looked at
10626 | fmov.s &0x3f800000,%fp1 | ||
10627 | tst.b SRC_EX(%a0) # N <- top bit of the src sign byte | ||
10628 | bpl.b sincoszp | ||
10629 | fmov.s &0x80000000,%fp0 # negative src: sin result is -0 | ||
10630 | mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) | ||
10631 | rts | ||
10632 | sincoszp: | ||
10633 | fmov.s &0x00000000,%fp0 # non-negative src: sin result is +0 | ||
10634 | mov.b &z_bmask,FPSR_CC(%a6) | ||
10635 | rts | ||
10636 | |||
10637 | # | ||
10638 | # ssincosi(): When the src operand is INF, store a QNAN in the cosine | ||
10639 | # register (fp1) and jump to the operand error routine. The sign | ||
10640 | # of the src operand is not examined. | ||
10641 | # | ||
# Out: fp1 = contents of the qnan table (defined outside this section);
#      everything else is left to t_operr.
10642 | global ssincosi | ||
10643 | ssincosi: | ||
10644 | fmov.x qnan(%pc),%fp1 # load NAN | ||
10645 | bra.w t_operr | ||
10646 | |||
10647 | # | ||
10648 | # ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine | ||
10649 | # register and branch to the src QNAN routine. | ||
10650 | # | ||
# In:  a0 -> src operand, read as an extended value from LOCAL_EX(a0).
# Out: fp1 = that value; the fp0 result is left to src_qnan.
10651 | global ssincosqnan | ||
10652 | ssincosqnan: | ||
10653 | fmov.x LOCAL_EX(%a0),%fp1 | ||
10654 | bra.w src_qnan | ||
10655 | |||
10656 | ######################################################################## | ||
10657 | |||
# smod_sdnrm / smod_snorm: dispatch on the dst operand tag in DTAG(a6).
# (Going by the names these are the fmod entries for a DENORM / NORM src
# operand; confirm against the jump table that references them.)
#   tag byte == 0  -> smod
#   ZERO           -> smod_zro
#   INF            -> t_operr
#   DENORM         -> smod
#   anything else  -> dst_qnan
# d1 is overwritten with the tag byte.
10658 | global smod_sdnrm | ||
10659 | global smod_snorm | ||
10660 | smod_sdnrm: | ||
10661 | smod_snorm: | ||
10662 | mov.b DTAG(%a6),%d1 | ||
10663 | beq.l smod | ||
10664 | cmpi.b %d1,&ZERO | ||
10665 | beq.w smod_zro | ||
10666 | cmpi.b %d1,&INF | ||
10667 | beq.l t_operr | ||
10668 | cmpi.b %d1,&DENORM | ||
10669 | beq.l smod | ||
10670 | bra.l dst_qnan | ||
10671 | |||
# smod_szero: dispatch on the dst operand tag in DTAG(a6).
# (Going by the name this is the fmod entry for a ZERO src operand;
# confirm against the jump table that references it.)
#   tag byte == 0, ZERO, INF, DENORM  -> t_operr
#   anything else                     -> dst_qnan
# d1 is overwritten with the tag byte.
10672 | global smod_szero | ||
10673 | smod_szero: | ||
10674 | mov.b DTAG(%a6),%d1 | ||
10675 | beq.l t_operr | ||
10676 | cmpi.b %d1,&ZERO | ||
10677 | beq.l t_operr | ||
10678 | cmpi.b %d1,&INF | ||
10679 | beq.l t_operr | ||
10680 | cmpi.b %d1,&DENORM | ||
10681 | beq.l t_operr | ||
10682 | bra.l dst_qnan | ||
10683 | |||
# smod_sinf: dispatch on the dst operand tag in DTAG(a6).
# (Going by the name this is the fmod entry for an INF src operand;
# confirm against the jump table that references it.)
#   tag byte == 0  -> smod_fpn   (result is the dst operand itself)
#   ZERO           -> smod_zro
#   INF            -> t_operr
#   DENORM         -> smod_fpn
#   anything else  -> dst_qnan
# d1 is overwritten with the tag byte.
10684 | global smod_sinf | ||
10685 | smod_sinf: | ||
10686 | mov.b DTAG(%a6),%d1 | ||
10687 | beq.l smod_fpn | ||
10688 | cmpi.b %d1,&ZERO | ||
10689 | beq.l smod_zro | ||
10690 | cmpi.b %d1,&INF | ||
10691 | beq.l t_operr | ||
10692 | cmpi.b %d1,&DENORM | ||
10693 | beq.l smod_fpn | ||
10694 | bra.l dst_qnan | ||
10695 | |||
# smod_zro / srem_zro: zero-dst case shared by the smod_* and srem_* tag
# dispatchers.
# In:  a0 -> src operand, a1 -> dst operand (sign bytes at SRC_EX / DST_EX).
# Out: FPSR_QBYTE(a6) = (src sign XOR dst sign) in bit 7, other bits 0;
#      result is a zero with the sign of dst, produced by ld_pzero/ld_mzero.
# d0 and d1 are both overwritten.
10696 | smod_zro: | ||
10697 | srem_zro: | ||
10698 | mov.b DST_EX(%a1),%d0 # d0 = dst sign byte | ||
10699 | mov.b SRC_EX(%a0),%d1 # d1 = src sign byte | ||
10700 | eor.b %d0,%d1 # bit 7 of d1 = src sign XOR dst sign | ||
10701 | andi.b &0x80,%d1 # keep nothing but that bit | ||
10702 | mov.b %d1,FPSR_QBYTE(%a6) # quotient byte carries the sign only | ||
10703 | tst.b %d0 # N <- dst sign | ||
10704 | bmi.w ld_mzero # negative dst: result is -0 | ||
10705 | bra.w ld_pzero # otherwise: result is +0 | ||
10706 | |||
# smod_fpn / srem_fpn: reached from smod_sinf / srem_sinf when the dst tag
# byte is 0 or DENORM. The dst operand itself is returned as the result.
# In:  a0 -> src operand, a1 -> dst operand.
#      d0 = value that is loaded into FPCR on the non-DENORM path, or handed
#           back unchanged in d0 to t_resdnrm on the DENORM path.
# Out: FPSR_QBYTE(a6) = (src sign XOR dst sign) in bit 7, other bits 0.
#      Non-DENORM path: fp0 = dst; FPSR_CC(a6) is written (neg_bmask) only
#      when dst is negative, and is left untouched for a positive dst.
#      DENORM path: a0 = address of DST(a1), then control goes to t_resdnrm.
# d1 is overwritten. d0 is parked on the stack because it doubles as
# scratch for the dst sign byte; each path pops it exactly once.
10707 | smod_fpn: | ||
10708 | srem_fpn: | ||
# NOTE(review): this clear is overwritten by the mov.b to FPSR_QBYTE a few
# instructions further down, with no read of the byte in between.
10709 | clr.b FPSR_QBYTE(%a6) | ||
10710 | mov.l %d0,-(%sp) | ||
10711 | mov.b SRC_EX(%a0),%d1 # get src sign | ||
10712 | mov.b DST_EX(%a1),%d0 # get dst sign | ||
10713 | eor.b %d0,%d1 # get qbyte sign | ||
10714 | andi.b &0x80,%d1 | ||
10715 | mov.b %d1,FPSR_QBYTE(%a6) | ||
10716 | cmpi.b DTAG(%a6),&DENORM | ||
10717 | bne.b smod_nrm | ||
# DENORM dst: point a0 at the dst operand, restore d0, finish in t_resdnrm.
10718 | lea DST(%a1),%a0 | ||
10719 | mov.l (%sp)+,%d0 | ||
10720 | bra t_resdnrm | ||
10721 | smod_nrm: | ||
# The saved d0 is popped straight into FPCR, then dst is loaded under it.
10722 | fmov.l (%sp)+,%fpcr | ||
10723 | fmov.x DST(%a1),%fp0 | ||
10724 | tst.b DST_EX(%a1) | ||
10725 | bmi.b smod_nrm_neg | ||
10726 | rts | ||
10727 | |||
10728 | smod_nrm_neg: | ||
10729 | mov.b &neg_bmask,FPSR_CC(%a6) # ccode byte := 'N' only | ||
10730 | rts | ||
10731 | |||
10732 | ######################################################################### | ||
# srem_sdnrm / srem_snorm: dispatch on the dst operand tag in DTAG(a6).
# (Going by the names these are the frem entries for a DENORM / NORM src
# operand; confirm against the jump table that references them.)
#   tag byte == 0  -> srem
#   ZERO           -> srem_zro
#   INF            -> t_operr
#   DENORM         -> srem
#   anything else  -> dst_qnan
# d1 is overwritten with the tag byte.
10733 | global srem_snorm | ||
10734 | global srem_sdnrm | ||
10735 | srem_sdnrm: | ||
10736 | srem_snorm: | ||
10737 | mov.b DTAG(%a6),%d1 | ||
10738 | beq.l srem | ||
10739 | cmpi.b %d1,&ZERO | ||
10740 | beq.w srem_zro | ||
10741 | cmpi.b %d1,&INF | ||
10742 | beq.l t_operr | ||
10743 | cmpi.b %d1,&DENORM | ||
10744 | beq.l srem | ||
10745 | bra.l dst_qnan | ||
10746 | |||
# srem_szero: dispatch on the dst operand tag in DTAG(a6).
# (Going by the name this is the frem entry for a ZERO src operand;
# confirm against the jump table that references it.)
#   tag byte == 0, ZERO, INF, DENORM  -> t_operr
#   anything else                     -> dst_qnan
# d1 is overwritten with the tag byte.
10747 | global srem_szero | ||
10748 | srem_szero: | ||
10749 | mov.b DTAG(%a6),%d1 | ||
10750 | beq.l t_operr | ||
10751 | cmpi.b %d1,&ZERO | ||
10752 | beq.l t_operr | ||
10753 | cmpi.b %d1,&INF | ||
10754 | beq.l t_operr | ||
10755 | cmpi.b %d1,&DENORM | ||
10756 | beq.l t_operr | ||
10757 | bra.l dst_qnan | ||
10758 | |||
# srem_sinf: dispatch on the dst operand tag in DTAG(a6).
# (Going by the name this is the frem entry for an INF src operand;
# confirm against the jump table that references it.)
#   tag byte == 0  -> srem_fpn   (result is the dst operand itself)
#   ZERO           -> srem_zro
#   INF            -> t_operr
#   DENORM         -> srem_fpn
#   anything else  -> dst_qnan
# d1 is overwritten with the tag byte.
10759 | global srem_sinf | ||
10760 | srem_sinf: | ||
10761 | mov.b DTAG(%a6),%d1 | ||
10762 | beq.w srem_fpn | ||
10763 | cmpi.b %d1,&ZERO | ||
10764 | beq.w srem_zro | ||
10765 | cmpi.b %d1,&INF | ||
10766 | beq.l t_operr | ||
10767 | cmpi.b %d1,&DENORM | ||
10768 | beq.l srem_fpn | ||
10769 | bra.l dst_qnan | ||
10770 | |||
10771 | ######################################################################### | ||
10772 | |||
# sscale_snorm / sscale_sdnrm: dispatch on the dst operand tag in DTAG(a6).
# (Going by the names these are the fscale entries for a NORM / DENORM src
# operand; confirm against the jump table that references them.)
#   tag byte == 0  -> sscale
#   ZERO           -> dst_zero   (zero with the sign of dst)
#   INF            -> dst_inf    (infinity with the sign of dst)
#   DENORM         -> sscale
#   anything else  -> dst_qnan
# d1 is overwritten with the tag byte.
10773 | global sscale_snorm | ||
10774 | global sscale_sdnrm | ||
10775 | sscale_snorm: | ||
10776 | sscale_sdnrm: | ||
10777 | mov.b DTAG(%a6),%d1 | ||
10778 | beq.l sscale | ||
10779 | cmpi.b %d1,&ZERO | ||
10780 | beq.l dst_zero | ||
10781 | cmpi.b %d1,&INF | ||
10782 | beq.l dst_inf | ||
10783 | cmpi.b %d1,&DENORM | ||
10784 | beq.l sscale | ||
10785 | bra.l dst_qnan | ||
10786 | |||
# sscale_szero: dispatch on the dst operand tag in DTAG(a6).
# (Going by the name this is the fscale entry for a ZERO src operand;
# confirm against the jump table that references it.)
# The mapping is the same as in sscale_snorm / sscale_sdnrm:
#   tag byte == 0  -> sscale
#   ZERO           -> dst_zero
#   INF            -> dst_inf
#   DENORM         -> sscale
#   anything else  -> dst_qnan
# d1 is overwritten with the tag byte.
10787 | global sscale_szero | ||
10788 | sscale_szero: | ||
10789 | mov.b DTAG(%a6),%d1 | ||
10790 | beq.l sscale | ||
10791 | cmpi.b %d1,&ZERO | ||
10792 | beq.l dst_zero | ||
10793 | cmpi.b %d1,&INF | ||
10794 | beq.l dst_inf | ||
10795 | cmpi.b %d1,&DENORM | ||
10796 | beq.l sscale | ||
10797 | bra.l dst_qnan | ||
10798 | |||
# sscale_sinf: dispatch on the dst operand tag in DTAG(a6).
# (Going by the name this is the fscale entry for an INF src operand;
# confirm against the jump table that references it.)
#   QNAN           -> dst_qnan
#   anything else  -> t_operr
# The leading beq (tag byte == 0) goes to the same place as the final bra.
# d1 is overwritten with the tag byte.
10799 | global sscale_sinf | ||
10800 | sscale_sinf: | ||
10801 | mov.b DTAG(%a6),%d1 | ||
10802 | beq.l t_operr | ||
10803 | cmpi.b %d1,&QNAN | ||
10804 | beq.l dst_qnan | ||
10805 | bra.l t_operr | ||
10806 | |||
10807 | ######################################################################## | ||
10808 | |||
# sop_sqnan: choose which QNAN handler finishes the operation.
# (Going by the name this is the entry for a QNAN src operand; confirm
# against the jump table that references it.)
#   dst tag in DTAG(a6) == QNAN  -> dst_qnan  (dst takes priority)
#   anything else                -> src_qnan
# d1 is overwritten with the tag byte.
10809 | global sop_sqnan | ||
10810 | sop_sqnan: | ||
10811 | mov.b DTAG(%a6),%d1 | ||
10812 | cmpi.b %d1,&QNAN | ||
10813 | beq.l dst_qnan | ||
10814 | bra.l src_qnan | ||
10815 | |||
10816 | ######################################################################### | ||
10817 | # norm(): normalize the mantissa of an extended precision input. the # | ||
10818 | # input operand should not be normalized already. # | ||
10819 | # # | ||
10820 | # XDEF **************************************************************** # | ||
10821 | # norm() # | ||
10822 | # # | ||
10823 | # XREF **************************************************************** # | ||
10824 | # none # | ||
10825 | # # | ||
10826 | # INPUT *************************************************************** # | ||
10827 | # a0 = pointer to fp extended precision operand to normalize # | ||
10828 | # # | ||
10829 | # OUTPUT ************************************************************** # | ||
10830 | # d0 = number of bit positions the mantissa was shifted # | ||
10831 | # a0 = the input operand's mantissa is normalized; the exponent # | ||
10832 | # is unchanged. # | ||
10833 | # # | ||
10834 | ######################################################################### | ||
# Register use: d2 and d3 are saved on entry and restored on exit; d1 is
# overwritten with lo(man) and not restored.
#
# Two cases, selected by the first bfffo:
#   hi(man) != 0 : shift the 64-bit mantissa left by the bit offset of the
#                  first set bit in hi(man)                   (norm_hi)
#   hi(man) == 0 : shift lo(man) left by the offset of its first set bit,
#                  store it as the new hi(man), clear lo(man), and report
#                  that offset plus 32                        (norm_lo)
#
# NOTE(review): the precondition in the header matters. When bit 31 of
# hi(man) is already set the shift count d2 is 0, and a register-specified
# bit-field width of 0 is taken as 32 on the 68020 and later, so the bfextu
# in norm_hi would fetch all of lo(man) and OR it into hi(man). Confirm
# every caller passes a mantissa whose top bit is clear.
10835 | global norm | ||
10836 | norm: | ||
10837 | mov.l %d2, -(%sp) # create some temp regs | ||
10838 | mov.l %d3, -(%sp) | ||
10839 | |||
10840 | mov.l FTEMP_HI(%a0), %d0 # load hi(mantissa) | ||
10841 | mov.l FTEMP_LO(%a0), %d1 # load lo(mantissa) | ||
10842 | |||
10843 | bfffo %d0{&0:&32}, %d2 # d2 = offset of first set bit; Z set when none | ||
10844 | beq.b norm_lo # hi(man) is all zeroes! | ||
10845 | |||
10846 | norm_hi: | ||
10847 | lsl.l %d2, %d0 # left shift hi(man) | ||
10848 | bfextu %d1{&0:%d2}, %d3 # d3 = top d2 bits of lo(man), right-justified | ||
10849 | |||
10850 | or.l %d3, %d0 # create hi(man) | ||
10851 | lsl.l %d2, %d1 # create lo(man) | ||
10852 | |||
10853 | mov.l %d0, FTEMP_HI(%a0) # store new hi(man) | ||
10854 | mov.l %d1, FTEMP_LO(%a0) # store new lo(man) | ||
10855 | |||
10856 | mov.l %d2, %d0 # return shift amount | ||
10857 | |||
10858 | mov.l (%sp)+, %d3 # restore temp regs | ||
10859 | mov.l (%sp)+, %d2 | ||
10860 | |||
10861 | rts | ||
10862 | |||
10863 | norm_lo: | ||
10864 | bfffo %d1{&0:&32}, %d2 # how many places to shift? | ||
10865 | lsl.l %d2, %d1 # shift lo(man) | ||
10866 | add.l &32, %d2 # add 32 to shft amount | ||
10867 | |||
10868 | mov.l %d1, FTEMP_HI(%a0) # store hi(man) | ||
10869 | clr.l FTEMP_LO(%a0) # lo(man) is now zero | ||
10870 | |||
10871 | mov.l %d2, %d0 # return shift amount | ||
10872 | |||
10873 | mov.l (%sp)+, %d3 # restore temp regs | ||
10874 | mov.l (%sp)+, %d2 | ||
10875 | |||
10876 | rts | ||
10877 | |||
10878 | ######################################################################### | ||
10879 | # unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO # | ||
10880 | # - returns corresponding optype tag # | ||
10881 | # # | ||
10882 | # XDEF **************************************************************** # | ||
10883 | # unnorm_fix() # | ||
10884 | # # | ||
10885 | # XREF **************************************************************** # | ||
10886 | # norm() - normalize the mantissa # | ||
10887 | # # | ||
10888 | # INPUT *************************************************************** # | ||
10889 | # a0 = pointer to unnormalized extended precision number # | ||
10890 | # # | ||
10891 | # OUTPUT ************************************************************** # | ||
10892 | # d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO # | ||
10893 | # a0 = input operand has been converted to a norm, denorm, or # | ||
10894 | # zero; both the exponent and mantissa are changed. # | ||
10895 | # # | ||
10896 | ######################################################################### | ||
# Register use: d1 is overwritten and not restored. Only the low byte of
# d0 is written with the tag (mov.b); the upper bits hold leftovers.
#
# With "shift" = number of left shifts that would fully normalize the
# 64-bit mantissa and "exp" = the 15-bit exponent field:
#   mantissa == 0 : exponent field cleared (sign kept)         -> ZERO
#   shift <= exp  : exp reduced by shift, mantissa normalized
#                   through norm()                             -> NORM
#   shift >  exp  : mantissa shifted left by exp bits only,
#                   exponent field cleared (sign kept)         -> DENORM
10897 | |||
10898 | global unnorm_fix | ||
10899 | unnorm_fix: | ||
10900 | bfffo FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed? | ||
10901 | bne.b unnorm_shift # hi(man) is not all zeroes | ||
10902 | |||
10903 | # | ||
10904 | # hi(man) is all zeroes so see if any bits in lo(man) are set | ||
10905 | # | ||
10906 | unnorm_chk_lo: | ||
10907 | bfffo FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero? | ||
10908 | beq.w unnorm_zero # yes | ||
10909 | |||
10910 | add.w &32, %d0 # no; fix shift distance | ||
10911 | |||
10912 | # | ||
10913 | # d0 = # shifts needed for complete normalization | ||
10914 | # | ||
10915 | unnorm_shift: | ||
10916 | clr.l %d1 # clear top word | ||
10917 | mov.w FTEMP_EX(%a0), %d1 # extract exponent | ||
10918 | and.w &0x7fff, %d1 # strip off sgn | ||
10919 | |||
# Going by the comments on the two compare/bgt pairs in this routine, bgt
# is taken when the first cmp operand exceeds the second: here, when the
# shift count in d0 is larger than the exponent in d1.
10920 | cmp.w %d0, %d1 # will denorm push exp < 0? | ||
10921 | bgt.b unnorm_nrm_zero # yes; denorm only until exp = 0 | ||
10922 | |||
10923 | # | ||
10924 | # exponent would not go < 0. therefore, number stays normalized | ||
10925 | # | ||
10926 | sub.w %d0, %d1 # shift exponent value | ||
10927 | mov.w FTEMP_EX(%a0), %d0 # load old exponent | ||
10928 | and.w &0x8000, %d0 # save old sign | ||
10929 | or.w %d0, %d1 # {sgn,new exp} | ||
10930 | mov.w %d1, FTEMP_EX(%a0) # insert new exponent | ||
10931 | |||
# norm() recomputes the shift count itself; its return value in d0 is
# discarded when the tag is loaded below.
10932 | bsr.l norm # normalize UNNORM | ||
10933 | |||
10934 | mov.b &NORM, %d0 # return new optype tag | ||
10935 | rts | ||
10936 | |||
10937 | # | ||
10938 | # exponent would go < 0, so only denormalize until exp = 0 | ||
10939 | # | ||
# On this path d1 < d0 <= 63, so the byte-sized compare below sees the
# whole exponent value.
10940 | unnorm_nrm_zero: | ||
10941 | cmp.b %d1, &32 # is exp <= 32? | ||
10942 | bgt.b unnorm_nrm_zero_lrg # no; go handle large exponent | ||
10943 | |||
# The 32-bit field starts d1 bits into hi(man) and therefore runs on into
# the following longword. NOTE(review): this relies on FTEMP_LO directly
# following FTEMP_HI in memory; confirm against the offset definitions.
10944 | bfextu FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man) | ||
10945 | mov.l %d0, FTEMP_HI(%a0) # save new hi(man) | ||
10946 | |||
10947 | mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man) | ||
10948 | lsl.l %d1, %d0 # extract new lo(man) | ||
10949 | mov.l %d0, FTEMP_LO(%a0) # save new lo(man) | ||
10950 | |||
10951 | and.w &0x8000, FTEMP_EX(%a0) # set exp = 0 | ||
10952 | |||
10953 | mov.b &DENORM, %d0 # return new optype tag | ||
10954 | rts | ||
10955 | |||
10956 | # | ||
10957 | # only mantissa bits set are in lo(man) | ||
10958 | # | ||
# (exp > 32 and shift > exp together mean shift > 32, which only happens
# when hi(man) is zero, so nothing is lost by overwriting hi(man) below.)
10959 | unnorm_nrm_zero_lrg: | ||
10960 | sub.w &32, %d1 # adjust shft amt by 32 | ||
10961 | |||
10962 | mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man) | ||
10963 | lsl.l %d1, %d0 # left shift lo(man) | ||
10964 | |||
10965 | mov.l %d0, FTEMP_HI(%a0) # store new hi(man) | ||
10966 | clr.l FTEMP_LO(%a0) # lo(man) = 0 | ||
10967 | |||
10968 | and.w &0x8000, FTEMP_EX(%a0) # set exp = 0 | ||
10969 | |||
10970 | mov.b &DENORM, %d0 # return new optype tag | ||
10971 | rts | ||
10972 | |||
10973 | # | ||
10974 | # whole mantissa is zero so this UNNORM is actually a zero | ||
10975 | # | ||
10976 | unnorm_zero: | ||
10977 | and.w &0x8000, FTEMP_EX(%a0) # force exponent to zero | ||
10978 | |||
10979 | mov.b &ZERO, %d0 # fix optype tag | ||
10980 | rts | ||