aboutsummaryrefslogtreecommitdiffstats
path: root/arch/m68k/ifpsp060/src/fplsp.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/m68k/ifpsp060/src/fplsp.S')
-rw-r--r--arch/m68k/ifpsp060/src/fplsp.S10980
1 files changed, 10980 insertions, 0 deletions
diff --git a/arch/m68k/ifpsp060/src/fplsp.S b/arch/m68k/ifpsp060/src/fplsp.S
new file mode 100644
index 000000000000..fdb79b927ef1
--- /dev/null
+++ b/arch/m68k/ifpsp060/src/fplsp.S
@@ -0,0 +1,10980 @@
1~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
3M68000 Hi-Performance Microprocessor Division
4M68060 Software Package
5Production Release P1.00 -- October 10, 1994
6
7M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved.
8
9THE SOFTWARE is provided on an "AS IS" basis and without warranty.
10To the maximum extent permitted by applicable law,
11MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
12INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
13and any warranty against infringement with regard to the SOFTWARE
14(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
15
16To the maximum extent permitted by applicable law,
17IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
18(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
19BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
20ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
21Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
22
23You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
24so long as this entire notice is retained without alteration in any modified and/or
25redistributed versions, and that such modified versions are clearly identified as such.
26No licenses are granted by implication, estoppel or otherwise under any patents
27or trademarks of Motorola, Inc.
28~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
29#
30# lfptop.s:
31# This file is appended to the top of the 060ILSP package
32# and contains the entry points into the package. The user, in
33# effect, branches to one of the branch table entries located here.
34#
35
# Entry-point branch table.  Each entry is a `bra.l` to one routine
# followed by a zero `short`.  Entries come in groups of three whose
# names end in s, d and x (the entry points visible further down load a
# single, double and extended operand respectively).
# NOTE(review): callers presumably rely on the position of each entry in
# this table, so the order should be treated as fixed -- confirm before
# reordering or inserting entries.
36 bra.l _facoss_
37 short 0x0000
38 bra.l _facosd_
39 short 0x0000
40 bra.l _facosx_
41 short 0x0000
42
43 bra.l _fasins_
44 short 0x0000
45 bra.l _fasind_
46 short 0x0000
47 bra.l _fasinx_
48 short 0x0000
49
50 bra.l _fatans_
51 short 0x0000
52 bra.l _fatand_
53 short 0x0000
54 bra.l _fatanx_
55 short 0x0000
56
57 bra.l _fatanhs_
58 short 0x0000
59 bra.l _fatanhd_
60 short 0x0000
61 bra.l _fatanhx_
62 short 0x0000
63
64 bra.l _fcoss_
65 short 0x0000
66 bra.l _fcosd_
67 short 0x0000
68 bra.l _fcosx_
69 short 0x0000
70
71 bra.l _fcoshs_
72 short 0x0000
73 bra.l _fcoshd_
74 short 0x0000
75 bra.l _fcoshx_
76 short 0x0000
77
78 bra.l _fetoxs_
79 short 0x0000
80 bra.l _fetoxd_
81 short 0x0000
82 bra.l _fetoxx_
83 short 0x0000
84
85 bra.l _fetoxm1s_
86 short 0x0000
87 bra.l _fetoxm1d_
88 short 0x0000
89 bra.l _fetoxm1x_
90 short 0x0000
91
92 bra.l _fgetexps_
93 short 0x0000
94 bra.l _fgetexpd_
95 short 0x0000
96 bra.l _fgetexpx_
97 short 0x0000
98
99 bra.l _fgetmans_
100 short 0x0000
101 bra.l _fgetmand_
102 short 0x0000
103 bra.l _fgetmanx_
104 short 0x0000
105
106 bra.l _flog10s_
107 short 0x0000
108 bra.l _flog10d_
109 short 0x0000
110 bra.l _flog10x_
111 short 0x0000
112
113 bra.l _flog2s_
114 short 0x0000
115 bra.l _flog2d_
116 short 0x0000
117 bra.l _flog2x_
118 short 0x0000
119
120 bra.l _flogns_
121 short 0x0000
122 bra.l _flognd_
123 short 0x0000
124 bra.l _flognx_
125 short 0x0000
126
127 bra.l _flognp1s_
128 short 0x0000
129 bra.l _flognp1d_
130 short 0x0000
131 bra.l _flognp1x_
132 short 0x0000
133
134 bra.l _fmods_
135 short 0x0000
136 bra.l _fmodd_
137 short 0x0000
138 bra.l _fmodx_
139 short 0x0000
140
141 bra.l _frems_
142 short 0x0000
143 bra.l _fremd_
144 short 0x0000
145 bra.l _fremx_
146 short 0x0000
147
148 bra.l _fscales_
149 short 0x0000
150 bra.l _fscaled_
151 short 0x0000
152 bra.l _fscalex_
153 short 0x0000
154
155 bra.l _fsins_
156 short 0x0000
157 bra.l _fsind_
158 short 0x0000
159 bra.l _fsinx_
160 short 0x0000
161
162 bra.l _fsincoss_
163 short 0x0000
164 bra.l _fsincosd_
165 short 0x0000
166 bra.l _fsincosx_
167 short 0x0000
168
169 bra.l _fsinhs_
170 short 0x0000
171 bra.l _fsinhd_
172 short 0x0000
173 bra.l _fsinhx_
174 short 0x0000
175
176 bra.l _ftans_
177 short 0x0000
178 bra.l _ftand_
179 short 0x0000
180 bra.l _ftanx_
181 short 0x0000
182
183 bra.l _ftanhs_
184 short 0x0000
185 bra.l _ftanhd_
186 short 0x0000
187 bra.l _ftanhx_
188 short 0x0000
189
190 bra.l _ftentoxs_
191 short 0x0000
192 bra.l _ftentoxd_
193 short 0x0000
194 bra.l _ftentoxx_
195 short 0x0000
196
197 bra.l _ftwotoxs_
198 short 0x0000
199 bra.l _ftwotoxd_
200 short 0x0000
201 bra.l _ftwotoxx_
202 short 0x0000
203
204 bra.l _fabss_
205 short 0x0000
206 bra.l _fabsd_
207 short 0x0000
208 bra.l _fabsx_
209 short 0x0000
210
211 bra.l _fadds_
212 short 0x0000
213 bra.l _faddd_
214 short 0x0000
215 bra.l _faddx_
216 short 0x0000
217
218 bra.l _fdivs_
219 short 0x0000
220 bra.l _fdivd_
221 short 0x0000
222 bra.l _fdivx_
223 short 0x0000
224
225 bra.l _fints_
226 short 0x0000
227 bra.l _fintd_
228 short 0x0000
229 bra.l _fintx_
230 short 0x0000
231
232 bra.l _fintrzs_
233 short 0x0000
234 bra.l _fintrzd_
235 short 0x0000
236 bra.l _fintrzx_
237 short 0x0000
238
239 bra.l _fmuls_
240 short 0x0000
241 bra.l _fmuld_
242 short 0x0000
243 bra.l _fmulx_
244 short 0x0000
245
246 bra.l _fnegs_
247 short 0x0000
248 bra.l _fnegd_
249 short 0x0000
250 bra.l _fnegx_
251 short 0x0000
252
253 bra.l _fsqrts_
254 short 0x0000
255 bra.l _fsqrtd_
256 short 0x0000
257 bra.l _fsqrtx_
258 short 0x0000
259
260 bra.l _fsubs_
261 short 0x0000
262 bra.l _fsubd_
263 short 0x0000
264 bra.l _fsubx_
265 short 0x0000
266
267# leave room for future possible additions
# (the table is padded out to a 0x400 boundary, so code after it does not
# move when entries are appended)
268 align 0x400
269
270#
271# This file contains a set of define statements for constants
272# in order to promote readability within the corecode itself.
273#
274
# Frame layout constants.  The entry points in this file address these as
# NAME(%a6) after executing `link %a6,&-LOCAL_SIZE`, so LV (= -LOCAL_SIZE)
# is the offset of the lowest byte of the local frame.
275set LOCAL_SIZE, 192 # stack frame size(bytes)
276set LV, -LOCAL_SIZE # stack offset
277
278set EXC_SR, 0x4 # stack status register
279set EXC_PC, 0x6 # stack pc
280set EXC_VOFF, 0xa # stacked vector offset
281set EXC_EA, 0xc # stacked <ea>
282
283set EXC_FP, 0x0 # frame pointer
284
# Register save areas at the top of the local frame: 32 bytes (8 longwords)
# for the data registers, 32 bytes for the address registers, then three
# 12-byte extended-precision slots ending at offset 0.
285set EXC_AREGS, -68 # offset of all address regs
286set EXC_DREGS, -100 # offset of all data regs
287set EXC_FPREGS, -36 # offset of all fp regs
288
289set EXC_A7, EXC_AREGS+(7*4) # offset of saved a7
290set OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7; NOTE(review): same offset as EXC_A6 - confirm intent
291set EXC_A6, EXC_AREGS+(6*4) # offset of saved a6
292set EXC_A5, EXC_AREGS+(5*4)
293set EXC_A4, EXC_AREGS+(4*4)
294set EXC_A3, EXC_AREGS+(3*4)
295set EXC_A2, EXC_AREGS+(2*4)
296set EXC_A1, EXC_AREGS+(1*4)
297set EXC_A0, EXC_AREGS+(0*4)
298set EXC_D7, EXC_DREGS+(7*4)
299set EXC_D6, EXC_DREGS+(6*4)
300set EXC_D5, EXC_DREGS+(5*4)
301set EXC_D4, EXC_DREGS+(4*4)
302set EXC_D3, EXC_DREGS+(3*4)
303set EXC_D2, EXC_DREGS+(2*4)
304set EXC_D1, EXC_DREGS+(1*4)
305set EXC_D0, EXC_DREGS+(0*4)
306
307set EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0
308set EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1
309set EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used)
310
# Four 12-byte extended-precision operand slots, 12 bytes apart
# (LV+44 .. LV+91).  Within each slot: _EX at +0, _SGN at +2, _HI at +4,
# _LO at +8.
311set FP_SCR1, LV+80 # fp scratch 1
312set FP_SCR1_EX, FP_SCR1+0
313set FP_SCR1_SGN, FP_SCR1+2
314set FP_SCR1_HI, FP_SCR1+4
315set FP_SCR1_LO, FP_SCR1+8
316
317set FP_SCR0, LV+68 # fp scratch 0
318set FP_SCR0_EX, FP_SCR0+0
319set FP_SCR0_SGN, FP_SCR0+2
320set FP_SCR0_HI, FP_SCR0+4
321set FP_SCR0_LO, FP_SCR0+8
322
323set FP_DST, LV+56 # fp destination operand
324set FP_DST_EX, FP_DST+0
325set FP_DST_SGN, FP_DST+2
326set FP_DST_HI, FP_DST+4
327set FP_DST_LO, FP_DST+8
328
329set FP_SRC, LV+44 # fp source operand
330set FP_SRC_EX, FP_SRC+0
331set FP_SRC_SGN, FP_SRC+2
332set FP_SRC_HI, FP_SRC+4
333set FP_SRC_LO, FP_SRC+8
334
# Saved FP control registers.  The entry points store FPCR and FPSR with a
# single fmovm.l to USER_FPCR, which fills USER_FPCR and USER_FPSR.
335set USER_FPIAR, LV+40 # FP instr address register
336
337set USER_FPSR, LV+36 # FP status register
338set FPSR_CC, USER_FPSR+0 # FPSR condition codes
339set FPSR_QBYTE, USER_FPSR+1 # FPSR quotient byte
340set FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte
341set FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte
342
343set USER_FPCR, LV+32 # FP control register
344set FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable
345set FPCR_MODE, USER_FPCR+3 # FPCR rounding mode control
346
347set L_SCR3, LV+28 # integer scratch 3
348set L_SCR2, LV+24 # integer scratch 2
349set L_SCR1, LV+20 # integer scratch 1
350
351set STORE_FLG, LV+19 # flag: operand store (ie. not fcmp/ftst)
352
353set EXC_TEMP2, LV+24 # temporary space (same offset as L_SCR2)
354set EXC_TEMP, LV+16 # temporary space
355
356set DTAG, LV+15 # destination operand type
357set STAG, LV+14 # source operand type
358
359set SPCOND_FLG, LV+10 # flag: special case (see below)
360
361set EXC_CC, LV+8 # saved condition codes
362set EXC_EXTWPTR, LV+4 # saved current PC (active)
363set EXC_EXTWORD, LV+2 # saved extension word
364set EXC_CMDREG, LV+2 # saved extension word (same offset as EXC_EXTWORD)
365set EXC_OPWORD, LV+0 # saved operation word
366
367################################
368
369# Helpful macros
370
# Field offsets inside a 12-byte extended-precision memory image, under
# four naming prefixes.  Each prefix uses the same offsets: _EX 0,
# _SGN 2 (FTEMP/LOCAL only), _HI 4, _LO 8, _GRS 12 (FTEMP/LOCAL only).
371set FTEMP, 0 # offsets within an
372set FTEMP_EX, 0 # extended precision
373set FTEMP_SGN, 2 # value saved in memory.
374set FTEMP_HI, 4
375set FTEMP_LO, 8
376set FTEMP_GRS, 12
377
378set LOCAL, 0 # offsets within an
379set LOCAL_EX, 0 # extended precision
380set LOCAL_SGN, 2 # value saved in memory.
381set LOCAL_HI, 4
382set LOCAL_LO, 8
383set LOCAL_GRS, 12
384
385set DST, 0 # offsets within an
386set DST_EX, 0 # extended precision
387set DST_HI, 4 # value saved in memory.
388set DST_LO, 8
389
390set SRC, 0 # offsets within an
391set SRC_EX, 0 # extended precision
392set SRC_HI, 4 # value saved in memory.
393set SRC_LO, 8
394
# Exponent limits, written as extended-precision biased exponents:
# xxx_LO = EXT_BIAS - xxx_BIAS + 1 and xxx_HI = EXT_BIAS + xxx_BIAS
# for the single and double formats.
395set SGL_LO, 0x3f81 # min sgl prec exponent
396set SGL_HI, 0x407e # max sgl prec exponent
397set DBL_LO, 0x3c01 # min dbl prec exponent
398set DBL_HI, 0x43fe # max dbl prec exponent
399set EXT_LO, 0x0 # min ext prec exponent
400set EXT_HI, 0x7ffe # max ext prec exponent
401
402set EXT_BIAS, 0x3fff # extended precision bias
403set SGL_BIAS, 0x007f # single precision bias
404set DBL_BIAS, 0x03ff # double precision bias
405
# Operand type codes.  The entry points store the value returned by `tag`
# in STAG and dispatch on it; NORM being zero is what lets them test for
# it with a plain tst.b.
406set NORM, 0x00 # operand type for STAG/DTAG
407set ZERO, 0x01 # operand type for STAG/DTAG
408set INF, 0x02 # operand type for STAG/DTAG
409set QNAN, 0x03 # operand type for STAG/DTAG
410set DENORM, 0x04 # operand type for STAG/DTAG
411set SNAN, 0x05 # operand type for STAG/DTAG
412set UNNORM, 0x06 # operand type for STAG/DTAG
413
414##################
415# FPSR/FPCR bits #
416##################
# Bit numbers within the condition-code byte (they match the *_bmask byte
# masks below: 1 << bit).
417set neg_bit, 0x3 # negative result
418set z_bit, 0x2 # zero result
419set inf_bit, 0x1 # infinite result
420set nan_bit, 0x0 # NAN result
421
422set q_sn_bit, 0x7 # sign bit of quotient byte
423
# Bit numbers within the exception status byte.
424set bsun_bit, 7 # branch on unordered
425set snan_bit, 6 # signalling NAN
426set operr_bit, 5 # operand error
427set ovfl_bit, 4 # overflow
428set unfl_bit, 3 # underflow
429set dz_bit, 2 # divide by zero
430set inex2_bit, 1 # inexact result 2
431set inex1_bit, 0 # inexact result 1
432
# Bit numbers within the accrued exception byte.
433set aiop_bit, 7 # accrued invalid operation bit
434set aovfl_bit, 6 # accrued overflow bit
435set aunfl_bit, 5 # accrued underflow bit
436set adz_bit, 4 # accrued dz bit
437set ainex_bit, 3 # accrued inexact bit
438
439#############################
440# FPSR individual bit masks #
441#############################
# Longword masks position each flag within the full 32-bit FPSR image:
# condition codes in bits 27-24, exception status in bits 15-8, accrued
# exceptions in bits 7-3.
442set neg_mask, 0x08000000 # negative bit mask (lw)
443set inf_mask, 0x02000000 # infinity bit mask (lw)
444set z_mask, 0x04000000 # zero bit mask (lw)
445set nan_mask, 0x01000000 # nan bit mask (lw)
446
447set neg_bmask, 0x08 # negative bit mask (byte)
448set inf_bmask, 0x02 # infinity bit mask (byte)
449set z_bmask, 0x04 # zero bit mask (byte)
450set nan_bmask, 0x01 # nan bit mask (byte)
451
452set bsun_mask, 0x00008000 # bsun exception mask
453set snan_mask, 0x00004000 # snan exception mask
454set operr_mask, 0x00002000 # operr exception mask
455set ovfl_mask, 0x00001000 # overflow exception mask
456set unfl_mask, 0x00000800 # underflow exception mask
457set dz_mask, 0x00000400 # dz exception mask
458set inex2_mask, 0x00000200 # inex2 exception mask
459set inex1_mask, 0x00000100 # inex1 exception mask
460
461set aiop_mask, 0x00000080 # accrued invalid operation
462set aovfl_mask, 0x00000040 # accrued overflow
463set aunfl_mask, 0x00000020 # accrued underflow
464set adz_mask, 0x00000010 # accrued divide by zero
465set ainex_mask, 0x00000008 # accrued inexact
466
467######################################
468# FPSR combinations used in the FPSP #
469######################################
# Sums of the single-bit masks above.  The component bits are disjoint,
# so '+' here is the same as a bitwise OR.
470set dzinf_mask, inf_mask+dz_mask+adz_mask
471set opnan_mask, nan_mask+operr_mask+aiop_mask
472set nzi_mask, 0x01ffffff #clears N, Z, and I
473set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask
474set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask
475set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
476set inx1a_mask, inex1_mask+ainex_mask
477set inx2a_mask, inex2_mask+ainex_mask
478set snaniop_mask, nan_mask+snan_mask+aiop_mask
479set snaniop2_mask, snan_mask+aiop_mask
480set naniop_mask, nan_mask+aiop_mask
481set neginf_mask, neg_mask+inf_mask
482set infaiop_mask, inf_mask+aiop_mask
483set negz_mask, neg_mask+z_mask
484set opaop_mask, operr_mask+aiop_mask
485set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask
486set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask
487
488#########
489# misc. #
490#########
491set rnd_stky_bit, 29 # stky bit pos in longword
492
493set sign_bit, 0x7 # sign bit
494set signan_bit, 0x6 # signalling nan bit
495
# Same values as SGL_LO / DBL_LO.
496set sgl_thresh, 0x3f81 # minimum sgl exponent
497set dbl_thresh, 0x3c01 # minimum dbl exponent
498
499set x_mode, 0x0 # extended precision
500set s_mode, 0x4 # single precision
501set d_mode, 0x8 # double precision
502
503set rn_mode, 0x0 # round-to-nearest
504set rz_mode, 0x1 # round-to-zero
505set rm_mode, 0x2 # round-to-minus-infinity
506set rp_mode, 0x3 # round-to-plus-infinity
507
508set mantissalen, 64 # length of mantissa in bits
509
510set BYTE, 1 # len(byte) == 1 byte
511set WORD, 2 # len(word) == 2 bytes
512set LONG, 4 # len(longword) == 4 bytes
513
# Vector offsets, 4 bytes apart from 0xc0 to 0xd8.
514set BSUN_VEC, 0xc0 # bsun vector offset
515set INEX_VEC, 0xc4 # inexact vector offset
516set DZ_VEC, 0xc8 # dz vector offset
517set UNFL_VEC, 0xcc # unfl vector offset
518set OPERR_VEC, 0xd0 # operr vector offset
519set OVFL_VEC, 0xd4 # ovfl vector offset
520set SNAN_VEC, 0xd8 # snan vector offset
521
522###########################
523# SPecial CONDition FLaGs #
524###########################
# *_flg are byte masks, *_bit the matching bit numbers (flg == 1 << bit).
# fmovm_flg (0x40) has no *_bit counterpart in this list.
525set ftrapcc_flg, 0x01 # flag bit: ftrapcc exception
526set fbsun_flg, 0x02 # flag bit: bsun exception
527set mia7_flg, 0x04 # flag bit: (a7)+ <ea>
528set mda7_flg, 0x08 # flag bit: -(a7) <ea>
529set fmovm_flg, 0x40 # flag bit: fmovm instruction
530set immed_flg, 0x80 # flag bit: &<data> <ea>
531
532set ftrapcc_bit, 0x0
533set fbsun_bit, 0x1
534set mia7_bit, 0x2
535set mda7_bit, 0x3
536set immed_bit, 0x7
537
538##################################
539# TRANSCENDENTAL "LAST-OP" FLAGS #
540##################################
541set FMUL_OP, 0x0 # fmul instr performed last
542set FDIV_OP, 0x1 # fdiv performed last
543set FADD_OP, 0x2 # fadd performed last
544set FMOV_OP, 0x3 # fmov performed last
545
546#############
547# CONSTANTS #
548#############
# Floating-point constants stored as raw longwords; the bit patterns must
# not be altered.
# T1/T2: two longwords each, i.e. 8 bytes.  Read as a double, T1 is
# approximately 16381*ln(2); T2 is labelled as its trailing correction.
549T1: long 0x40C62D38,0xD3D64634 # 16381 LOG2 LEAD
550T2: long 0x3D6F90AE,0xB1E75CC7 # 16381 LOG2 TRAIL
551
# PI and PIBY2: extended-precision images (exponent word, zero pad word,
# 64-bit mantissa) followed by one zero longword.  They differ only in
# the exponent (0x4000 vs 0x3fff), i.e. by a factor of two.
552PI: long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000
553PIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
554
# TWOBYPI: two longwords; read as a double this is approximately 2/pi.
555TWOBYPI:
556 long 0x3FE45F30,0x6DC9C883
557
558#########################################################################
559# MONADIC TEMPLATE #
560#########################################################################
#
# _fsins_ -- sine entry point, single-precision operand.
#
# The operand is read from 0x8(%a6) once the frame is linked and the
# result is left in %fp0.  On exit d0-d1/a0-a1 and fp1 are reloaded from
# the frame, and FPCR/FPSR are reloaded from the image at USER_FPCR.
#
        global  _fsins_
_fsins_:
        link    %a6,&-LOCAL_SIZE                # build the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # stash d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # stash FPCR and FPSR
        fmovm.x &0xc0,EXC_FP0(%a6)              # stash fp0/fp1

        fmov.l  &0x0,%fpcr                      # run with a zeroed FPCR

# Widen the operand into FP_SRC and classify it.
        fmov.s  0x8(%a6),%fp0                   # fetch sgl operand
        fmov.x  %fp0,FP_SRC(%a6)
        lea     FP_SRC(%a6),%a0                 # a0 -> extended operand
        bsr.l   tag                             # d0 = operand type
        mov.b   %d0,STAG(%a6)                   # record the source tag
        mov.b   %d0,%d1                         # keep tag in d1; d0 is reused

# Zero the FPSR_CC and FPSR_EXCEPT bytes of the saved FPSR image.
        andi.l  &0x00ff00ff,USER_FPSR(%a6)

        clr.l   %d0
        mov.b   FPCR_MODE(%a6),%d0              # d0 = saved rnd mode,prec

# Dispatch on the tag in d1.
        tst.b   %d1                             # NORM is tag 0x00
        bne.b   _L0_2s
        bsr.l   ssin                            # NORM
        bra.b   _L0_6s
_L0_2s:
        cmpi.b  %d1,&ZERO
        bne.b   _L0_3s
        bsr.l   src_zero                        # ZERO
        bra.b   _L0_6s
_L0_3s:
        cmpi.b  %d1,&INF
        bne.b   _L0_4s
        bsr.l   t_operr                         # INF
        bra.b   _L0_6s
_L0_4s:
        cmpi.b  %d1,&QNAN
        bne.b   _L0_5s
        bsr.l   src_qnan                        # QNAN
        bra.b   _L0_6s
_L0_5s:
        bsr.l   ssind                           # any other tag: DENORM path
_L0_6s:

# Common exit; the result stays in fp0.
        movm.l  EXC_DREGS(%a6),&0x0303          # reload d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # reload ctrl regs
        fmovm.x EXC_FP1(%a6),&0x40              # reload fp1 only
        unlk    %a6
        rts
617
#
# _fsind_ -- sine entry point, double-precision operand.
#
# The operand is read from 0x8(%a6) once the frame is linked and the
# result is left in %fp0.  On exit d0-d1/a0-a1 and fp1 are reloaded from
# the frame, and FPCR/FPSR are reloaded from the image at USER_FPCR.
#
# Modified from the Motorola P1.00 text: the original stored d1 to
# STAG(%a6) a second time just before the dispatch.  STAG already held
# that value and the store's condition codes were overwritten by the
# tst.b that followed, so the dead store is dropped, matching the
# single- and extended-operand entry points.
#
        global  _fsind_
_fsind_:
        link    %a6,&-LOCAL_SIZE                # build the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # save d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # save ctrl regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # save fp0/fp1

        fmov.l  &0x0,%fpcr                      # zero FPCR

# Widen the operand into FP_SRC and classify it.
        fmov.d  0x8(%a6),%fp0                   # load dbl input
        fmov.x  %fp0,FP_SRC(%a6)
        lea     FP_SRC(%a6),%a0                 # a0 -> extended operand
        bsr.l   tag                             # d0 = operand type
        mov.b   %d0,STAG(%a6)                   # record the source tag (once)
        mov.b   %d0,%d1                         # keep tag in d1; d0 is reused

# Zero the FPSR_CC and FPSR_EXCEPT bytes of the saved FPSR image.
        andi.l  &0x00ff00ff,USER_FPSR(%a6)

        clr.l   %d0
        mov.b   FPCR_MODE(%a6),%d0              # pass rnd mode,prec

# Dispatch on the tag in d1.
        tst.b   %d1                             # NORM is tag 0x00
        bne.b   _L0_2d
        bsr.l   ssin                            # operand is a NORM
        bra.b   _L0_6d
_L0_2d:
        cmpi.b  %d1,&ZERO                       # is operand a ZERO?
        bne.b   _L0_3d                          # no
        bsr.l   src_zero                        # yes
        bra.b   _L0_6d
_L0_3d:
        cmpi.b  %d1,&INF                        # is operand an INF?
        bne.b   _L0_4d                          # no
        bsr.l   t_operr                         # yes
        bra.b   _L0_6d
_L0_4d:
        cmpi.b  %d1,&QNAN                       # is operand a QNAN?
        bne.b   _L0_5d                          # no
        bsr.l   src_qnan                        # yes
        bra.b   _L0_6d
_L0_5d:
        bsr.l   ssind                           # any other tag: DENORM path
_L0_6d:

# Result is now in FP0
        movm.l  EXC_DREGS(%a6),&0x0303          # restore d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # restore ctrl regs
        fmovm.x EXC_FP1(%a6),&0x40              # restore fp1
        unlk    %a6
        rts
675
#
# _fsinx_ -- sine entry point, extended-precision operand.
#
# The 12-byte operand is copied from 0x8(%a6) into FP_SRC once the frame
# is linked; the result is left in %fp0.  On exit d0-d1/a0-a1 and fp1 are
# reloaded from the frame, and FPCR/FPSR from the image at USER_FPCR.
#
        global  _fsinx_
_fsinx_:
        link    %a6,&-LOCAL_SIZE                # build the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # stash d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # stash FPCR and FPSR
        fmovm.x &0xc0,EXC_FP0(%a6)              # stash fp0/fp1

        fmov.l  &0x0,%fpcr                      # run with a zeroed FPCR

# Copy the operand into FP_SRC (three longwords) and classify it.
        lea     FP_SRC(%a6),%a0                 # a0 -> FP_SRC
        mov.l   0x8+0x0(%a6),0x0(%a0)           # ext operand, longword 0
        mov.l   0x8+0x4(%a6),0x4(%a0)           # longword 1
        mov.l   0x8+0x8(%a6),0x8(%a0)           # longword 2
        bsr.l   tag                             # d0 = operand type
        mov.b   %d0,STAG(%a6)                   # record the source tag
        mov.b   %d0,%d1                         # keep tag in d1; d0 is reused

# Zero the FPSR_CC and FPSR_EXCEPT bytes of the saved FPSR image.
        andi.l  &0x00ff00ff,USER_FPSR(%a6)

        clr.l   %d0
        mov.b   FPCR_MODE(%a6),%d0              # d0 = saved rnd mode,prec

# Dispatch on the tag in d1.
        tst.b   %d1                             # NORM is tag 0x00
        bne.b   _L0_2x
        bsr.l   ssin                            # NORM
        bra.b   _L0_6x
_L0_2x:
        cmpi.b  %d1,&ZERO
        bne.b   _L0_3x
        bsr.l   src_zero                        # ZERO
        bra.b   _L0_6x
_L0_3x:
        cmpi.b  %d1,&INF
        bne.b   _L0_4x
        bsr.l   t_operr                         # INF
        bra.b   _L0_6x
_L0_4x:
        cmpi.b  %d1,&QNAN
        bne.b   _L0_5x
        bsr.l   src_qnan                        # QNAN
        bra.b   _L0_6x
_L0_5x:
        bsr.l   ssind                           # any other tag: DENORM path
_L0_6x:

# Common exit; the result stays in fp0.
        movm.l  EXC_DREGS(%a6),&0x0303          # reload d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # reload ctrl regs
        fmovm.x EXC_FP1(%a6),&0x40              # reload fp1 only
        unlk    %a6
        rts
733
734
735#########################################################################
736# MONADIC TEMPLATE #
737#########################################################################
#
# _fcoss_ -- cosine entry point, single-precision operand.
#
# The operand is read from 0x8(%a6) once the frame is linked and the
# result is left in %fp0.  On exit d0-d1/a0-a1 and fp1 are reloaded from
# the frame, and FPCR/FPSR are reloaded from the image at USER_FPCR.
#
        global  _fcoss_
_fcoss_:
        link    %a6,&-LOCAL_SIZE                # build the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # stash d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # stash FPCR and FPSR
        fmovm.x &0xc0,EXC_FP0(%a6)              # stash fp0/fp1

        fmov.l  &0x0,%fpcr                      # run with a zeroed FPCR

# Widen the operand into FP_SRC and classify it.
        fmov.s  0x8(%a6),%fp0                   # fetch sgl operand
        fmov.x  %fp0,FP_SRC(%a6)
        lea     FP_SRC(%a6),%a0                 # a0 -> extended operand
        bsr.l   tag                             # d0 = operand type
        mov.b   %d0,STAG(%a6)                   # record the source tag
        mov.b   %d0,%d1                         # keep tag in d1; d0 is reused

# Zero the FPSR_CC and FPSR_EXCEPT bytes of the saved FPSR image.
        andi.l  &0x00ff00ff,USER_FPSR(%a6)

        clr.l   %d0
        mov.b   FPCR_MODE(%a6),%d0              # d0 = saved rnd mode,prec

# Dispatch on the tag in d1.
        tst.b   %d1                             # NORM is tag 0x00
        bne.b   _L1_2s
        bsr.l   scos                            # NORM
        bra.b   _L1_6s
_L1_2s:
        cmpi.b  %d1,&ZERO
        bne.b   _L1_3s
        bsr.l   ld_pone                         # ZERO
        bra.b   _L1_6s
_L1_3s:
        cmpi.b  %d1,&INF
        bne.b   _L1_4s
        bsr.l   t_operr                         # INF
        bra.b   _L1_6s
_L1_4s:
        cmpi.b  %d1,&QNAN
        bne.b   _L1_5s
        bsr.l   src_qnan                        # QNAN
        bra.b   _L1_6s
_L1_5s:
        bsr.l   scosd                           # any other tag: DENORM path
_L1_6s:

# Common exit; the result stays in fp0.
        movm.l  EXC_DREGS(%a6),&0x0303          # reload d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # reload ctrl regs
        fmovm.x EXC_FP1(%a6),&0x40              # reload fp1 only
        unlk    %a6
        rts
794
#
# _fcosd_ -- cosine entry point, double-precision operand.
#
# The operand is read from 0x8(%a6) once the frame is linked and the
# result is left in %fp0.  On exit d0-d1/a0-a1 and fp1 are reloaded from
# the frame, and FPCR/FPSR are reloaded from the image at USER_FPCR.
#
# Modified from the Motorola P1.00 text: the original stored d1 to
# STAG(%a6) a second time just before the dispatch.  STAG already held
# that value and the store's condition codes were overwritten by the
# tst.b that followed, so the dead store is dropped, matching the
# single- and extended-operand entry points.
#
        global  _fcosd_
_fcosd_:
        link    %a6,&-LOCAL_SIZE                # build the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # save d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # save ctrl regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # save fp0/fp1

        fmov.l  &0x0,%fpcr                      # zero FPCR

# Widen the operand into FP_SRC and classify it.
        fmov.d  0x8(%a6),%fp0                   # load dbl input
        fmov.x  %fp0,FP_SRC(%a6)
        lea     FP_SRC(%a6),%a0                 # a0 -> extended operand
        bsr.l   tag                             # d0 = operand type
        mov.b   %d0,STAG(%a6)                   # record the source tag (once)
        mov.b   %d0,%d1                         # keep tag in d1; d0 is reused

# Zero the FPSR_CC and FPSR_EXCEPT bytes of the saved FPSR image.
        andi.l  &0x00ff00ff,USER_FPSR(%a6)

        clr.l   %d0
        mov.b   FPCR_MODE(%a6),%d0              # pass rnd mode,prec

# Dispatch on the tag in d1.
        tst.b   %d1                             # NORM is tag 0x00
        bne.b   _L1_2d
        bsr.l   scos                            # operand is a NORM
        bra.b   _L1_6d
_L1_2d:
        cmpi.b  %d1,&ZERO                       # is operand a ZERO?
        bne.b   _L1_3d                          # no
        bsr.l   ld_pone                         # yes
        bra.b   _L1_6d
_L1_3d:
        cmpi.b  %d1,&INF                        # is operand an INF?
        bne.b   _L1_4d                          # no
        bsr.l   t_operr                         # yes
        bra.b   _L1_6d
_L1_4d:
        cmpi.b  %d1,&QNAN                       # is operand a QNAN?
        bne.b   _L1_5d                          # no
        bsr.l   src_qnan                        # yes
        bra.b   _L1_6d
_L1_5d:
        bsr.l   scosd                           # any other tag: DENORM path
_L1_6d:

# Result is now in FP0
        movm.l  EXC_DREGS(%a6),&0x0303          # restore d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # restore ctrl regs
        fmovm.x EXC_FP1(%a6),&0x40              # restore fp1
        unlk    %a6
        rts
852
#
# _fcosx_ -- cosine entry point, extended-precision operand.
#
# The 12-byte operand is copied from 0x8(%a6) into FP_SRC once the frame
# is linked; the result is left in %fp0.  On exit d0-d1/a0-a1 and fp1 are
# reloaded from the frame, and FPCR/FPSR from the image at USER_FPCR.
#
        global  _fcosx_
_fcosx_:
        link    %a6,&-LOCAL_SIZE                # build the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # stash d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # stash FPCR and FPSR
        fmovm.x &0xc0,EXC_FP0(%a6)              # stash fp0/fp1

        fmov.l  &0x0,%fpcr                      # run with a zeroed FPCR

# Copy the operand into FP_SRC (three longwords) and classify it.
        lea     FP_SRC(%a6),%a0                 # a0 -> FP_SRC
        mov.l   0x8+0x0(%a6),0x0(%a0)           # ext operand, longword 0
        mov.l   0x8+0x4(%a6),0x4(%a0)           # longword 1
        mov.l   0x8+0x8(%a6),0x8(%a0)           # longword 2
        bsr.l   tag                             # d0 = operand type
        mov.b   %d0,STAG(%a6)                   # record the source tag
        mov.b   %d0,%d1                         # keep tag in d1; d0 is reused

# Zero the FPSR_CC and FPSR_EXCEPT bytes of the saved FPSR image.
        andi.l  &0x00ff00ff,USER_FPSR(%a6)

        clr.l   %d0
        mov.b   FPCR_MODE(%a6),%d0              # d0 = saved rnd mode,prec

# Dispatch on the tag in d1.
        tst.b   %d1                             # NORM is tag 0x00
        bne.b   _L1_2x
        bsr.l   scos                            # NORM
        bra.b   _L1_6x
_L1_2x:
        cmpi.b  %d1,&ZERO
        bne.b   _L1_3x
        bsr.l   ld_pone                         # ZERO
        bra.b   _L1_6x
_L1_3x:
        cmpi.b  %d1,&INF
        bne.b   _L1_4x
        bsr.l   t_operr                         # INF
        bra.b   _L1_6x
_L1_4x:
        cmpi.b  %d1,&QNAN
        bne.b   _L1_5x
        bsr.l   src_qnan                        # QNAN
        bra.b   _L1_6x
_L1_5x:
        bsr.l   scosd                           # any other tag: DENORM path
_L1_6x:

# Common exit; the result stays in fp0.
        movm.l  EXC_DREGS(%a6),&0x0303          # reload d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # reload ctrl regs
        fmovm.x EXC_FP1(%a6),&0x40              # reload fp1 only
        unlk    %a6
        rts
910
911
912#########################################################################
913# MONADIC TEMPLATE #
914#########################################################################
#
# _fsinhs_ -- hyperbolic sine entry point, single-precision operand.
#
# The operand is read from 0x8(%a6) once the frame is linked and the
# result is left in %fp0.  On exit d0-d1/a0-a1 and fp1 are reloaded from
# the frame, and FPCR/FPSR are reloaded from the image at USER_FPCR.
#
        global  _fsinhs_
_fsinhs_:
        link    %a6,&-LOCAL_SIZE                # build the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # stash d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # stash FPCR and FPSR
        fmovm.x &0xc0,EXC_FP0(%a6)              # stash fp0/fp1

        fmov.l  &0x0,%fpcr                      # run with a zeroed FPCR

# Widen the operand into FP_SRC and classify it.
        fmov.s  0x8(%a6),%fp0                   # fetch sgl operand
        fmov.x  %fp0,FP_SRC(%a6)
        lea     FP_SRC(%a6),%a0                 # a0 -> extended operand
        bsr.l   tag                             # d0 = operand type
        mov.b   %d0,STAG(%a6)                   # record the source tag
        mov.b   %d0,%d1                         # keep tag in d1; d0 is reused

# Zero the FPSR_CC and FPSR_EXCEPT bytes of the saved FPSR image.
        andi.l  &0x00ff00ff,USER_FPSR(%a6)

        clr.l   %d0
        mov.b   FPCR_MODE(%a6),%d0              # d0 = saved rnd mode,prec

# Dispatch on the tag in d1.
        tst.b   %d1                             # NORM is tag 0x00
        bne.b   _L2_2s
        bsr.l   ssinh                           # NORM
        bra.b   _L2_6s
_L2_2s:
        cmpi.b  %d1,&ZERO
        bne.b   _L2_3s
        bsr.l   src_zero                        # ZERO
        bra.b   _L2_6s
_L2_3s:
        cmpi.b  %d1,&INF
        bne.b   _L2_4s
        bsr.l   src_inf                         # INF
        bra.b   _L2_6s
_L2_4s:
        cmpi.b  %d1,&QNAN
        bne.b   _L2_5s
        bsr.l   src_qnan                        # QNAN
        bra.b   _L2_6s
_L2_5s:
        bsr.l   ssinhd                          # any other tag: DENORM path
_L2_6s:

# Common exit; the result stays in fp0.
        movm.l  EXC_DREGS(%a6),&0x0303          # reload d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # reload ctrl regs
        fmovm.x EXC_FP1(%a6),&0x40              # reload fp1 only
        unlk    %a6
        rts
971
#
# _fsinhd_ -- hyperbolic sine entry point, double-precision operand.
#
# The operand is read from 0x8(%a6) once the frame is linked and the
# result is left in %fp0.  On exit d0-d1/a0-a1 and fp1 are reloaded from
# the frame, and FPCR/FPSR are reloaded from the image at USER_FPCR.
#
# Modified from the Motorola P1.00 text: the original stored d1 to
# STAG(%a6) a second time just before the dispatch.  STAG already held
# that value and the store's condition codes were overwritten by the
# tst.b that followed, so the dead store is dropped, matching the
# single- and extended-operand entry points.
#
        global  _fsinhd_
_fsinhd_:
        link    %a6,&-LOCAL_SIZE                # build the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # save d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # save ctrl regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # save fp0/fp1

        fmov.l  &0x0,%fpcr                      # zero FPCR

# Widen the operand into FP_SRC and classify it.
        fmov.d  0x8(%a6),%fp0                   # load dbl input
        fmov.x  %fp0,FP_SRC(%a6)
        lea     FP_SRC(%a6),%a0                 # a0 -> extended operand
        bsr.l   tag                             # d0 = operand type
        mov.b   %d0,STAG(%a6)                   # record the source tag (once)
        mov.b   %d0,%d1                         # keep tag in d1; d0 is reused

# Zero the FPSR_CC and FPSR_EXCEPT bytes of the saved FPSR image.
        andi.l  &0x00ff00ff,USER_FPSR(%a6)

        clr.l   %d0
        mov.b   FPCR_MODE(%a6),%d0              # pass rnd mode,prec

# Dispatch on the tag in d1.
        tst.b   %d1                             # NORM is tag 0x00
        bne.b   _L2_2d
        bsr.l   ssinh                           # operand is a NORM
        bra.b   _L2_6d
_L2_2d:
        cmpi.b  %d1,&ZERO                       # is operand a ZERO?
        bne.b   _L2_3d                          # no
        bsr.l   src_zero                        # yes
        bra.b   _L2_6d
_L2_3d:
        cmpi.b  %d1,&INF                        # is operand an INF?
        bne.b   _L2_4d                          # no
        bsr.l   src_inf                         # yes
        bra.b   _L2_6d
_L2_4d:
        cmpi.b  %d1,&QNAN                       # is operand a QNAN?
        bne.b   _L2_5d                          # no
        bsr.l   src_qnan                        # yes
        bra.b   _L2_6d
_L2_5d:
        bsr.l   ssinhd                          # any other tag: DENORM path
_L2_6d:

# Result is now in FP0
        movm.l  EXC_DREGS(%a6),&0x0303          # restore d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # restore ctrl regs
        fmovm.x EXC_FP1(%a6),&0x40              # restore fp1
        unlk    %a6
        rts
1029
#
# _fsinhx_ -- hyperbolic sine entry point, extended-precision operand.
#
# The 12-byte operand is copied from 0x8(%a6) into FP_SRC once the frame
# is linked; the result is left in %fp0.  On exit d0-d1/a0-a1 and fp1 are
# reloaded from the frame, and FPCR/FPSR from the image at USER_FPCR.
#
        global  _fsinhx_
_fsinhx_:
        link    %a6,&-LOCAL_SIZE                # build the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # stash d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # stash FPCR and FPSR
        fmovm.x &0xc0,EXC_FP0(%a6)              # stash fp0/fp1

        fmov.l  &0x0,%fpcr                      # run with a zeroed FPCR

# Copy the operand into FP_SRC (three longwords) and classify it.
        lea     FP_SRC(%a6),%a0                 # a0 -> FP_SRC
        mov.l   0x8+0x0(%a6),0x0(%a0)           # ext operand, longword 0
        mov.l   0x8+0x4(%a6),0x4(%a0)           # longword 1
        mov.l   0x8+0x8(%a6),0x8(%a0)           # longword 2
        bsr.l   tag                             # d0 = operand type
        mov.b   %d0,STAG(%a6)                   # record the source tag
        mov.b   %d0,%d1                         # keep tag in d1; d0 is reused

# Zero the FPSR_CC and FPSR_EXCEPT bytes of the saved FPSR image.
        andi.l  &0x00ff00ff,USER_FPSR(%a6)

        clr.l   %d0
        mov.b   FPCR_MODE(%a6),%d0              # d0 = saved rnd mode,prec

# Dispatch on the tag in d1.
        tst.b   %d1                             # NORM is tag 0x00
        bne.b   _L2_2x
        bsr.l   ssinh                           # NORM
        bra.b   _L2_6x
_L2_2x:
        cmpi.b  %d1,&ZERO
        bne.b   _L2_3x
        bsr.l   src_zero                        # ZERO
        bra.b   _L2_6x
_L2_3x:
        cmpi.b  %d1,&INF
        bne.b   _L2_4x
        bsr.l   src_inf                         # INF
        bra.b   _L2_6x
_L2_4x:
        cmpi.b  %d1,&QNAN
        bne.b   _L2_5x
        bsr.l   src_qnan                        # QNAN
        bra.b   _L2_6x
_L2_5x:
        bsr.l   ssinhd                          # any other tag: DENORM path
_L2_6x:

# Common exit; the result stays in fp0.
        movm.l  EXC_DREGS(%a6),&0x0303          # reload d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # reload ctrl regs
        fmovm.x EXC_FP1(%a6),&0x40              # reload fp1 only
        unlk    %a6
        rts
1087
1088
1089#########################################################################
1090# MONADIC TEMPLATE #
1091#########################################################################
#
# _flognp1s_ -- lognp1 entry point, single-precision operand.
#
# The operand is read from 0x8(%a6) once the frame is linked and the
# result is left in %fp0.  On exit d0-d1/a0-a1 and fp1 are reloaded from
# the frame, and FPCR/FPSR are reloaded from the image at USER_FPCR.
#
        global  _flognp1s_
_flognp1s_:
        link    %a6,&-LOCAL_SIZE                # build the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # stash d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # stash FPCR and FPSR
        fmovm.x &0xc0,EXC_FP0(%a6)              # stash fp0/fp1

        fmov.l  &0x0,%fpcr                      # run with a zeroed FPCR

# Widen the operand into FP_SRC and classify it.
        fmov.s  0x8(%a6),%fp0                   # fetch sgl operand
        fmov.x  %fp0,FP_SRC(%a6)
        lea     FP_SRC(%a6),%a0                 # a0 -> extended operand
        bsr.l   tag                             # d0 = operand type
        mov.b   %d0,STAG(%a6)                   # record the source tag
        mov.b   %d0,%d1                         # keep tag in d1; d0 is reused

# Zero the FPSR_CC and FPSR_EXCEPT bytes of the saved FPSR image.
        andi.l  &0x00ff00ff,USER_FPSR(%a6)

        clr.l   %d0
        mov.b   FPCR_MODE(%a6),%d0              # d0 = saved rnd mode,prec

# Dispatch on the tag in d1.
        tst.b   %d1                             # NORM is tag 0x00
        bne.b   _L3_2s
        bsr.l   slognp1                         # NORM
        bra.b   _L3_6s
_L3_2s:
        cmpi.b  %d1,&ZERO
        bne.b   _L3_3s
        bsr.l   src_zero                        # ZERO
        bra.b   _L3_6s
_L3_3s:
        cmpi.b  %d1,&INF
        bne.b   _L3_4s
        bsr.l   sopr_inf                        # INF
        bra.b   _L3_6s
_L3_4s:
        cmpi.b  %d1,&QNAN
        bne.b   _L3_5s
        bsr.l   src_qnan                        # QNAN
        bra.b   _L3_6s
_L3_5s:
        bsr.l   slognp1d                        # any other tag: DENORM path
_L3_6s:

# Common exit; the result stays in fp0.
        movm.l  EXC_DREGS(%a6),&0x0303          # reload d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # reload ctrl regs
        fmovm.x EXC_FP1(%a6),&0x40              # reload fp1 only
        unlk    %a6
        rts
1148
#
# _flognp1d_ -- lognp1 entry point, double-precision operand.
#
# The operand is read from 0x8(%a6) once the frame is linked and the
# result is left in %fp0.  On exit d0-d1/a0-a1 and fp1 are reloaded from
# the frame, and FPCR/FPSR are reloaded from the image at USER_FPCR.
#
# Modified from the Motorola P1.00 text: the original stored d1 to
# STAG(%a6) a second time just before the dispatch.  STAG already held
# that value and the store's condition codes were overwritten by the
# tst.b that followed, so the dead store is dropped, matching the
# single- and extended-operand entry points.
#
        global  _flognp1d_
_flognp1d_:
        link    %a6,&-LOCAL_SIZE                # build the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # save d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # save ctrl regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # save fp0/fp1

        fmov.l  &0x0,%fpcr                      # zero FPCR

# Widen the operand into FP_SRC and classify it.
        fmov.d  0x8(%a6),%fp0                   # load dbl input
        fmov.x  %fp0,FP_SRC(%a6)
        lea     FP_SRC(%a6),%a0                 # a0 -> extended operand
        bsr.l   tag                             # d0 = operand type
        mov.b   %d0,STAG(%a6)                   # record the source tag (once)
        mov.b   %d0,%d1                         # keep tag in d1; d0 is reused

# Zero the FPSR_CC and FPSR_EXCEPT bytes of the saved FPSR image.
        andi.l  &0x00ff00ff,USER_FPSR(%a6)

        clr.l   %d0
        mov.b   FPCR_MODE(%a6),%d0              # pass rnd mode,prec

# Dispatch on the tag in d1.
        tst.b   %d1                             # NORM is tag 0x00
        bne.b   _L3_2d
        bsr.l   slognp1                         # operand is a NORM
        bra.b   _L3_6d
_L3_2d:
        cmpi.b  %d1,&ZERO                       # is operand a ZERO?
        bne.b   _L3_3d                          # no
        bsr.l   src_zero                        # yes
        bra.b   _L3_6d
_L3_3d:
        cmpi.b  %d1,&INF                        # is operand an INF?
        bne.b   _L3_4d                          # no
        bsr.l   sopr_inf                        # yes
        bra.b   _L3_6d
_L3_4d:
        cmpi.b  %d1,&QNAN                       # is operand a QNAN?
        bne.b   _L3_5d                          # no
        bsr.l   src_qnan                        # yes
        bra.b   _L3_6d
_L3_5d:
        bsr.l   slognp1d                        # any other tag: DENORM path
_L3_6d:

# Result is now in FP0
        movm.l  EXC_DREGS(%a6),&0x0303          # restore d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # restore ctrl regs
        fmovm.x EXC_FP1(%a6),&0x40              # restore fp1
        unlk    %a6
        rts
1206
#
# _flognp1x_ -- lognp1 entry point, extended-precision operand.
#
# The 12-byte operand is copied from 0x8(%a6) into FP_SRC once the frame
# is linked; the result is left in %fp0.  On exit d0-d1/a0-a1 and fp1 are
# reloaded from the frame, and FPCR/FPSR from the image at USER_FPCR.
#
        global  _flognp1x_
_flognp1x_:
        link    %a6,&-LOCAL_SIZE                # build the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # stash d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # stash FPCR and FPSR
        fmovm.x &0xc0,EXC_FP0(%a6)              # stash fp0/fp1

        fmov.l  &0x0,%fpcr                      # run with a zeroed FPCR

# Copy the operand into FP_SRC (three longwords) and classify it.
        lea     FP_SRC(%a6),%a0                 # a0 -> FP_SRC
        mov.l   0x8+0x0(%a6),0x0(%a0)           # ext operand, longword 0
        mov.l   0x8+0x4(%a6),0x4(%a0)           # longword 1
        mov.l   0x8+0x8(%a6),0x8(%a0)           # longword 2
        bsr.l   tag                             # d0 = operand type
        mov.b   %d0,STAG(%a6)                   # record the source tag
        mov.b   %d0,%d1                         # keep tag in d1; d0 is reused

# Zero the FPSR_CC and FPSR_EXCEPT bytes of the saved FPSR image.
        andi.l  &0x00ff00ff,USER_FPSR(%a6)

        clr.l   %d0
        mov.b   FPCR_MODE(%a6),%d0              # d0 = saved rnd mode,prec

# Dispatch on the tag in d1.
        tst.b   %d1                             # NORM is tag 0x00
        bne.b   _L3_2x
        bsr.l   slognp1                         # NORM
        bra.b   _L3_6x
_L3_2x:
        cmpi.b  %d1,&ZERO
        bne.b   _L3_3x
        bsr.l   src_zero                        # ZERO
        bra.b   _L3_6x
_L3_3x:
        cmpi.b  %d1,&INF
        bne.b   _L3_4x
        bsr.l   sopr_inf                        # INF
        bra.b   _L3_6x
_L3_4x:
        cmpi.b  %d1,&QNAN
        bne.b   _L3_5x
        bsr.l   src_qnan                        # QNAN
        bra.b   _L3_6x
_L3_5x:
        bsr.l   slognp1d                        # any other tag: DENORM path
_L3_6x:

# Common exit; the result stays in fp0.
        movm.l  EXC_DREGS(%a6),&0x0303          # reload d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # reload ctrl regs
        fmovm.x EXC_FP1(%a6),&0x40              # reload fp1 only
        unlk    %a6
        rts
1264
1265
1266#########################################################################
1267# MONADIC TEMPLATE #
1268#########################################################################
# _fetoxm1s_: single-precision entry point. The operand at 0x8(%a6) is
# loaded through fp0, stored in extended format at FP_SRC(%a6) and
# classified by tag(); the tag then selects exactly one handler:
#   NORM -> setoxm1, ZERO -> src_zero, INF -> setoxm1i,
#   QNAN -> src_qnan, any other tag -> setoxm1d
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is left untouched because it holds the result.
1269 global _fetoxm1s_
1270_fetoxm1s_:
1271 link %a6,&-LOCAL_SIZE
1272
1273 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1274 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1275 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1276
1277 fmov.l &0x0,%fpcr # zero FPCR
1278
1279#
1280# copy, convert, and tag input argument
1281#
1282 fmov.s 0x8(%a6),%fp0 # load sgl input
1283 fmov.x %fp0,FP_SRC(%a6)
1284 lea FP_SRC(%a6),%a0
1285 bsr.l tag # fetch operand type
1286 mov.b %d0,STAG(%a6)
1287 mov.b %d0,%d1
1288
# mask the frame's USER_FPSR longword, keeping only bits 23-16 and 7-0
# NOTE(review): presumably drops stale condition-code / exception-status
# bytes before the handler runs -- confirm against the FPSR layout
1289 andi.l &0x00ff00ff,USER_FPSR(%a6)
1290
1291 clr.l %d0
1292 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1293
# d1 = operand tag: zero falls through to the NORM handler, ZERO/INF/QNAN
# are matched in turn, and anything else lands on the last handler
1294 tst.b %d1
1295 bne.b _L4_2s
1296 bsr.l setoxm1 # operand is a NORM
1297 bra.b _L4_6s
1298_L4_2s:
1299 cmpi.b %d1,&ZERO # is operand a ZERO?
1300 bne.b _L4_3s # no
1301 bsr.l src_zero # yes
1302 bra.b _L4_6s
1303_L4_3s:
1304 cmpi.b %d1,&INF # is operand an INF?
1305 bne.b _L4_4s # no
1306 bsr.l setoxm1i # yes
1307 bra.b _L4_6s
1308_L4_4s:
1309 cmpi.b %d1,&QNAN # is operand a QNAN?
1310 bne.b _L4_5s # no
1311 bsr.l src_qnan # yes
1312 bra.b _L4_6s
1313_L4_5s:
1314 bsr.l setoxm1d # operand is a DENORM
1315_L4_6s:
1316
1317#
1318# Result is now in FP0
1319#
1320 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1321 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# only fp1 is reloaded; fp0 keeps the result
1322 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1323 unlk %a6
1324 rts
1325
# _fetoxm1d_: double-precision entry point. The operand at 0x8(%a6) is
# loaded through fp0, stored in extended format at FP_SRC(%a6) and
# classified by tag(); the tag then selects exactly one handler:
#   NORM -> setoxm1, ZERO -> src_zero, INF -> setoxm1i,
#   QNAN -> src_qnan, any other tag -> setoxm1d
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is left untouched because it holds the result.
1326 global _fetoxm1d_
1327_fetoxm1d_:
1328 link %a6,&-LOCAL_SIZE
1329
1330 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1331 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1332 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1333
1334 fmov.l &0x0,%fpcr # zero FPCR
1335
1336#
1337# copy, convert, and tag input argument
1338#
1339 fmov.d 0x8(%a6),%fp0 # load dbl input
1340 fmov.x %fp0,FP_SRC(%a6)
1341 lea FP_SRC(%a6),%a0
1342 bsr.l tag # fetch operand type
1343 mov.b %d0,STAG(%a6)
1344 mov.b %d0,%d1
1345
# mask the frame's USER_FPSR longword, keeping only bits 23-16 and 7-0
# NOTE(review): presumably drops stale condition-code / exception-status
# bytes before the handler runs -- confirm against the FPSR layout
1346 andi.l &0x00ff00ff,USER_FPSR(%a6)
1347
1348 clr.l %d0
1349 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1350
# NOTE(review): STAG(%a6) was already written with this tag above; this
# second store looks redundant -- confirm frame layout before removing
1351 mov.b %d1,STAG(%a6)
# d1 = operand tag: zero falls through to the NORM handler, ZERO/INF/QNAN
# are matched in turn, and anything else lands on the last handler
1352 tst.b %d1
1353 bne.b _L4_2d
1354 bsr.l setoxm1 # operand is a NORM
1355 bra.b _L4_6d
1356_L4_2d:
1357 cmpi.b %d1,&ZERO # is operand a ZERO?
1358 bne.b _L4_3d # no
1359 bsr.l src_zero # yes
1360 bra.b _L4_6d
1361_L4_3d:
1362 cmpi.b %d1,&INF # is operand an INF?
1363 bne.b _L4_4d # no
1364 bsr.l setoxm1i # yes
1365 bra.b _L4_6d
1366_L4_4d:
1367 cmpi.b %d1,&QNAN # is operand a QNAN?
1368 bne.b _L4_5d # no
1369 bsr.l src_qnan # yes
1370 bra.b _L4_6d
1371_L4_5d:
1372 bsr.l setoxm1d # operand is a DENORM
1373_L4_6d:
1374
1375#
1376# Result is now in FP0
1377#
1378 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1379 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# only fp1 is reloaded; fp0 keeps the result
1380 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1381 unlk %a6
1382 rts
1383
# _fetoxm1x_: extended-precision entry point. The three longwords of the
# operand at 0x8(%a6) are copied to FP_SRC(%a6) and classified by tag();
# the tag then selects exactly one handler:
#   NORM -> setoxm1, ZERO -> src_zero, INF -> setoxm1i,
#   QNAN -> src_qnan, any other tag -> setoxm1d
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is left untouched because it holds the result.
1384 global _fetoxm1x_
1385_fetoxm1x_:
1386 link %a6,&-LOCAL_SIZE
1387
1388 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1389 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1390 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1391
1392 fmov.l &0x0,%fpcr # zero FPCR
1393
1394#
1395# copy, convert, and tag input argument
1396#
1397 lea FP_SRC(%a6),%a0
1398 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
1399 mov.l 0x8+0x4(%a6),0x4(%a0)
1400 mov.l 0x8+0x8(%a6),0x8(%a0)
1401 bsr.l tag # fetch operand type
1402 mov.b %d0,STAG(%a6)
1403 mov.b %d0,%d1
1404
# mask the frame's USER_FPSR longword, keeping only bits 23-16 and 7-0
# NOTE(review): presumably drops stale condition-code / exception-status
# bytes before the handler runs -- confirm against the FPSR layout
1405 andi.l &0x00ff00ff,USER_FPSR(%a6)
1406
1407 clr.l %d0
1408 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1409
# d1 = operand tag: zero falls through to the NORM handler, ZERO/INF/QNAN
# are matched in turn, and anything else lands on the last handler
1410 tst.b %d1
1411 bne.b _L4_2x
1412 bsr.l setoxm1 # operand is a NORM
1413 bra.b _L4_6x
1414_L4_2x:
1415 cmpi.b %d1,&ZERO # is operand a ZERO?
1416 bne.b _L4_3x # no
1417 bsr.l src_zero # yes
1418 bra.b _L4_6x
1419_L4_3x:
1420 cmpi.b %d1,&INF # is operand an INF?
1421 bne.b _L4_4x # no
1422 bsr.l setoxm1i # yes
1423 bra.b _L4_6x
1424_L4_4x:
1425 cmpi.b %d1,&QNAN # is operand a QNAN?
1426 bne.b _L4_5x # no
1427 bsr.l src_qnan # yes
1428 bra.b _L4_6x
1429_L4_5x:
1430 bsr.l setoxm1d # operand is a DENORM
1431_L4_6x:
1432
1433#
1434# Result is now in FP0
1435#
1436 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1437 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# only fp1 is reloaded; fp0 keeps the result
1438 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1439 unlk %a6
1440 rts
1441
1442
1443#########################################################################
1444# MONADIC TEMPLATE #
1445#########################################################################
# _ftanhs_: single-precision entry point. The operand at 0x8(%a6) is
# loaded through fp0, stored in extended format at FP_SRC(%a6) and
# classified by tag(); the tag then selects exactly one handler:
#   NORM -> stanh, ZERO -> src_zero, INF -> src_one,
#   QNAN -> src_qnan, any other tag -> stanhd
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is left untouched because it holds the result.
1446 global _ftanhs_
1447_ftanhs_:
1448 link %a6,&-LOCAL_SIZE
1449
1450 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1451 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1452 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1453
1454 fmov.l &0x0,%fpcr # zero FPCR
1455
1456#
1457# copy, convert, and tag input argument
1458#
1459 fmov.s 0x8(%a6),%fp0 # load sgl input
1460 fmov.x %fp0,FP_SRC(%a6)
1461 lea FP_SRC(%a6),%a0
1462 bsr.l tag # fetch operand type
1463 mov.b %d0,STAG(%a6)
1464 mov.b %d0,%d1
1465
# mask the frame's USER_FPSR longword, keeping only bits 23-16 and 7-0
# NOTE(review): presumably drops stale condition-code / exception-status
# bytes before the handler runs -- confirm against the FPSR layout
1466 andi.l &0x00ff00ff,USER_FPSR(%a6)
1467
1468 clr.l %d0
1469 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1470
# d1 = operand tag: zero falls through to the NORM handler, ZERO/INF/QNAN
# are matched in turn, and anything else lands on the last handler
1471 tst.b %d1
1472 bne.b _L5_2s
1473 bsr.l stanh # operand is a NORM
1474 bra.b _L5_6s
1475_L5_2s:
1476 cmpi.b %d1,&ZERO # is operand a ZERO?
1477 bne.b _L5_3s # no
1478 bsr.l src_zero # yes
1479 bra.b _L5_6s
1480_L5_3s:
1481 cmpi.b %d1,&INF # is operand an INF?
1482 bne.b _L5_4s # no
1483 bsr.l src_one # yes
1484 bra.b _L5_6s
1485_L5_4s:
1486 cmpi.b %d1,&QNAN # is operand a QNAN?
1487 bne.b _L5_5s # no
1488 bsr.l src_qnan # yes
1489 bra.b _L5_6s
1490_L5_5s:
1491 bsr.l stanhd # operand is a DENORM
1492_L5_6s:
1493
1494#
1495# Result is now in FP0
1496#
1497 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1498 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# only fp1 is reloaded; fp0 keeps the result
1499 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1500 unlk %a6
1501 rts
1502
# _ftanhd_: double-precision entry point. The operand at 0x8(%a6) is
# loaded through fp0, stored in extended format at FP_SRC(%a6) and
# classified by tag(); the tag then selects exactly one handler:
#   NORM -> stanh, ZERO -> src_zero, INF -> src_one,
#   QNAN -> src_qnan, any other tag -> stanhd
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is left untouched because it holds the result.
1503 global _ftanhd_
1504_ftanhd_:
1505 link %a6,&-LOCAL_SIZE
1506
1507 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1508 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1509 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1510
1511 fmov.l &0x0,%fpcr # zero FPCR
1512
1513#
1514# copy, convert, and tag input argument
1515#
1516 fmov.d 0x8(%a6),%fp0 # load dbl input
1517 fmov.x %fp0,FP_SRC(%a6)
1518 lea FP_SRC(%a6),%a0
1519 bsr.l tag # fetch operand type
1520 mov.b %d0,STAG(%a6)
1521 mov.b %d0,%d1
1522
# mask the frame's USER_FPSR longword, keeping only bits 23-16 and 7-0
# NOTE(review): presumably drops stale condition-code / exception-status
# bytes before the handler runs -- confirm against the FPSR layout
1523 andi.l &0x00ff00ff,USER_FPSR(%a6)
1524
1525 clr.l %d0
1526 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1527
# NOTE(review): STAG(%a6) was already written with this tag above; this
# second store looks redundant -- confirm frame layout before removing
1528 mov.b %d1,STAG(%a6)
# d1 = operand tag: zero falls through to the NORM handler, ZERO/INF/QNAN
# are matched in turn, and anything else lands on the last handler
1529 tst.b %d1
1530 bne.b _L5_2d
1531 bsr.l stanh # operand is a NORM
1532 bra.b _L5_6d
1533_L5_2d:
1534 cmpi.b %d1,&ZERO # is operand a ZERO?
1535 bne.b _L5_3d # no
1536 bsr.l src_zero # yes
1537 bra.b _L5_6d
1538_L5_3d:
1539 cmpi.b %d1,&INF # is operand an INF?
1540 bne.b _L5_4d # no
1541 bsr.l src_one # yes
1542 bra.b _L5_6d
1543_L5_4d:
1544 cmpi.b %d1,&QNAN # is operand a QNAN?
1545 bne.b _L5_5d # no
1546 bsr.l src_qnan # yes
1547 bra.b _L5_6d
1548_L5_5d:
1549 bsr.l stanhd # operand is a DENORM
1550_L5_6d:
1551
1552#
1553# Result is now in FP0
1554#
1555 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1556 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# only fp1 is reloaded; fp0 keeps the result
1557 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1558 unlk %a6
1559 rts
1560
# _ftanhx_: extended-precision entry point. The three longwords of the
# operand at 0x8(%a6) are copied to FP_SRC(%a6) and classified by tag();
# the tag then selects exactly one handler:
#   NORM -> stanh, ZERO -> src_zero, INF -> src_one,
#   QNAN -> src_qnan, any other tag -> stanhd
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is left untouched because it holds the result.
1561 global _ftanhx_
1562_ftanhx_:
1563 link %a6,&-LOCAL_SIZE
1564
1565 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1566 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1567 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1568
1569 fmov.l &0x0,%fpcr # zero FPCR
1570
1571#
1572# copy, convert, and tag input argument
1573#
1574 lea FP_SRC(%a6),%a0
1575 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
1576 mov.l 0x8+0x4(%a6),0x4(%a0)
1577 mov.l 0x8+0x8(%a6),0x8(%a0)
1578 bsr.l tag # fetch operand type
1579 mov.b %d0,STAG(%a6)
1580 mov.b %d0,%d1
1581
# mask the frame's USER_FPSR longword, keeping only bits 23-16 and 7-0
# NOTE(review): presumably drops stale condition-code / exception-status
# bytes before the handler runs -- confirm against the FPSR layout
1582 andi.l &0x00ff00ff,USER_FPSR(%a6)
1583
1584 clr.l %d0
1585 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1586
# d1 = operand tag: zero falls through to the NORM handler, ZERO/INF/QNAN
# are matched in turn, and anything else lands on the last handler
1587 tst.b %d1
1588 bne.b _L5_2x
1589 bsr.l stanh # operand is a NORM
1590 bra.b _L5_6x
1591_L5_2x:
1592 cmpi.b %d1,&ZERO # is operand a ZERO?
1593 bne.b _L5_3x # no
1594 bsr.l src_zero # yes
1595 bra.b _L5_6x
1596_L5_3x:
1597 cmpi.b %d1,&INF # is operand an INF?
1598 bne.b _L5_4x # no
1599 bsr.l src_one # yes
1600 bra.b _L5_6x
1601_L5_4x:
1602 cmpi.b %d1,&QNAN # is operand a QNAN?
1603 bne.b _L5_5x # no
1604 bsr.l src_qnan # yes
1605 bra.b _L5_6x
1606_L5_5x:
1607 bsr.l stanhd # operand is a DENORM
1608_L5_6x:
1609
1610#
1611# Result is now in FP0
1612#
1613 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1614 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# only fp1 is reloaded; fp0 keeps the result
1615 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1616 unlk %a6
1617 rts
1618
1619
1620#########################################################################
1621# MONADIC TEMPLATE #
1622#########################################################################
# _fatans_: single-precision entry point. The operand at 0x8(%a6) is
# loaded through fp0, stored in extended format at FP_SRC(%a6) and
# classified by tag(); the tag then selects exactly one handler:
#   NORM -> satan, ZERO -> src_zero, INF -> spi_2,
#   QNAN -> src_qnan, any other tag -> satand
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is left untouched because it holds the result.
1623 global _fatans_
1624_fatans_:
1625 link %a6,&-LOCAL_SIZE
1626
1627 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1628 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1629 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1630
1631 fmov.l &0x0,%fpcr # zero FPCR
1632
1633#
1634# copy, convert, and tag input argument
1635#
1636 fmov.s 0x8(%a6),%fp0 # load sgl input
1637 fmov.x %fp0,FP_SRC(%a6)
1638 lea FP_SRC(%a6),%a0
1639 bsr.l tag # fetch operand type
1640 mov.b %d0,STAG(%a6)
1641 mov.b %d0,%d1
1642
# mask the frame's USER_FPSR longword, keeping only bits 23-16 and 7-0
# NOTE(review): presumably drops stale condition-code / exception-status
# bytes before the handler runs -- confirm against the FPSR layout
1643 andi.l &0x00ff00ff,USER_FPSR(%a6)
1644
1645 clr.l %d0
1646 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1647
# d1 = operand tag: zero falls through to the NORM handler, ZERO/INF/QNAN
# are matched in turn, and anything else lands on the last handler
1648 tst.b %d1
1649 bne.b _L6_2s
1650 bsr.l satan # operand is a NORM
1651 bra.b _L6_6s
1652_L6_2s:
1653 cmpi.b %d1,&ZERO # is operand a ZERO?
1654 bne.b _L6_3s # no
1655 bsr.l src_zero # yes
1656 bra.b _L6_6s
1657_L6_3s:
1658 cmpi.b %d1,&INF # is operand an INF?
1659 bne.b _L6_4s # no
1660 bsr.l spi_2 # yes
1661 bra.b _L6_6s
1662_L6_4s:
1663 cmpi.b %d1,&QNAN # is operand a QNAN?
1664 bne.b _L6_5s # no
1665 bsr.l src_qnan # yes
1666 bra.b _L6_6s
1667_L6_5s:
1668 bsr.l satand # operand is a DENORM
1669_L6_6s:
1670
1671#
1672# Result is now in FP0
1673#
1674 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1675 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# only fp1 is reloaded; fp0 keeps the result
1676 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1677 unlk %a6
1678 rts
1679
# _fatand_: double-precision entry point. The operand at 0x8(%a6) is
# loaded through fp0, stored in extended format at FP_SRC(%a6) and
# classified by tag(); the tag then selects exactly one handler:
#   NORM -> satan, ZERO -> src_zero, INF -> spi_2,
#   QNAN -> src_qnan, any other tag -> satand
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is left untouched because it holds the result.
1680 global _fatand_
1681_fatand_:
1682 link %a6,&-LOCAL_SIZE
1683
1684 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1685 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1686 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1687
1688 fmov.l &0x0,%fpcr # zero FPCR
1689
1690#
1691# copy, convert, and tag input argument
1692#
1693 fmov.d 0x8(%a6),%fp0 # load dbl input
1694 fmov.x %fp0,FP_SRC(%a6)
1695 lea FP_SRC(%a6),%a0
1696 bsr.l tag # fetch operand type
1697 mov.b %d0,STAG(%a6)
1698 mov.b %d0,%d1
1699
# mask the frame's USER_FPSR longword, keeping only bits 23-16 and 7-0
# NOTE(review): presumably drops stale condition-code / exception-status
# bytes before the handler runs -- confirm against the FPSR layout
1700 andi.l &0x00ff00ff,USER_FPSR(%a6)
1701
1702 clr.l %d0
1703 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1704
# NOTE(review): STAG(%a6) was already written with this tag above; this
# second store looks redundant -- confirm frame layout before removing
1705 mov.b %d1,STAG(%a6)
# d1 = operand tag: zero falls through to the NORM handler, ZERO/INF/QNAN
# are matched in turn, and anything else lands on the last handler
1706 tst.b %d1
1707 bne.b _L6_2d
1708 bsr.l satan # operand is a NORM
1709 bra.b _L6_6d
1710_L6_2d:
1711 cmpi.b %d1,&ZERO # is operand a ZERO?
1712 bne.b _L6_3d # no
1713 bsr.l src_zero # yes
1714 bra.b _L6_6d
1715_L6_3d:
1716 cmpi.b %d1,&INF # is operand an INF?
1717 bne.b _L6_4d # no
1718 bsr.l spi_2 # yes
1719 bra.b _L6_6d
1720_L6_4d:
1721 cmpi.b %d1,&QNAN # is operand a QNAN?
1722 bne.b _L6_5d # no
1723 bsr.l src_qnan # yes
1724 bra.b _L6_6d
1725_L6_5d:
1726 bsr.l satand # operand is a DENORM
1727_L6_6d:
1728
1729#
1730# Result is now in FP0
1731#
1732 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1733 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# only fp1 is reloaded; fp0 keeps the result
1734 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1735 unlk %a6
1736 rts
1737
# _fatanx_: extended-precision entry point. The three longwords of the
# operand at 0x8(%a6) are copied to FP_SRC(%a6) and classified by tag();
# the tag then selects exactly one handler:
#   NORM -> satan, ZERO -> src_zero, INF -> spi_2,
#   QNAN -> src_qnan, any other tag -> satand
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is left untouched because it holds the result.
1738 global _fatanx_
1739_fatanx_:
1740 link %a6,&-LOCAL_SIZE
1741
1742 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1743 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1744 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1745
1746 fmov.l &0x0,%fpcr # zero FPCR
1747
1748#
1749# copy, convert, and tag input argument
1750#
1751 lea FP_SRC(%a6),%a0
1752 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
1753 mov.l 0x8+0x4(%a6),0x4(%a0)
1754 mov.l 0x8+0x8(%a6),0x8(%a0)
1755 bsr.l tag # fetch operand type
1756 mov.b %d0,STAG(%a6)
1757 mov.b %d0,%d1
1758
# mask the frame's USER_FPSR longword, keeping only bits 23-16 and 7-0
# NOTE(review): presumably drops stale condition-code / exception-status
# bytes before the handler runs -- confirm against the FPSR layout
1759 andi.l &0x00ff00ff,USER_FPSR(%a6)
1760
1761 clr.l %d0
1762 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1763
# d1 = operand tag: zero falls through to the NORM handler, ZERO/INF/QNAN
# are matched in turn, and anything else lands on the last handler
1764 tst.b %d1
1765 bne.b _L6_2x
1766 bsr.l satan # operand is a NORM
1767 bra.b _L6_6x
1768_L6_2x:
1769 cmpi.b %d1,&ZERO # is operand a ZERO?
1770 bne.b _L6_3x # no
1771 bsr.l src_zero # yes
1772 bra.b _L6_6x
1773_L6_3x:
1774 cmpi.b %d1,&INF # is operand an INF?
1775 bne.b _L6_4x # no
1776 bsr.l spi_2 # yes
1777 bra.b _L6_6x
1778_L6_4x:
1779 cmpi.b %d1,&QNAN # is operand a QNAN?
1780 bne.b _L6_5x # no
1781 bsr.l src_qnan # yes
1782 bra.b _L6_6x
1783_L6_5x:
1784 bsr.l satand # operand is a DENORM
1785_L6_6x:
1786
1787#
1788# Result is now in FP0
1789#
1790 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1791 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# only fp1 is reloaded; fp0 keeps the result
1792 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1793 unlk %a6
1794 rts
1795
1796
1797#########################################################################
1798# MONADIC TEMPLATE #
1799#########################################################################
# _fasins_: single-precision entry point. The operand at 0x8(%a6) is
# loaded through fp0, stored in extended format at FP_SRC(%a6) and
# classified by tag(); the tag then selects exactly one handler:
#   NORM -> sasin, ZERO -> src_zero, INF -> t_operr,
#   QNAN -> src_qnan, any other tag -> sasind
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is left untouched because it holds the result.
1800 global _fasins_
1801_fasins_:
1802 link %a6,&-LOCAL_SIZE
1803
1804 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1805 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1806 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1807
1808 fmov.l &0x0,%fpcr # zero FPCR
1809
1810#
1811# copy, convert, and tag input argument
1812#
1813 fmov.s 0x8(%a6),%fp0 # load sgl input
1814 fmov.x %fp0,FP_SRC(%a6)
1815 lea FP_SRC(%a6),%a0
1816 bsr.l tag # fetch operand type
1817 mov.b %d0,STAG(%a6)
1818 mov.b %d0,%d1
1819
# mask the frame's USER_FPSR longword, keeping only bits 23-16 and 7-0
# NOTE(review): presumably drops stale condition-code / exception-status
# bytes before the handler runs -- confirm against the FPSR layout
1820 andi.l &0x00ff00ff,USER_FPSR(%a6)
1821
1822 clr.l %d0
1823 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1824
# d1 = operand tag: zero falls through to the NORM handler, ZERO/INF/QNAN
# are matched in turn, and anything else lands on the last handler
1825 tst.b %d1
1826 bne.b _L7_2s
1827 bsr.l sasin # operand is a NORM
1828 bra.b _L7_6s
1829_L7_2s:
1830 cmpi.b %d1,&ZERO # is operand a ZERO?
1831 bne.b _L7_3s # no
1832 bsr.l src_zero # yes
1833 bra.b _L7_6s
1834_L7_3s:
1835 cmpi.b %d1,&INF # is operand an INF?
1836 bne.b _L7_4s # no
1837 bsr.l t_operr # yes
1838 bra.b _L7_6s
1839_L7_4s:
1840 cmpi.b %d1,&QNAN # is operand a QNAN?
1841 bne.b _L7_5s # no
1842 bsr.l src_qnan # yes
1843 bra.b _L7_6s
1844_L7_5s:
1845 bsr.l sasind # operand is a DENORM
1846_L7_6s:
1847
1848#
1849# Result is now in FP0
1850#
1851 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1852 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# only fp1 is reloaded; fp0 keeps the result
1853 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1854 unlk %a6
1855 rts
1856
# _fasind_: double-precision entry point. The operand at 0x8(%a6) is
# loaded through fp0, stored in extended format at FP_SRC(%a6) and
# classified by tag(); the tag then selects exactly one handler:
#   NORM -> sasin, ZERO -> src_zero, INF -> t_operr,
#   QNAN -> src_qnan, any other tag -> sasind
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is left untouched because it holds the result.
1857 global _fasind_
1858_fasind_:
1859 link %a6,&-LOCAL_SIZE
1860
1861 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1862 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1863 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1864
1865 fmov.l &0x0,%fpcr # zero FPCR
1866
1867#
1868# copy, convert, and tag input argument
1869#
1870 fmov.d 0x8(%a6),%fp0 # load dbl input
1871 fmov.x %fp0,FP_SRC(%a6)
1872 lea FP_SRC(%a6),%a0
1873 bsr.l tag # fetch operand type
1874 mov.b %d0,STAG(%a6)
1875 mov.b %d0,%d1
1876
# mask the frame's USER_FPSR longword, keeping only bits 23-16 and 7-0
# NOTE(review): presumably drops stale condition-code / exception-status
# bytes before the handler runs -- confirm against the FPSR layout
1877 andi.l &0x00ff00ff,USER_FPSR(%a6)
1878
1879 clr.l %d0
1880 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1881
# NOTE(review): STAG(%a6) was already written with this tag above; this
# second store looks redundant -- confirm frame layout before removing
1882 mov.b %d1,STAG(%a6)
# d1 = operand tag: zero falls through to the NORM handler, ZERO/INF/QNAN
# are matched in turn, and anything else lands on the last handler
1883 tst.b %d1
1884 bne.b _L7_2d
1885 bsr.l sasin # operand is a NORM
1886 bra.b _L7_6d
1887_L7_2d:
1888 cmpi.b %d1,&ZERO # is operand a ZERO?
1889 bne.b _L7_3d # no
1890 bsr.l src_zero # yes
1891 bra.b _L7_6d
1892_L7_3d:
1893 cmpi.b %d1,&INF # is operand an INF?
1894 bne.b _L7_4d # no
1895 bsr.l t_operr # yes
1896 bra.b _L7_6d
1897_L7_4d:
1898 cmpi.b %d1,&QNAN # is operand a QNAN?
1899 bne.b _L7_5d # no
1900 bsr.l src_qnan # yes
1901 bra.b _L7_6d
1902_L7_5d:
1903 bsr.l sasind # operand is a DENORM
1904_L7_6d:
1905
1906#
1907# Result is now in FP0
1908#
1909 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1910 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# only fp1 is reloaded; fp0 keeps the result
1911 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1912 unlk %a6
1913 rts
1914
# _fasinx_: extended-precision entry point. The three longwords of the
# operand at 0x8(%a6) are copied to FP_SRC(%a6) and classified by tag();
# the tag then selects exactly one handler:
#   NORM -> sasin, ZERO -> src_zero, INF -> t_operr,
#   QNAN -> src_qnan, any other tag -> sasind
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is left untouched because it holds the result.
1915 global _fasinx_
1916_fasinx_:
1917 link %a6,&-LOCAL_SIZE
1918
1919 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1920 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1921 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1922
1923 fmov.l &0x0,%fpcr # zero FPCR
1924
1925#
1926# copy, convert, and tag input argument
1927#
1928 lea FP_SRC(%a6),%a0
1929 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
1930 mov.l 0x8+0x4(%a6),0x4(%a0)
1931 mov.l 0x8+0x8(%a6),0x8(%a0)
1932 bsr.l tag # fetch operand type
1933 mov.b %d0,STAG(%a6)
1934 mov.b %d0,%d1
1935
# mask the frame's USER_FPSR longword, keeping only bits 23-16 and 7-0
# NOTE(review): presumably drops stale condition-code / exception-status
# bytes before the handler runs -- confirm against the FPSR layout
1936 andi.l &0x00ff00ff,USER_FPSR(%a6)
1937
1938 clr.l %d0
1939 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1940
# d1 = operand tag: zero falls through to the NORM handler, ZERO/INF/QNAN
# are matched in turn, and anything else lands on the last handler
1941 tst.b %d1
1942 bne.b _L7_2x
1943 bsr.l sasin # operand is a NORM
1944 bra.b _L7_6x
1945_L7_2x:
1946 cmpi.b %d1,&ZERO # is operand a ZERO?
1947 bne.b _L7_3x # no
1948 bsr.l src_zero # yes
1949 bra.b _L7_6x
1950_L7_3x:
1951 cmpi.b %d1,&INF # is operand an INF?
1952 bne.b _L7_4x # no
1953 bsr.l t_operr # yes
1954 bra.b _L7_6x
1955_L7_4x:
1956 cmpi.b %d1,&QNAN # is operand a QNAN?
1957 bne.b _L7_5x # no
1958 bsr.l src_qnan # yes
1959 bra.b _L7_6x
1960_L7_5x:
1961 bsr.l sasind # operand is a DENORM
1962_L7_6x:
1963
1964#
1965# Result is now in FP0
1966#
1967 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1968 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# only fp1 is reloaded; fp0 keeps the result
1969 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1970 unlk %a6
1971 rts
1972
1973
1974#########################################################################
1975# MONADIC TEMPLATE #
1976#########################################################################
# _fatanhs_: single-precision entry point. The operand at 0x8(%a6) is
# loaded through fp0, stored in extended format at FP_SRC(%a6) and
# classified by tag(); the tag then selects exactly one handler:
#   NORM -> satanh, ZERO -> src_zero, INF -> t_operr,
#   QNAN -> src_qnan, any other tag -> satanhd
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is left untouched because it holds the result.
1977 global _fatanhs_
1978_fatanhs_:
1979 link %a6,&-LOCAL_SIZE
1980
1981 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1982 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1983 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1984
1985 fmov.l &0x0,%fpcr # zero FPCR
1986
1987#
1988# copy, convert, and tag input argument
1989#
1990 fmov.s 0x8(%a6),%fp0 # load sgl input
1991 fmov.x %fp0,FP_SRC(%a6)
1992 lea FP_SRC(%a6),%a0
1993 bsr.l tag # fetch operand type
1994 mov.b %d0,STAG(%a6)
1995 mov.b %d0,%d1
1996
# mask the frame's USER_FPSR longword, keeping only bits 23-16 and 7-0
# NOTE(review): presumably drops stale condition-code / exception-status
# bytes before the handler runs -- confirm against the FPSR layout
1997 andi.l &0x00ff00ff,USER_FPSR(%a6)
1998
1999 clr.l %d0
2000 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2001
# d1 = operand tag: zero falls through to the NORM handler, ZERO/INF/QNAN
# are matched in turn, and anything else lands on the last handler
2002 tst.b %d1
2003 bne.b _L8_2s
2004 bsr.l satanh # operand is a NORM
2005 bra.b _L8_6s
2006_L8_2s:
2007 cmpi.b %d1,&ZERO # is operand a ZERO?
2008 bne.b _L8_3s # no
2009 bsr.l src_zero # yes
2010 bra.b _L8_6s
2011_L8_3s:
2012 cmpi.b %d1,&INF # is operand an INF?
2013 bne.b _L8_4s # no
2014 bsr.l t_operr # yes
2015 bra.b _L8_6s
2016_L8_4s:
2017 cmpi.b %d1,&QNAN # is operand a QNAN?
2018 bne.b _L8_5s # no
2019 bsr.l src_qnan # yes
2020 bra.b _L8_6s
2021_L8_5s:
2022 bsr.l satanhd # operand is a DENORM
2023_L8_6s:
2024
2025#
2026# Result is now in FP0
2027#
2028 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2029 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# only fp1 is reloaded; fp0 keeps the result
2030 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2031 unlk %a6
2032 rts
2033
# _fatanhd_: double-precision entry point. The operand at 0x8(%a6) is
# loaded through fp0, stored in extended format at FP_SRC(%a6) and
# classified by tag(); the tag then selects exactly one handler:
#   NORM -> satanh, ZERO -> src_zero, INF -> t_operr,
#   QNAN -> src_qnan, any other tag -> satanhd
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is left untouched because it holds the result.
2034 global _fatanhd_
2035_fatanhd_:
2036 link %a6,&-LOCAL_SIZE
2037
2038 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2039 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2040 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2041
2042 fmov.l &0x0,%fpcr # zero FPCR
2043
2044#
2045# copy, convert, and tag input argument
2046#
2047 fmov.d 0x8(%a6),%fp0 # load dbl input
2048 fmov.x %fp0,FP_SRC(%a6)
2049 lea FP_SRC(%a6),%a0
2050 bsr.l tag # fetch operand type
2051 mov.b %d0,STAG(%a6)
2052 mov.b %d0,%d1
2053
# mask the frame's USER_FPSR longword, keeping only bits 23-16 and 7-0
# NOTE(review): presumably drops stale condition-code / exception-status
# bytes before the handler runs -- confirm against the FPSR layout
2054 andi.l &0x00ff00ff,USER_FPSR(%a6)
2055
2056 clr.l %d0
2057 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2058
# NOTE(review): STAG(%a6) was already written with this tag above; this
# second store looks redundant -- confirm frame layout before removing
2059 mov.b %d1,STAG(%a6)
# d1 = operand tag: zero falls through to the NORM handler, ZERO/INF/QNAN
# are matched in turn, and anything else lands on the last handler
2060 tst.b %d1
2061 bne.b _L8_2d
2062 bsr.l satanh # operand is a NORM
2063 bra.b _L8_6d
2064_L8_2d:
2065 cmpi.b %d1,&ZERO # is operand a ZERO?
2066 bne.b _L8_3d # no
2067 bsr.l src_zero # yes
2068 bra.b _L8_6d
2069_L8_3d:
2070 cmpi.b %d1,&INF # is operand an INF?
2071 bne.b _L8_4d # no
2072 bsr.l t_operr # yes
2073 bra.b _L8_6d
2074_L8_4d:
2075 cmpi.b %d1,&QNAN # is operand a QNAN?
2076 bne.b _L8_5d # no
2077 bsr.l src_qnan # yes
2078 bra.b _L8_6d
2079_L8_5d:
2080 bsr.l satanhd # operand is a DENORM
2081_L8_6d:
2082
2083#
2084# Result is now in FP0
2085#
2086 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2087 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# only fp1 is reloaded; fp0 keeps the result
2088 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2089 unlk %a6
2090 rts
2091
# _fatanhx_: extended-precision entry point. The three longwords of the
# operand at 0x8(%a6) are copied to FP_SRC(%a6) and classified by tag();
# the tag then selects exactly one handler:
#   NORM -> satanh, ZERO -> src_zero, INF -> t_operr,
#   QNAN -> src_qnan, any other tag -> satanhd
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is left untouched because it holds the result.
2092 global _fatanhx_
2093_fatanhx_:
2094 link %a6,&-LOCAL_SIZE
2095
2096 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2097 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2098 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2099
2100 fmov.l &0x0,%fpcr # zero FPCR
2101
2102#
2103# copy, convert, and tag input argument
2104#
2105 lea FP_SRC(%a6),%a0
2106 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
2107 mov.l 0x8+0x4(%a6),0x4(%a0)
2108 mov.l 0x8+0x8(%a6),0x8(%a0)
2109 bsr.l tag # fetch operand type
2110 mov.b %d0,STAG(%a6)
2111 mov.b %d0,%d1
2112
# mask the frame's USER_FPSR longword, keeping only bits 23-16 and 7-0
# NOTE(review): presumably drops stale condition-code / exception-status
# bytes before the handler runs -- confirm against the FPSR layout
2113 andi.l &0x00ff00ff,USER_FPSR(%a6)
2114
2115 clr.l %d0
2116 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2117
# d1 = operand tag: zero falls through to the NORM handler, ZERO/INF/QNAN
# are matched in turn, and anything else lands on the last handler
2118 tst.b %d1
2119 bne.b _L8_2x
2120 bsr.l satanh # operand is a NORM
2121 bra.b _L8_6x
2122_L8_2x:
2123 cmpi.b %d1,&ZERO # is operand a ZERO?
2124 bne.b _L8_3x # no
2125 bsr.l src_zero # yes
2126 bra.b _L8_6x
2127_L8_3x:
2128 cmpi.b %d1,&INF # is operand an INF?
2129 bne.b _L8_4x # no
2130 bsr.l t_operr # yes
2131 bra.b _L8_6x
2132_L8_4x:
2133 cmpi.b %d1,&QNAN # is operand a QNAN?
2134 bne.b _L8_5x # no
2135 bsr.l src_qnan # yes
2136 bra.b _L8_6x
2137_L8_5x:
2138 bsr.l satanhd # operand is a DENORM
2139_L8_6x:
2140
2141#
2142# Result is now in FP0
2143#
2144 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2145 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# only fp1 is reloaded; fp0 keeps the result
2146 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2147 unlk %a6
2148 rts
2149
2150
2151#########################################################################
2152# MONADIC TEMPLATE #
2153#########################################################################
# _ftans_: single-precision entry point. The operand at 0x8(%a6) is
# loaded through fp0, stored in extended format at FP_SRC(%a6) and
# classified by tag(); the tag then selects exactly one handler:
#   NORM -> stan, ZERO -> src_zero, INF -> t_operr,
#   QNAN -> src_qnan, any other tag -> stand
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is left untouched because it holds the result.
2154 global _ftans_
2155_ftans_:
2156 link %a6,&-LOCAL_SIZE
2157
2158 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2159 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2160 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2161
2162 fmov.l &0x0,%fpcr # zero FPCR
2163
2164#
2165# copy, convert, and tag input argument
2166#
2167 fmov.s 0x8(%a6),%fp0 # load sgl input
2168 fmov.x %fp0,FP_SRC(%a6)
2169 lea FP_SRC(%a6),%a0
2170 bsr.l tag # fetch operand type
2171 mov.b %d0,STAG(%a6)
2172 mov.b %d0,%d1
2173
# mask the frame's USER_FPSR longword, keeping only bits 23-16 and 7-0
# NOTE(review): presumably drops stale condition-code / exception-status
# bytes before the handler runs -- confirm against the FPSR layout
2174 andi.l &0x00ff00ff,USER_FPSR(%a6)
2175
2176 clr.l %d0
2177 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2178
# d1 = operand tag: zero falls through to the NORM handler, ZERO/INF/QNAN
# are matched in turn, and anything else lands on the last handler
2179 tst.b %d1
2180 bne.b _L9_2s
2181 bsr.l stan # operand is a NORM
2182 bra.b _L9_6s
2183_L9_2s:
2184 cmpi.b %d1,&ZERO # is operand a ZERO?
2185 bne.b _L9_3s # no
2186 bsr.l src_zero # yes
2187 bra.b _L9_6s
2188_L9_3s:
2189 cmpi.b %d1,&INF # is operand an INF?
2190 bne.b _L9_4s # no
2191 bsr.l t_operr # yes
2192 bra.b _L9_6s
2193_L9_4s:
2194 cmpi.b %d1,&QNAN # is operand a QNAN?
2195 bne.b _L9_5s # no
2196 bsr.l src_qnan # yes
2197 bra.b _L9_6s
2198_L9_5s:
2199 bsr.l stand # operand is a DENORM
2200_L9_6s:
2201
2202#
2203# Result is now in FP0
2204#
2205 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2206 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# only fp1 is reloaded; fp0 keeps the result
2207 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2208 unlk %a6
2209 rts
2210
# _ftand_: double-precision entry point. The operand at 0x8(%a6) is
# loaded through fp0, stored in extended format at FP_SRC(%a6) and
# classified by tag(); the tag then selects exactly one handler:
#   NORM -> stan, ZERO -> src_zero, INF -> t_operr,
#   QNAN -> src_qnan, any other tag -> stand
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is left untouched because it holds the result.
2211 global _ftand_
2212_ftand_:
2213 link %a6,&-LOCAL_SIZE
2214
2215 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2216 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2217 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2218
2219 fmov.l &0x0,%fpcr # zero FPCR
2220
2221#
2222# copy, convert, and tag input argument
2223#
2224 fmov.d 0x8(%a6),%fp0 # load dbl input
2225 fmov.x %fp0,FP_SRC(%a6)
2226 lea FP_SRC(%a6),%a0
2227 bsr.l tag # fetch operand type
2228 mov.b %d0,STAG(%a6)
2229 mov.b %d0,%d1
2230
# mask the frame's USER_FPSR longword, keeping only bits 23-16 and 7-0
# NOTE(review): presumably drops stale condition-code / exception-status
# bytes before the handler runs -- confirm against the FPSR layout
2231 andi.l &0x00ff00ff,USER_FPSR(%a6)
2232
2233 clr.l %d0
2234 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2235
# NOTE(review): STAG(%a6) was already written with this tag above; this
# second store looks redundant -- confirm frame layout before removing
2236 mov.b %d1,STAG(%a6)
# d1 = operand tag: zero falls through to the NORM handler, ZERO/INF/QNAN
# are matched in turn, and anything else lands on the last handler
2237 tst.b %d1
2238 bne.b _L9_2d
2239 bsr.l stan # operand is a NORM
2240 bra.b _L9_6d
2241_L9_2d:
2242 cmpi.b %d1,&ZERO # is operand a ZERO?
2243 bne.b _L9_3d # no
2244 bsr.l src_zero # yes
2245 bra.b _L9_6d
2246_L9_3d:
2247 cmpi.b %d1,&INF # is operand an INF?
2248 bne.b _L9_4d # no
2249 bsr.l t_operr # yes
2250 bra.b _L9_6d
2251_L9_4d:
2252 cmpi.b %d1,&QNAN # is operand a QNAN?
2253 bne.b _L9_5d # no
2254 bsr.l src_qnan # yes
2255 bra.b _L9_6d
2256_L9_5d:
2257 bsr.l stand # operand is a DENORM
2258_L9_6d:
2259
2260#
2261# Result is now in FP0
2262#
2263 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2264 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# only fp1 is reloaded; fp0 keeps the result
2265 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2266 unlk %a6
2267 rts
2268
# _ftanx_: extended-precision entry point. The three longwords of the
# operand at 0x8(%a6) are copied to FP_SRC(%a6) and classified by tag();
# the tag then selects exactly one handler:
#   NORM -> stan, ZERO -> src_zero, INF -> t_operr,
#   QNAN -> src_qnan, any other tag -> stand
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is left untouched because it holds the result.
2269 global _ftanx_
2270_ftanx_:
2271 link %a6,&-LOCAL_SIZE
2272
2273 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2274 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2275 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2276
2277 fmov.l &0x0,%fpcr # zero FPCR
2278
2279#
2280# copy, convert, and tag input argument
2281#
2282 lea FP_SRC(%a6),%a0
2283 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
2284 mov.l 0x8+0x4(%a6),0x4(%a0)
2285 mov.l 0x8+0x8(%a6),0x8(%a0)
2286 bsr.l tag # fetch operand type
2287 mov.b %d0,STAG(%a6)
2288 mov.b %d0,%d1
2289
# mask the frame's USER_FPSR longword, keeping only bits 23-16 and 7-0
# NOTE(review): presumably drops stale condition-code / exception-status
# bytes before the handler runs -- confirm against the FPSR layout
2290 andi.l &0x00ff00ff,USER_FPSR(%a6)
2291
2292 clr.l %d0
2293 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2294
# d1 = operand tag: zero falls through to the NORM handler, ZERO/INF/QNAN
# are matched in turn, and anything else lands on the last handler
2295 tst.b %d1
2296 bne.b _L9_2x
2297 bsr.l stan # operand is a NORM
2298 bra.b _L9_6x
2299_L9_2x:
2300 cmpi.b %d1,&ZERO # is operand a ZERO?
2301 bne.b _L9_3x # no
2302 bsr.l src_zero # yes
2303 bra.b _L9_6x
2304_L9_3x:
2305 cmpi.b %d1,&INF # is operand an INF?
2306 bne.b _L9_4x # no
2307 bsr.l t_operr # yes
2308 bra.b _L9_6x
2309_L9_4x:
2310 cmpi.b %d1,&QNAN # is operand a QNAN?
2311 bne.b _L9_5x # no
2312 bsr.l src_qnan # yes
2313 bra.b _L9_6x
2314_L9_5x:
2315 bsr.l stand # operand is a DENORM
2316_L9_6x:
2317
2318#
2319# Result is now in FP0
2320#
2321 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2322 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# only fp1 is reloaded; fp0 keeps the result
2323 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2324 unlk %a6
2325 rts
2326
2327
2328#########################################################################
2329# MONADIC TEMPLATE #
2330#########################################################################
# _fetoxs_: single-precision entry point. The operand at 0x8(%a6) is
# loaded through fp0, stored in extended format at FP_SRC(%a6) and
# classified by tag(); the tag then selects exactly one handler:
#   NORM -> setox, ZERO -> ld_pone, INF -> szr_inf,
#   QNAN -> src_qnan, any other tag -> setoxd
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is left untouched because it holds the result.
2331 global _fetoxs_
2332_fetoxs_:
2333 link %a6,&-LOCAL_SIZE
2334
2335 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2336 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2337 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2338
2339 fmov.l &0x0,%fpcr # zero FPCR
2340
2341#
2342# copy, convert, and tag input argument
2343#
2344 fmov.s 0x8(%a6),%fp0 # load sgl input
2345 fmov.x %fp0,FP_SRC(%a6)
2346 lea FP_SRC(%a6),%a0
2347 bsr.l tag # fetch operand type
2348 mov.b %d0,STAG(%a6)
2349 mov.b %d0,%d1
2350
# mask the frame's USER_FPSR longword, keeping only bits 23-16 and 7-0
# NOTE(review): presumably drops stale condition-code / exception-status
# bytes before the handler runs -- confirm against the FPSR layout
2351 andi.l &0x00ff00ff,USER_FPSR(%a6)
2352
2353 clr.l %d0
2354 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2355
# d1 = operand tag: zero falls through to the NORM handler, ZERO/INF/QNAN
# are matched in turn, and anything else lands on the last handler
2356 tst.b %d1
2357 bne.b _L10_2s
2358 bsr.l setox # operand is a NORM
2359 bra.b _L10_6s
2360_L10_2s:
2361 cmpi.b %d1,&ZERO # is operand a ZERO?
2362 bne.b _L10_3s # no
2363 bsr.l ld_pone # yes
2364 bra.b _L10_6s
2365_L10_3s:
2366 cmpi.b %d1,&INF # is operand an INF?
2367 bne.b _L10_4s # no
2368 bsr.l szr_inf # yes
2369 bra.b _L10_6s
2370_L10_4s:
2371 cmpi.b %d1,&QNAN # is operand a QNAN?
2372 bne.b _L10_5s # no
2373 bsr.l src_qnan # yes
2374 bra.b _L10_6s
2375_L10_5s:
2376 bsr.l setoxd # operand is a DENORM
2377_L10_6s:
2378
2379#
2380# Result is now in FP0
2381#
2382 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2383 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# only fp1 is reloaded; fp0 keeps the result
2384 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2385 unlk %a6
2386 rts
2387
# _fetoxd_: double-precision entry point. The operand at 0x8(%a6) is
# loaded through fp0, stored in extended format at FP_SRC(%a6) and
# classified by tag(); the tag then selects exactly one handler:
#   NORM -> setox, ZERO -> ld_pone, INF -> szr_inf,
#   QNAN -> src_qnan, any other tag -> setoxd
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is left untouched because it holds the result.
2388 global _fetoxd_
2389_fetoxd_:
2390 link %a6,&-LOCAL_SIZE
2391
2392 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2393 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2394 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2395
2396 fmov.l &0x0,%fpcr # zero FPCR
2397
2398#
2399# copy, convert, and tag input argument
2400#
2401 fmov.d 0x8(%a6),%fp0 # load dbl input
2402 fmov.x %fp0,FP_SRC(%a6)
2403 lea FP_SRC(%a6),%a0
2404 bsr.l tag # fetch operand type
2405 mov.b %d0,STAG(%a6)
2406 mov.b %d0,%d1
2407
# mask the frame's USER_FPSR longword, keeping only bits 23-16 and 7-0
# NOTE(review): presumably drops stale condition-code / exception-status
# bytes before the handler runs -- confirm against the FPSR layout
2408 andi.l &0x00ff00ff,USER_FPSR(%a6)
2409
2410 clr.l %d0
2411 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2412
# NOTE(review): STAG(%a6) was already written with this tag above; this
# second store looks redundant -- confirm frame layout before removing
2413 mov.b %d1,STAG(%a6)
# d1 = operand tag: zero falls through to the NORM handler, ZERO/INF/QNAN
# are matched in turn, and anything else lands on the last handler
2414 tst.b %d1
2415 bne.b _L10_2d
2416 bsr.l setox # operand is a NORM
2417 bra.b _L10_6d
2418_L10_2d:
2419 cmpi.b %d1,&ZERO # is operand a ZERO?
2420 bne.b _L10_3d # no
2421 bsr.l ld_pone # yes
2422 bra.b _L10_6d
2423_L10_3d:
2424 cmpi.b %d1,&INF # is operand an INF?
2425 bne.b _L10_4d # no
2426 bsr.l szr_inf # yes
2427 bra.b _L10_6d
2428_L10_4d:
2429 cmpi.b %d1,&QNAN # is operand a QNAN?
2430 bne.b _L10_5d # no
2431 bsr.l src_qnan # yes
2432 bra.b _L10_6d
2433_L10_5d:
2434 bsr.l setoxd # operand is a DENORM
2435_L10_6d:
2436
2437#
2438# Result is now in FP0
2439#
2440 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2441 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# only fp1 is reloaded; fp0 keeps the result
2442 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2443 unlk %a6
2444 rts
2445
# _fetoxx_: extended-precision entry point. The three longwords of the
# operand at 0x8(%a6) are copied to FP_SRC(%a6) and classified by tag();
# the tag then selects exactly one handler:
#   NORM -> setox, ZERO -> ld_pone, INF -> szr_inf,
#   QNAN -> src_qnan, any other tag -> setoxd
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is left untouched because it holds the result.
2446 global _fetoxx_
2447_fetoxx_:
2448 link %a6,&-LOCAL_SIZE
2449
2450 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2451 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2452 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2453
2454 fmov.l &0x0,%fpcr # zero FPCR
2455
2456#
2457# copy, convert, and tag input argument
2458#
2459 lea FP_SRC(%a6),%a0
2460 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
2461 mov.l 0x8+0x4(%a6),0x4(%a0)
2462 mov.l 0x8+0x8(%a6),0x8(%a0)
2463 bsr.l tag # fetch operand type
2464 mov.b %d0,STAG(%a6)
2465 mov.b %d0,%d1
2466
# mask the frame's USER_FPSR longword, keeping only bits 23-16 and 7-0
# NOTE(review): presumably drops stale condition-code / exception-status
# bytes before the handler runs -- confirm against the FPSR layout
2467 andi.l &0x00ff00ff,USER_FPSR(%a6)
2468
2469 clr.l %d0
2470 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2471
# d1 = operand tag: zero falls through to the NORM handler, ZERO/INF/QNAN
# are matched in turn, and anything else lands on the last handler
2472 tst.b %d1
2473 bne.b _L10_2x
2474 bsr.l setox # operand is a NORM
2475 bra.b _L10_6x
2476_L10_2x:
2477 cmpi.b %d1,&ZERO # is operand a ZERO?
2478 bne.b _L10_3x # no
2479 bsr.l ld_pone # yes
2480 bra.b _L10_6x
2481_L10_3x:
2482 cmpi.b %d1,&INF # is operand an INF?
2483 bne.b _L10_4x # no
2484 bsr.l szr_inf # yes
2485 bra.b _L10_6x
2486_L10_4x:
2487 cmpi.b %d1,&QNAN # is operand a QNAN?
2488 bne.b _L10_5x # no
2489 bsr.l src_qnan # yes
2490 bra.b _L10_6x
2491_L10_5x:
2492 bsr.l setoxd # operand is a DENORM
2493_L10_6x:
2494
2495#
2496# Result is now in FP0
2497#
2498 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2499 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# only fp1 is reloaded; fp0 keeps the result
2500 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2501 unlk %a6
2502 rts
2503
2504
2505#########################################################################
2506# MONADIC TEMPLATE #
2507#########################################################################
# _ftwotoxs_: single-precision entry point. The operand at 0x8(%a6) is
# loaded through fp0, stored in extended format at FP_SRC(%a6) and
# classified by tag(); the tag then selects exactly one handler:
#   NORM -> stwotox, ZERO -> ld_pone, INF -> szr_inf,
#   QNAN -> src_qnan, any other tag -> stwotoxd
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is left untouched because it holds the result.
2508 global _ftwotoxs_
2509_ftwotoxs_:
2510 link %a6,&-LOCAL_SIZE
2511
2512 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2513 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2514 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2515
2516 fmov.l &0x0,%fpcr # zero FPCR
2517
2518#
2519# copy, convert, and tag input argument
2520#
2521 fmov.s 0x8(%a6),%fp0 # load sgl input
2522 fmov.x %fp0,FP_SRC(%a6)
2523 lea FP_SRC(%a6),%a0
2524 bsr.l tag # fetch operand type
2525 mov.b %d0,STAG(%a6)
2526 mov.b %d0,%d1
2527
# mask the frame's USER_FPSR longword, keeping only bits 23-16 and 7-0
# NOTE(review): presumably drops stale condition-code / exception-status
# bytes before the handler runs -- confirm against the FPSR layout
2528 andi.l &0x00ff00ff,USER_FPSR(%a6)
2529
2530 clr.l %d0
2531 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2532
# d1 = operand tag: zero falls through to the NORM handler, ZERO/INF/QNAN
# are matched in turn, and anything else lands on the last handler
2533 tst.b %d1
2534 bne.b _L11_2s
2535 bsr.l stwotox # operand is a NORM
2536 bra.b _L11_6s
2537_L11_2s:
2538 cmpi.b %d1,&ZERO # is operand a ZERO?
2539 bne.b _L11_3s # no
2540 bsr.l ld_pone # yes
2541 bra.b _L11_6s
2542_L11_3s:
2543 cmpi.b %d1,&INF # is operand an INF?
2544 bne.b _L11_4s # no
2545 bsr.l szr_inf # yes
2546 bra.b _L11_6s
2547_L11_4s:
2548 cmpi.b %d1,&QNAN # is operand a QNAN?
2549 bne.b _L11_5s # no
2550 bsr.l src_qnan # yes
2551 bra.b _L11_6s
2552_L11_5s:
2553 bsr.l stwotoxd # operand is a DENORM
2554_L11_6s:
2555
2556#
2557# Result is now in FP0
2558#
2559 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2560 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# only fp1 is reloaded; fp0 keeps the result
2561 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2562 unlk %a6
2563 rts
2564
#
# _ftwotoxd_: entry point for a double-precision operand passed on the
# stack at 0x8(%a6). The operand is widened through fp0, stored to FP_SRC
# in extended format, classified by tag(), and passed to one handler:
#   NORM -> stwotox, ZERO -> ld_pone, INF -> szr_inf, QNAN -> src_qnan,
#   any other tag -> stwotoxd (DENORM, per the comment on that call).
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left alone because it holds the result.
#
2565 global _ftwotoxd_
2566_ftwotoxd_:
2567 link %a6,&-LOCAL_SIZE
2568
2569 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2570 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2571 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2572
2573 fmov.l &0x0,%fpcr # zero FPCR
2574
2575#
2576# copy, convert, and tag input argument
2577#
# fp0 is a staging register here: the stack operand is loaded in its own
# format and stored to FP_SRC in extended format; a0 -> FP_SRC for tag().
2578 fmov.d 0x8(%a6),%fp0 # load dbl input
2579 fmov.x %fp0,FP_SRC(%a6)
2580 lea FP_SRC(%a6),%a0
2581 bsr.l tag # fetch operand type
# d0.b = tag from tag(); keep it in STAG and d1 since d0 is reused below.
2582 mov.b %d0,STAG(%a6)
2583 mov.b %d0,%d1
2584
# Clear bits 31-24 and 15-8 of the saved FPSR image, keeping bits 23-16
# and 7-0 (M68k FPSR layout: drop the condition-code and exception-status
# bytes, keep the quotient and accrued-exception bytes).
2585 andi.l &0x00ff00ff,USER_FPSR(%a6)
2586
# d0 = FPCR_MODE byte, zero-extended to a longword.
2587 clr.l %d0
2588 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2589
# NOTE(review): STAG was already written with this same tag right after
# the tag() call; this store looks redundant -- confirm before removing.
2590 mov.b %d1,STAG(%a6)
# Dispatch on the tag in d1: zero selects the NORM handler, otherwise
# test ZERO, INF and QNAN in turn and fall through to the last handler.
2591 tst.b %d1
2592 bne.b _L11_2d
2593 bsr.l stwotox # operand is a NORM
2594 bra.b _L11_6d
2595_L11_2d:
2596 cmpi.b %d1,&ZERO # is operand a ZERO?
2597 bne.b _L11_3d # no
2598 bsr.l ld_pone # yes
2599 bra.b _L11_6d
2600_L11_3d:
2601 cmpi.b %d1,&INF # is operand an INF?
2602 bne.b _L11_4d # no
2603 bsr.l szr_inf # yes
2604 bra.b _L11_6d
2605_L11_4d:
2606 cmpi.b %d1,&QNAN # is operand a QNAN?
2607 bne.b _L11_5d # no
2608 bsr.l src_qnan # yes
2609 bra.b _L11_6d
2610_L11_5d:
2611 bsr.l stwotoxd # operand is a DENORM
2612_L11_6d:
2613
2614#
2615# Result is now in FP0
2616#
# Reload d0-d1/a0-a1, FPCR/FPSR (from the frame copy, presumably picking
# up the FPSR image edited above) and fp1; fp0 is not reloaded.
2617 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2618 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2619 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2620 unlk %a6
2621 rts
2622
#
# _ftwotoxx_: entry point for an extended-precision operand passed on the
# stack (three longwords starting at 0x8(%a6)). The operand is copied to
# FP_SRC, classified by tag(), and passed to exactly one handler:
#   NORM -> stwotox, ZERO -> ld_pone, INF -> szr_inf, QNAN -> src_qnan,
#   any other tag -> stwotoxd (DENORM, per the comment on that call).
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left alone because it holds the result.
#
2623 global _ftwotoxx_
2624_ftwotoxx_:
2625 link %a6,&-LOCAL_SIZE
2626
2627 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2628 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2629 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2630
2631 fmov.l &0x0,%fpcr # zero FPCR
2632
2633#
2634# copy, convert, and tag input argument
2635#
# a0 -> FP_SRC; the 12-byte operand is copied there a longword at a time
# and a0 still points at it when tag() is called.
2636 lea FP_SRC(%a6),%a0
2637 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
2638 mov.l 0x8+0x4(%a6),0x4(%a0)
2639 mov.l 0x8+0x8(%a6),0x8(%a0)
2640 bsr.l tag # fetch operand type
# d0.b = tag from tag(); keep it in STAG and d1 since d0 is reused below.
2641 mov.b %d0,STAG(%a6)
2642 mov.b %d0,%d1
2643
# Clear bits 31-24 and 15-8 of the saved FPSR image, keeping bits 23-16
# and 7-0 (M68k FPSR layout: drop the condition-code and exception-status
# bytes, keep the quotient and accrued-exception bytes).
2644 andi.l &0x00ff00ff,USER_FPSR(%a6)
2645
# d0 = FPCR_MODE byte, zero-extended to a longword.
2646 clr.l %d0
2647 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2648
# Dispatch on the tag in d1: zero selects the NORM handler, otherwise
# test ZERO, INF and QNAN in turn and fall through to the last handler.
2649 tst.b %d1
2650 bne.b _L11_2x
2651 bsr.l stwotox # operand is a NORM
2652 bra.b _L11_6x
2653_L11_2x:
2654 cmpi.b %d1,&ZERO # is operand a ZERO?
2655 bne.b _L11_3x # no
2656 bsr.l ld_pone # yes
2657 bra.b _L11_6x
2658_L11_3x:
2659 cmpi.b %d1,&INF # is operand an INF?
2660 bne.b _L11_4x # no
2661 bsr.l szr_inf # yes
2662 bra.b _L11_6x
2663_L11_4x:
2664 cmpi.b %d1,&QNAN # is operand a QNAN?
2665 bne.b _L11_5x # no
2666 bsr.l src_qnan # yes
2667 bra.b _L11_6x
2668_L11_5x:
2669 bsr.l stwotoxd # operand is a DENORM
2670_L11_6x:
2671
2672#
2673# Result is now in FP0
2674#
# Reload d0-d1/a0-a1, FPCR/FPSR (from the frame copy, presumably picking
# up the FPSR image edited above) and fp1; fp0 is not reloaded.
2675 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2676 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2677 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2678 unlk %a6
2679 rts
2680
2681
2682#########################################################################
2683# MONADIC TEMPLATE #
2684#########################################################################
#
# _ftentoxs_: entry point for a single-precision operand passed on the
# stack at 0x8(%a6). The operand is widened through fp0, stored to FP_SRC
# in extended format, classified by tag(), and passed to one handler:
#   NORM -> stentox, ZERO -> ld_pone, INF -> szr_inf, QNAN -> src_qnan,
#   any other tag -> stentoxd (DENORM, per the comment on that call).
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left alone because it holds the result.
#
2685 global _ftentoxs_
2686_ftentoxs_:
2687 link %a6,&-LOCAL_SIZE
2688
2689 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2690 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2691 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2692
2693 fmov.l &0x0,%fpcr # zero FPCR
2694
2695#
2696# copy, convert, and tag input argument
2697#
# fp0 is a staging register here: the stack operand is loaded in its own
# format and stored to FP_SRC in extended format; a0 -> FP_SRC for tag().
2698 fmov.s 0x8(%a6),%fp0 # load sgl input
2699 fmov.x %fp0,FP_SRC(%a6)
2700 lea FP_SRC(%a6),%a0
2701 bsr.l tag # fetch operand type
# d0.b = tag from tag(); keep it in STAG and d1 since d0 is reused below.
2702 mov.b %d0,STAG(%a6)
2703 mov.b %d0,%d1
2704
# Clear bits 31-24 and 15-8 of the saved FPSR image, keeping bits 23-16
# and 7-0 (M68k FPSR layout: drop the condition-code and exception-status
# bytes, keep the quotient and accrued-exception bytes).
2705 andi.l &0x00ff00ff,USER_FPSR(%a6)
2706
# d0 = FPCR_MODE byte, zero-extended to a longword.
2707 clr.l %d0
2708 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2709
# Dispatch on the tag in d1: zero selects the NORM handler, otherwise
# test ZERO, INF and QNAN in turn and fall through to the last handler.
2710 tst.b %d1
2711 bne.b _L12_2s
2712 bsr.l stentox # operand is a NORM
2713 bra.b _L12_6s
2714_L12_2s:
2715 cmpi.b %d1,&ZERO # is operand a ZERO?
2716 bne.b _L12_3s # no
2717 bsr.l ld_pone # yes
2718 bra.b _L12_6s
2719_L12_3s:
2720 cmpi.b %d1,&INF # is operand an INF?
2721 bne.b _L12_4s # no
2722 bsr.l szr_inf # yes
2723 bra.b _L12_6s
2724_L12_4s:
2725 cmpi.b %d1,&QNAN # is operand a QNAN?
2726 bne.b _L12_5s # no
2727 bsr.l src_qnan # yes
2728 bra.b _L12_6s
2729_L12_5s:
2730 bsr.l stentoxd # operand is a DENORM
2731_L12_6s:
2732
2733#
2734# Result is now in FP0
2735#
# Reload d0-d1/a0-a1, FPCR/FPSR (from the frame copy, presumably picking
# up the FPSR image edited above) and fp1; fp0 is not reloaded.
2736 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2737 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2738 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2739 unlk %a6
2740 rts
2741
#
# _ftentoxd_: entry point for a double-precision operand passed on the
# stack at 0x8(%a6). The operand is widened through fp0, stored to FP_SRC
# in extended format, classified by tag(), and passed to one handler:
#   NORM -> stentox, ZERO -> ld_pone, INF -> szr_inf, QNAN -> src_qnan,
#   any other tag -> stentoxd (DENORM, per the comment on that call).
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left alone because it holds the result.
#
2742 global _ftentoxd_
2743_ftentoxd_:
2744 link %a6,&-LOCAL_SIZE
2745
2746 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2747 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2748 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2749
2750 fmov.l &0x0,%fpcr # zero FPCR
2751
2752#
2753# copy, convert, and tag input argument
2754#
# fp0 is a staging register here: the stack operand is loaded in its own
# format and stored to FP_SRC in extended format; a0 -> FP_SRC for tag().
2755 fmov.d 0x8(%a6),%fp0 # load dbl input
2756 fmov.x %fp0,FP_SRC(%a6)
2757 lea FP_SRC(%a6),%a0
2758 bsr.l tag # fetch operand type
# d0.b = tag from tag(); keep it in STAG and d1 since d0 is reused below.
2759 mov.b %d0,STAG(%a6)
2760 mov.b %d0,%d1
2761
# Clear bits 31-24 and 15-8 of the saved FPSR image, keeping bits 23-16
# and 7-0 (M68k FPSR layout: drop the condition-code and exception-status
# bytes, keep the quotient and accrued-exception bytes).
2762 andi.l &0x00ff00ff,USER_FPSR(%a6)
2763
# d0 = FPCR_MODE byte, zero-extended to a longword.
2764 clr.l %d0
2765 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2766
# NOTE(review): STAG was already written with this same tag right after
# the tag() call; this store looks redundant -- confirm before removing.
2767 mov.b %d1,STAG(%a6)
# Dispatch on the tag in d1: zero selects the NORM handler, otherwise
# test ZERO, INF and QNAN in turn and fall through to the last handler.
2768 tst.b %d1
2769 bne.b _L12_2d
2770 bsr.l stentox # operand is a NORM
2771 bra.b _L12_6d
2772_L12_2d:
2773 cmpi.b %d1,&ZERO # is operand a ZERO?
2774 bne.b _L12_3d # no
2775 bsr.l ld_pone # yes
2776 bra.b _L12_6d
2777_L12_3d:
2778 cmpi.b %d1,&INF # is operand an INF?
2779 bne.b _L12_4d # no
2780 bsr.l szr_inf # yes
2781 bra.b _L12_6d
2782_L12_4d:
2783 cmpi.b %d1,&QNAN # is operand a QNAN?
2784 bne.b _L12_5d # no
2785 bsr.l src_qnan # yes
2786 bra.b _L12_6d
2787_L12_5d:
2788 bsr.l stentoxd # operand is a DENORM
2789_L12_6d:
2790
2791#
2792# Result is now in FP0
2793#
# Reload d0-d1/a0-a1, FPCR/FPSR (from the frame copy, presumably picking
# up the FPSR image edited above) and fp1; fp0 is not reloaded.
2794 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2795 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2796 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2797 unlk %a6
2798 rts
2799
#
# _ftentoxx_: entry point for an extended-precision operand passed on the
# stack (three longwords starting at 0x8(%a6)). The operand is copied to
# FP_SRC, classified by tag(), and passed to exactly one handler:
#   NORM -> stentox, ZERO -> ld_pone, INF -> szr_inf, QNAN -> src_qnan,
#   any other tag -> stentoxd (DENORM, per the comment on that call).
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left alone because it holds the result.
#
2800 global _ftentoxx_
2801_ftentoxx_:
2802 link %a6,&-LOCAL_SIZE
2803
2804 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2805 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2806 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2807
2808 fmov.l &0x0,%fpcr # zero FPCR
2809
2810#
2811# copy, convert, and tag input argument
2812#
# a0 -> FP_SRC; the 12-byte operand is copied there a longword at a time
# and a0 still points at it when tag() is called.
2813 lea FP_SRC(%a6),%a0
2814 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
2815 mov.l 0x8+0x4(%a6),0x4(%a0)
2816 mov.l 0x8+0x8(%a6),0x8(%a0)
2817 bsr.l tag # fetch operand type
# d0.b = tag from tag(); keep it in STAG and d1 since d0 is reused below.
2818 mov.b %d0,STAG(%a6)
2819 mov.b %d0,%d1
2820
# Clear bits 31-24 and 15-8 of the saved FPSR image, keeping bits 23-16
# and 7-0 (M68k FPSR layout: drop the condition-code and exception-status
# bytes, keep the quotient and accrued-exception bytes).
2821 andi.l &0x00ff00ff,USER_FPSR(%a6)
2822
# d0 = FPCR_MODE byte, zero-extended to a longword.
2823 clr.l %d0
2824 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2825
# Dispatch on the tag in d1: zero selects the NORM handler, otherwise
# test ZERO, INF and QNAN in turn and fall through to the last handler.
2826 tst.b %d1
2827 bne.b _L12_2x
2828 bsr.l stentox # operand is a NORM
2829 bra.b _L12_6x
2830_L12_2x:
2831 cmpi.b %d1,&ZERO # is operand a ZERO?
2832 bne.b _L12_3x # no
2833 bsr.l ld_pone # yes
2834 bra.b _L12_6x
2835_L12_3x:
2836 cmpi.b %d1,&INF # is operand an INF?
2837 bne.b _L12_4x # no
2838 bsr.l szr_inf # yes
2839 bra.b _L12_6x
2840_L12_4x:
2841 cmpi.b %d1,&QNAN # is operand a QNAN?
2842 bne.b _L12_5x # no
2843 bsr.l src_qnan # yes
2844 bra.b _L12_6x
2845_L12_5x:
2846 bsr.l stentoxd # operand is a DENORM
2847_L12_6x:
2848
2849#
2850# Result is now in FP0
2851#
# Reload d0-d1/a0-a1, FPCR/FPSR (from the frame copy, presumably picking
# up the FPSR image edited above) and fp1; fp0 is not reloaded.
2852 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2853 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2854 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2855 unlk %a6
2856 rts
2857
2858
2859#########################################################################
2860# MONADIC TEMPLATE #
2861#########################################################################
#
# _flogns_: entry point for a single-precision operand passed on the
# stack at 0x8(%a6). The operand is widened through fp0, stored to FP_SRC
# in extended format, classified by tag(), and passed to one handler:
#   NORM -> slogn, ZERO -> t_dz2, INF -> sopr_inf, QNAN -> src_qnan,
#   any other tag -> slognd (DENORM, per the comment on that call).
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left alone because it holds the result.
#
2862 global _flogns_
2863_flogns_:
2864 link %a6,&-LOCAL_SIZE
2865
2866 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2867 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2868 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2869
2870 fmov.l &0x0,%fpcr # zero FPCR
2871
2872#
2873# copy, convert, and tag input argument
2874#
# fp0 is a staging register here: the stack operand is loaded in its own
# format and stored to FP_SRC in extended format; a0 -> FP_SRC for tag().
2875 fmov.s 0x8(%a6),%fp0 # load sgl input
2876 fmov.x %fp0,FP_SRC(%a6)
2877 lea FP_SRC(%a6),%a0
2878 bsr.l tag # fetch operand type
# d0.b = tag from tag(); keep it in STAG and d1 since d0 is reused below.
2879 mov.b %d0,STAG(%a6)
2880 mov.b %d0,%d1
2881
# Clear bits 31-24 and 15-8 of the saved FPSR image, keeping bits 23-16
# and 7-0 (M68k FPSR layout: drop the condition-code and exception-status
# bytes, keep the quotient and accrued-exception bytes).
2882 andi.l &0x00ff00ff,USER_FPSR(%a6)
2883
# d0 = FPCR_MODE byte, zero-extended to a longword.
2884 clr.l %d0
2885 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2886
# Dispatch on the tag in d1: zero selects the NORM handler, otherwise
# test ZERO, INF and QNAN in turn and fall through to the last handler.
2887 tst.b %d1
2888 bne.b _L13_2s
2889 bsr.l slogn # operand is a NORM
2890 bra.b _L13_6s
2891_L13_2s:
2892 cmpi.b %d1,&ZERO # is operand a ZERO?
2893 bne.b _L13_3s # no
2894 bsr.l t_dz2 # yes
2895 bra.b _L13_6s
2896_L13_3s:
2897 cmpi.b %d1,&INF # is operand an INF?
2898 bne.b _L13_4s # no
2899 bsr.l sopr_inf # yes
2900 bra.b _L13_6s
2901_L13_4s:
2902 cmpi.b %d1,&QNAN # is operand a QNAN?
2903 bne.b _L13_5s # no
2904 bsr.l src_qnan # yes
2905 bra.b _L13_6s
2906_L13_5s:
2907 bsr.l slognd # operand is a DENORM
2908_L13_6s:
2909
2910#
2911# Result is now in FP0
2912#
# Reload d0-d1/a0-a1, FPCR/FPSR (from the frame copy, presumably picking
# up the FPSR image edited above) and fp1; fp0 is not reloaded.
2913 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2914 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2915 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2916 unlk %a6
2917 rts
2918
#
# _flognd_: entry point for a double-precision operand passed on the
# stack at 0x8(%a6). The operand is widened through fp0, stored to FP_SRC
# in extended format, classified by tag(), and passed to one handler:
#   NORM -> slogn, ZERO -> t_dz2, INF -> sopr_inf, QNAN -> src_qnan,
#   any other tag -> slognd (DENORM, per the comment on that call).
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left alone because it holds the result.
#
2919 global _flognd_
2920_flognd_:
2921 link %a6,&-LOCAL_SIZE
2922
2923 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2924 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2925 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2926
2927 fmov.l &0x0,%fpcr # zero FPCR
2928
2929#
2930# copy, convert, and tag input argument
2931#
# fp0 is a staging register here: the stack operand is loaded in its own
# format and stored to FP_SRC in extended format; a0 -> FP_SRC for tag().
2932 fmov.d 0x8(%a6),%fp0 # load dbl input
2933 fmov.x %fp0,FP_SRC(%a6)
2934 lea FP_SRC(%a6),%a0
2935 bsr.l tag # fetch operand type
# d0.b = tag from tag(); keep it in STAG and d1 since d0 is reused below.
2936 mov.b %d0,STAG(%a6)
2937 mov.b %d0,%d1
2938
# Clear bits 31-24 and 15-8 of the saved FPSR image, keeping bits 23-16
# and 7-0 (M68k FPSR layout: drop the condition-code and exception-status
# bytes, keep the quotient and accrued-exception bytes).
2939 andi.l &0x00ff00ff,USER_FPSR(%a6)
2940
# d0 = FPCR_MODE byte, zero-extended to a longword.
2941 clr.l %d0
2942 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2943
# NOTE(review): STAG was already written with this same tag right after
# the tag() call; this store looks redundant -- confirm before removing.
2944 mov.b %d1,STAG(%a6)
# Dispatch on the tag in d1: zero selects the NORM handler, otherwise
# test ZERO, INF and QNAN in turn and fall through to the last handler.
2945 tst.b %d1
2946 bne.b _L13_2d
2947 bsr.l slogn # operand is a NORM
2948 bra.b _L13_6d
2949_L13_2d:
2950 cmpi.b %d1,&ZERO # is operand a ZERO?
2951 bne.b _L13_3d # no
2952 bsr.l t_dz2 # yes
2953 bra.b _L13_6d
2954_L13_3d:
2955 cmpi.b %d1,&INF # is operand an INF?
2956 bne.b _L13_4d # no
2957 bsr.l sopr_inf # yes
2958 bra.b _L13_6d
2959_L13_4d:
2960 cmpi.b %d1,&QNAN # is operand a QNAN?
2961 bne.b _L13_5d # no
2962 bsr.l src_qnan # yes
2963 bra.b _L13_6d
2964_L13_5d:
2965 bsr.l slognd # operand is a DENORM
2966_L13_6d:
2967
2968#
2969# Result is now in FP0
2970#
# Reload d0-d1/a0-a1, FPCR/FPSR (from the frame copy, presumably picking
# up the FPSR image edited above) and fp1; fp0 is not reloaded.
2971 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2972 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2973 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2974 unlk %a6
2975 rts
2976
#
# _flognx_: entry point for an extended-precision operand passed on the
# stack (three longwords starting at 0x8(%a6)). The operand is copied to
# FP_SRC, classified by tag(), and passed to exactly one handler:
#   NORM -> slogn, ZERO -> t_dz2, INF -> sopr_inf, QNAN -> src_qnan,
#   any other tag -> slognd (DENORM, per the comment on that call).
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left alone because it holds the result.
#
2977 global _flognx_
2978_flognx_:
2979 link %a6,&-LOCAL_SIZE
2980
2981 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2982 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2983 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2984
2985 fmov.l &0x0,%fpcr # zero FPCR
2986
2987#
2988# copy, convert, and tag input argument
2989#
# a0 -> FP_SRC; the 12-byte operand is copied there a longword at a time
# and a0 still points at it when tag() is called.
2990 lea FP_SRC(%a6),%a0
2991 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
2992 mov.l 0x8+0x4(%a6),0x4(%a0)
2993 mov.l 0x8+0x8(%a6),0x8(%a0)
2994 bsr.l tag # fetch operand type
# d0.b = tag from tag(); keep it in STAG and d1 since d0 is reused below.
2995 mov.b %d0,STAG(%a6)
2996 mov.b %d0,%d1
2997
# Clear bits 31-24 and 15-8 of the saved FPSR image, keeping bits 23-16
# and 7-0 (M68k FPSR layout: drop the condition-code and exception-status
# bytes, keep the quotient and accrued-exception bytes).
2998 andi.l &0x00ff00ff,USER_FPSR(%a6)
2999
# d0 = FPCR_MODE byte, zero-extended to a longword.
3000 clr.l %d0
3001 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3002
# Dispatch on the tag in d1: zero selects the NORM handler, otherwise
# test ZERO, INF and QNAN in turn and fall through to the last handler.
3003 tst.b %d1
3004 bne.b _L13_2x
3005 bsr.l slogn # operand is a NORM
3006 bra.b _L13_6x
3007_L13_2x:
3008 cmpi.b %d1,&ZERO # is operand a ZERO?
3009 bne.b _L13_3x # no
3010 bsr.l t_dz2 # yes
3011 bra.b _L13_6x
3012_L13_3x:
3013 cmpi.b %d1,&INF # is operand an INF?
3014 bne.b _L13_4x # no
3015 bsr.l sopr_inf # yes
3016 bra.b _L13_6x
3017_L13_4x:
3018 cmpi.b %d1,&QNAN # is operand a QNAN?
3019 bne.b _L13_5x # no
3020 bsr.l src_qnan # yes
3021 bra.b _L13_6x
3022_L13_5x:
3023 bsr.l slognd # operand is a DENORM
3024_L13_6x:
3025
3026#
3027# Result is now in FP0
3028#
# Reload d0-d1/a0-a1, FPCR/FPSR (from the frame copy, presumably picking
# up the FPSR image edited above) and fp1; fp0 is not reloaded.
3029 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3030 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3031 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3032 unlk %a6
3033 rts
3034
3035
3036#########################################################################
3037# MONADIC TEMPLATE #
3038#########################################################################
#
# _flog10s_: entry point for a single-precision operand passed on the
# stack at 0x8(%a6). The operand is widened through fp0, stored to FP_SRC
# in extended format, classified by tag(), and passed to one handler:
#   NORM -> slog10, ZERO -> t_dz2, INF -> sopr_inf, QNAN -> src_qnan,
#   any other tag -> slog10d (DENORM, per the comment on that call).
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left alone because it holds the result.
#
3039 global _flog10s_
3040_flog10s_:
3041 link %a6,&-LOCAL_SIZE
3042
3043 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3044 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3045 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3046
3047 fmov.l &0x0,%fpcr # zero FPCR
3048
3049#
3050# copy, convert, and tag input argument
3051#
# fp0 is a staging register here: the stack operand is loaded in its own
# format and stored to FP_SRC in extended format; a0 -> FP_SRC for tag().
3052 fmov.s 0x8(%a6),%fp0 # load sgl input
3053 fmov.x %fp0,FP_SRC(%a6)
3054 lea FP_SRC(%a6),%a0
3055 bsr.l tag # fetch operand type
# d0.b = tag from tag(); keep it in STAG and d1 since d0 is reused below.
3056 mov.b %d0,STAG(%a6)
3057 mov.b %d0,%d1
3058
# Clear bits 31-24 and 15-8 of the saved FPSR image, keeping bits 23-16
# and 7-0 (M68k FPSR layout: drop the condition-code and exception-status
# bytes, keep the quotient and accrued-exception bytes).
3059 andi.l &0x00ff00ff,USER_FPSR(%a6)
3060
# d0 = FPCR_MODE byte, zero-extended to a longword.
3061 clr.l %d0
3062 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3063
# Dispatch on the tag in d1: zero selects the NORM handler, otherwise
# test ZERO, INF and QNAN in turn and fall through to the last handler.
3064 tst.b %d1
3065 bne.b _L14_2s
3066 bsr.l slog10 # operand is a NORM
3067 bra.b _L14_6s
3068_L14_2s:
3069 cmpi.b %d1,&ZERO # is operand a ZERO?
3070 bne.b _L14_3s # no
3071 bsr.l t_dz2 # yes
3072 bra.b _L14_6s
3073_L14_3s:
3074 cmpi.b %d1,&INF # is operand an INF?
3075 bne.b _L14_4s # no
3076 bsr.l sopr_inf # yes
3077 bra.b _L14_6s
3078_L14_4s:
3079 cmpi.b %d1,&QNAN # is operand a QNAN?
3080 bne.b _L14_5s # no
3081 bsr.l src_qnan # yes
3082 bra.b _L14_6s
3083_L14_5s:
3084 bsr.l slog10d # operand is a DENORM
3085_L14_6s:
3086
3087#
3088# Result is now in FP0
3089#
# Reload d0-d1/a0-a1, FPCR/FPSR (from the frame copy, presumably picking
# up the FPSR image edited above) and fp1; fp0 is not reloaded.
3090 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3091 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3092 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3093 unlk %a6
3094 rts
3095
#
# _flog10d_: entry point for a double-precision operand passed on the
# stack at 0x8(%a6). The operand is widened through fp0, stored to FP_SRC
# in extended format, classified by tag(), and passed to one handler:
#   NORM -> slog10, ZERO -> t_dz2, INF -> sopr_inf, QNAN -> src_qnan,
#   any other tag -> slog10d (DENORM, per the comment on that call).
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left alone because it holds the result.
#
3096 global _flog10d_
3097_flog10d_:
3098 link %a6,&-LOCAL_SIZE
3099
3100 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3101 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3102 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3103
3104 fmov.l &0x0,%fpcr # zero FPCR
3105
3106#
3107# copy, convert, and tag input argument
3108#
# fp0 is a staging register here: the stack operand is loaded in its own
# format and stored to FP_SRC in extended format; a0 -> FP_SRC for tag().
3109 fmov.d 0x8(%a6),%fp0 # load dbl input
3110 fmov.x %fp0,FP_SRC(%a6)
3111 lea FP_SRC(%a6),%a0
3112 bsr.l tag # fetch operand type
# d0.b = tag from tag(); keep it in STAG and d1 since d0 is reused below.
3113 mov.b %d0,STAG(%a6)
3114 mov.b %d0,%d1
3115
# Clear bits 31-24 and 15-8 of the saved FPSR image, keeping bits 23-16
# and 7-0 (M68k FPSR layout: drop the condition-code and exception-status
# bytes, keep the quotient and accrued-exception bytes).
3116 andi.l &0x00ff00ff,USER_FPSR(%a6)
3117
# d0 = FPCR_MODE byte, zero-extended to a longword.
3118 clr.l %d0
3119 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3120
# NOTE(review): STAG was already written with this same tag right after
# the tag() call; this store looks redundant -- confirm before removing.
3121 mov.b %d1,STAG(%a6)
# Dispatch on the tag in d1: zero selects the NORM handler, otherwise
# test ZERO, INF and QNAN in turn and fall through to the last handler.
3122 tst.b %d1
3123 bne.b _L14_2d
3124 bsr.l slog10 # operand is a NORM
3125 bra.b _L14_6d
3126_L14_2d:
3127 cmpi.b %d1,&ZERO # is operand a ZERO?
3128 bne.b _L14_3d # no
3129 bsr.l t_dz2 # yes
3130 bra.b _L14_6d
3131_L14_3d:
3132 cmpi.b %d1,&INF # is operand an INF?
3133 bne.b _L14_4d # no
3134 bsr.l sopr_inf # yes
3135 bra.b _L14_6d
3136_L14_4d:
3137 cmpi.b %d1,&QNAN # is operand a QNAN?
3138 bne.b _L14_5d # no
3139 bsr.l src_qnan # yes
3140 bra.b _L14_6d
3141_L14_5d:
3142 bsr.l slog10d # operand is a DENORM
3143_L14_6d:
3144
3145#
3146# Result is now in FP0
3147#
# Reload d0-d1/a0-a1, FPCR/FPSR (from the frame copy, presumably picking
# up the FPSR image edited above) and fp1; fp0 is not reloaded.
3148 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3149 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3150 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3151 unlk %a6
3152 rts
3153
#
# _flog10x_: entry point for an extended-precision operand passed on the
# stack (three longwords starting at 0x8(%a6)). The operand is copied to
# FP_SRC, classified by tag(), and passed to exactly one handler:
#   NORM -> slog10, ZERO -> t_dz2, INF -> sopr_inf, QNAN -> src_qnan,
#   any other tag -> slog10d (DENORM, per the comment on that call).
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left alone because it holds the result.
#
3154 global _flog10x_
3155_flog10x_:
3156 link %a6,&-LOCAL_SIZE
3157
3158 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3159 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3160 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3161
3162 fmov.l &0x0,%fpcr # zero FPCR
3163
3164#
3165# copy, convert, and tag input argument
3166#
# a0 -> FP_SRC; the 12-byte operand is copied there a longword at a time
# and a0 still points at it when tag() is called.
3167 lea FP_SRC(%a6),%a0
3168 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
3169 mov.l 0x8+0x4(%a6),0x4(%a0)
3170 mov.l 0x8+0x8(%a6),0x8(%a0)
3171 bsr.l tag # fetch operand type
# d0.b = tag from tag(); keep it in STAG and d1 since d0 is reused below.
3172 mov.b %d0,STAG(%a6)
3173 mov.b %d0,%d1
3174
# Clear bits 31-24 and 15-8 of the saved FPSR image, keeping bits 23-16
# and 7-0 (M68k FPSR layout: drop the condition-code and exception-status
# bytes, keep the quotient and accrued-exception bytes).
3175 andi.l &0x00ff00ff,USER_FPSR(%a6)
3176
# d0 = FPCR_MODE byte, zero-extended to a longword.
3177 clr.l %d0
3178 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3179
# Dispatch on the tag in d1: zero selects the NORM handler, otherwise
# test ZERO, INF and QNAN in turn and fall through to the last handler.
3180 tst.b %d1
3181 bne.b _L14_2x
3182 bsr.l slog10 # operand is a NORM
3183 bra.b _L14_6x
3184_L14_2x:
3185 cmpi.b %d1,&ZERO # is operand a ZERO?
3186 bne.b _L14_3x # no
3187 bsr.l t_dz2 # yes
3188 bra.b _L14_6x
3189_L14_3x:
3190 cmpi.b %d1,&INF # is operand an INF?
3191 bne.b _L14_4x # no
3192 bsr.l sopr_inf # yes
3193 bra.b _L14_6x
3194_L14_4x:
3195 cmpi.b %d1,&QNAN # is operand a QNAN?
3196 bne.b _L14_5x # no
3197 bsr.l src_qnan # yes
3198 bra.b _L14_6x
3199_L14_5x:
3200 bsr.l slog10d # operand is a DENORM
3201_L14_6x:
3202
3203#
3204# Result is now in FP0
3205#
# Reload d0-d1/a0-a1, FPCR/FPSR (from the frame copy, presumably picking
# up the FPSR image edited above) and fp1; fp0 is not reloaded.
3206 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3207 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3208 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3209 unlk %a6
3210 rts
3211
3212
3213#########################################################################
3214# MONADIC TEMPLATE #
3215#########################################################################
#
# _flog2s_: entry point for a single-precision operand passed on the
# stack at 0x8(%a6). The operand is widened through fp0, stored to FP_SRC
# in extended format, classified by tag(), and passed to one handler:
#   NORM -> slog2, ZERO -> t_dz2, INF -> sopr_inf, QNAN -> src_qnan,
#   any other tag -> slog2d (DENORM, per the comment on that call).
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left alone because it holds the result.
#
3216 global _flog2s_
3217_flog2s_:
3218 link %a6,&-LOCAL_SIZE
3219
3220 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3221 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3222 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3223
3224 fmov.l &0x0,%fpcr # zero FPCR
3225
3226#
3227# copy, convert, and tag input argument
3228#
# fp0 is a staging register here: the stack operand is loaded in its own
# format and stored to FP_SRC in extended format; a0 -> FP_SRC for tag().
3229 fmov.s 0x8(%a6),%fp0 # load sgl input
3230 fmov.x %fp0,FP_SRC(%a6)
3231 lea FP_SRC(%a6),%a0
3232 bsr.l tag # fetch operand type
# d0.b = tag from tag(); keep it in STAG and d1 since d0 is reused below.
3233 mov.b %d0,STAG(%a6)
3234 mov.b %d0,%d1
3235
# Clear bits 31-24 and 15-8 of the saved FPSR image, keeping bits 23-16
# and 7-0 (M68k FPSR layout: drop the condition-code and exception-status
# bytes, keep the quotient and accrued-exception bytes).
3236 andi.l &0x00ff00ff,USER_FPSR(%a6)
3237
# d0 = FPCR_MODE byte, zero-extended to a longword.
3238 clr.l %d0
3239 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3240
# Dispatch on the tag in d1: zero selects the NORM handler, otherwise
# test ZERO, INF and QNAN in turn and fall through to the last handler.
3241 tst.b %d1
3242 bne.b _L15_2s
3243 bsr.l slog2 # operand is a NORM
3244 bra.b _L15_6s
3245_L15_2s:
3246 cmpi.b %d1,&ZERO # is operand a ZERO?
3247 bne.b _L15_3s # no
3248 bsr.l t_dz2 # yes
3249 bra.b _L15_6s
3250_L15_3s:
3251 cmpi.b %d1,&INF # is operand an INF?
3252 bne.b _L15_4s # no
3253 bsr.l sopr_inf # yes
3254 bra.b _L15_6s
3255_L15_4s:
3256 cmpi.b %d1,&QNAN # is operand a QNAN?
3257 bne.b _L15_5s # no
3258 bsr.l src_qnan # yes
3259 bra.b _L15_6s
3260_L15_5s:
3261 bsr.l slog2d # operand is a DENORM
3262_L15_6s:
3263
3264#
3265# Result is now in FP0
3266#
# Reload d0-d1/a0-a1, FPCR/FPSR (from the frame copy, presumably picking
# up the FPSR image edited above) and fp1; fp0 is not reloaded.
3267 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3268 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3269 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3270 unlk %a6
3271 rts
3272
#
# _flog2d_: entry point for a double-precision operand passed on the
# stack at 0x8(%a6). The operand is widened through fp0, stored to FP_SRC
# in extended format, classified by tag(), and passed to one handler:
#   NORM -> slog2, ZERO -> t_dz2, INF -> sopr_inf, QNAN -> src_qnan,
#   any other tag -> slog2d (DENORM, per the comment on that call).
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left alone because it holds the result.
#
3273 global _flog2d_
3274_flog2d_:
3275 link %a6,&-LOCAL_SIZE
3276
3277 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3278 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3279 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3280
3281 fmov.l &0x0,%fpcr # zero FPCR
3282
3283#
3284# copy, convert, and tag input argument
3285#
# fp0 is a staging register here: the stack operand is loaded in its own
# format and stored to FP_SRC in extended format; a0 -> FP_SRC for tag().
3286 fmov.d 0x8(%a6),%fp0 # load dbl input
3287 fmov.x %fp0,FP_SRC(%a6)
3288 lea FP_SRC(%a6),%a0
3289 bsr.l tag # fetch operand type
# d0.b = tag from tag(); keep it in STAG and d1 since d0 is reused below.
3290 mov.b %d0,STAG(%a6)
3291 mov.b %d0,%d1
3292
# Clear bits 31-24 and 15-8 of the saved FPSR image, keeping bits 23-16
# and 7-0 (M68k FPSR layout: drop the condition-code and exception-status
# bytes, keep the quotient and accrued-exception bytes).
3293 andi.l &0x00ff00ff,USER_FPSR(%a6)
3294
# d0 = FPCR_MODE byte, zero-extended to a longword.
3295 clr.l %d0
3296 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3297
# NOTE(review): STAG was already written with this same tag right after
# the tag() call; this store looks redundant -- confirm before removing.
3298 mov.b %d1,STAG(%a6)
# Dispatch on the tag in d1: zero selects the NORM handler, otherwise
# test ZERO, INF and QNAN in turn and fall through to the last handler.
3299 tst.b %d1
3300 bne.b _L15_2d
3301 bsr.l slog2 # operand is a NORM
3302 bra.b _L15_6d
3303_L15_2d:
3304 cmpi.b %d1,&ZERO # is operand a ZERO?
3305 bne.b _L15_3d # no
3306 bsr.l t_dz2 # yes
3307 bra.b _L15_6d
3308_L15_3d:
3309 cmpi.b %d1,&INF # is operand an INF?
3310 bne.b _L15_4d # no
3311 bsr.l sopr_inf # yes
3312 bra.b _L15_6d
3313_L15_4d:
3314 cmpi.b %d1,&QNAN # is operand a QNAN?
3315 bne.b _L15_5d # no
3316 bsr.l src_qnan # yes
3317 bra.b _L15_6d
3318_L15_5d:
3319 bsr.l slog2d # operand is a DENORM
3320_L15_6d:
3321
3322#
3323# Result is now in FP0
3324#
# Reload d0-d1/a0-a1, FPCR/FPSR (from the frame copy, presumably picking
# up the FPSR image edited above) and fp1; fp0 is not reloaded.
3325 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3326 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3327 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3328 unlk %a6
3329 rts
3330
#
# _flog2x_: entry point for an extended-precision operand passed on the
# stack (three longwords starting at 0x8(%a6)). The operand is copied to
# FP_SRC, classified by tag(), and passed to exactly one handler:
#   NORM -> slog2, ZERO -> t_dz2, INF -> sopr_inf, QNAN -> src_qnan,
#   any other tag -> slog2d (DENORM, per the comment on that call).
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left alone because it holds the result.
#
3331 global _flog2x_
3332_flog2x_:
3333 link %a6,&-LOCAL_SIZE
3334
3335 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3336 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3337 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3338
3339 fmov.l &0x0,%fpcr # zero FPCR
3340
3341#
3342# copy, convert, and tag input argument
3343#
# a0 -> FP_SRC; the 12-byte operand is copied there a longword at a time
# and a0 still points at it when tag() is called.
3344 lea FP_SRC(%a6),%a0
3345 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
3346 mov.l 0x8+0x4(%a6),0x4(%a0)
3347 mov.l 0x8+0x8(%a6),0x8(%a0)
3348 bsr.l tag # fetch operand type
# d0.b = tag from tag(); keep it in STAG and d1 since d0 is reused below.
3349 mov.b %d0,STAG(%a6)
3350 mov.b %d0,%d1
3351
# Clear bits 31-24 and 15-8 of the saved FPSR image, keeping bits 23-16
# and 7-0 (M68k FPSR layout: drop the condition-code and exception-status
# bytes, keep the quotient and accrued-exception bytes).
3352 andi.l &0x00ff00ff,USER_FPSR(%a6)
3353
# d0 = FPCR_MODE byte, zero-extended to a longword.
3354 clr.l %d0
3355 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3356
# Dispatch on the tag in d1: zero selects the NORM handler, otherwise
# test ZERO, INF and QNAN in turn and fall through to the last handler.
3357 tst.b %d1
3358 bne.b _L15_2x
3359 bsr.l slog2 # operand is a NORM
3360 bra.b _L15_6x
3361_L15_2x:
3362 cmpi.b %d1,&ZERO # is operand a ZERO?
3363 bne.b _L15_3x # no
3364 bsr.l t_dz2 # yes
3365 bra.b _L15_6x
3366_L15_3x:
3367 cmpi.b %d1,&INF # is operand an INF?
3368 bne.b _L15_4x # no
3369 bsr.l sopr_inf # yes
3370 bra.b _L15_6x
3371_L15_4x:
3372 cmpi.b %d1,&QNAN # is operand a QNAN?
3373 bne.b _L15_5x # no
3374 bsr.l src_qnan # yes
3375 bra.b _L15_6x
3376_L15_5x:
3377 bsr.l slog2d # operand is a DENORM
3378_L15_6x:
3379
3380#
3381# Result is now in FP0
3382#
# Reload d0-d1/a0-a1, FPCR/FPSR (from the frame copy, presumably picking
# up the FPSR image edited above) and fp1; fp0 is not reloaded.
3383 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3384 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3385 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3386 unlk %a6
3387 rts
3388
3389
3390#########################################################################
3391# MONADIC TEMPLATE #
3392#########################################################################
#
# _fcoshs_: entry point for a single-precision operand passed on the
# stack at 0x8(%a6). The operand is widened through fp0, stored to FP_SRC
# in extended format, classified by tag(), and passed to one handler:
#   NORM -> scosh, ZERO -> ld_pone, INF -> ld_pinf, QNAN -> src_qnan,
#   any other tag -> scoshd (DENORM, per the comment on that call).
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left alone because it holds the result.
#
3393 global _fcoshs_
3394_fcoshs_:
3395 link %a6,&-LOCAL_SIZE
3396
3397 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3398 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3399 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3400
3401 fmov.l &0x0,%fpcr # zero FPCR
3402
3403#
3404# copy, convert, and tag input argument
3405#
# fp0 is a staging register here: the stack operand is loaded in its own
# format and stored to FP_SRC in extended format; a0 -> FP_SRC for tag().
3406 fmov.s 0x8(%a6),%fp0 # load sgl input
3407 fmov.x %fp0,FP_SRC(%a6)
3408 lea FP_SRC(%a6),%a0
3409 bsr.l tag # fetch operand type
# d0.b = tag from tag(); keep it in STAG and d1 since d0 is reused below.
3410 mov.b %d0,STAG(%a6)
3411 mov.b %d0,%d1
3412
# Clear bits 31-24 and 15-8 of the saved FPSR image, keeping bits 23-16
# and 7-0 (M68k FPSR layout: drop the condition-code and exception-status
# bytes, keep the quotient and accrued-exception bytes).
3413 andi.l &0x00ff00ff,USER_FPSR(%a6)
3414
# d0 = FPCR_MODE byte, zero-extended to a longword.
3415 clr.l %d0
3416 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3417
# Dispatch on the tag in d1: zero selects the NORM handler, otherwise
# test ZERO, INF and QNAN in turn and fall through to the last handler.
3418 tst.b %d1
3419 bne.b _L16_2s
3420 bsr.l scosh # operand is a NORM
3421 bra.b _L16_6s
3422_L16_2s:
3423 cmpi.b %d1,&ZERO # is operand a ZERO?
3424 bne.b _L16_3s # no
3425 bsr.l ld_pone # yes
3426 bra.b _L16_6s
3427_L16_3s:
3428 cmpi.b %d1,&INF # is operand an INF?
3429 bne.b _L16_4s # no
3430 bsr.l ld_pinf # yes
3431 bra.b _L16_6s
3432_L16_4s:
3433 cmpi.b %d1,&QNAN # is operand a QNAN?
3434 bne.b _L16_5s # no
3435 bsr.l src_qnan # yes
3436 bra.b _L16_6s
3437_L16_5s:
3438 bsr.l scoshd # operand is a DENORM
3439_L16_6s:
3440
3441#
3442# Result is now in FP0
3443#
# Reload d0-d1/a0-a1, FPCR/FPSR (from the frame copy, presumably picking
# up the FPSR image edited above) and fp1; fp0 is not reloaded.
3444 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3445 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3446 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3447 unlk %a6
3448 rts
3449
#
# _fcoshd_: entry point for a double-precision operand passed on the
# stack at 0x8(%a6). The operand is widened through fp0, stored to FP_SRC
# in extended format, classified by tag(), and passed to one handler:
#   NORM -> scosh, ZERO -> ld_pone, INF -> ld_pinf, QNAN -> src_qnan,
#   any other tag -> scoshd (DENORM, per the comment on that call).
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left alone because it holds the result.
#
3450 global _fcoshd_
3451_fcoshd_:
3452 link %a6,&-LOCAL_SIZE
3453
3454 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3455 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3456 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3457
3458 fmov.l &0x0,%fpcr # zero FPCR
3459
3460#
3461# copy, convert, and tag input argument
3462#
# fp0 is a staging register here: the stack operand is loaded in its own
# format and stored to FP_SRC in extended format; a0 -> FP_SRC for tag().
3463 fmov.d 0x8(%a6),%fp0 # load dbl input
3464 fmov.x %fp0,FP_SRC(%a6)
3465 lea FP_SRC(%a6),%a0
3466 bsr.l tag # fetch operand type
# d0.b = tag from tag(); keep it in STAG and d1 since d0 is reused below.
3467 mov.b %d0,STAG(%a6)
3468 mov.b %d0,%d1
3469
# Clear bits 31-24 and 15-8 of the saved FPSR image, keeping bits 23-16
# and 7-0 (M68k FPSR layout: drop the condition-code and exception-status
# bytes, keep the quotient and accrued-exception bytes).
3470 andi.l &0x00ff00ff,USER_FPSR(%a6)
3471
# d0 = FPCR_MODE byte, zero-extended to a longword.
3472 clr.l %d0
3473 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3474
# NOTE(review): STAG was already written with this same tag right after
# the tag() call; this store looks redundant -- confirm before removing.
3475 mov.b %d1,STAG(%a6)
# Dispatch on the tag in d1: zero selects the NORM handler, otherwise
# test ZERO, INF and QNAN in turn and fall through to the last handler.
3476 tst.b %d1
3477 bne.b _L16_2d
3478 bsr.l scosh # operand is a NORM
3479 bra.b _L16_6d
3480_L16_2d:
3481 cmpi.b %d1,&ZERO # is operand a ZERO?
3482 bne.b _L16_3d # no
3483 bsr.l ld_pone # yes
3484 bra.b _L16_6d
3485_L16_3d:
3486 cmpi.b %d1,&INF # is operand an INF?
3487 bne.b _L16_4d # no
3488 bsr.l ld_pinf # yes
3489 bra.b _L16_6d
3490_L16_4d:
3491 cmpi.b %d1,&QNAN # is operand a QNAN?
3492 bne.b _L16_5d # no
3493 bsr.l src_qnan # yes
3494 bra.b _L16_6d
3495_L16_5d:
3496 bsr.l scoshd # operand is a DENORM
3497_L16_6d:
3498
3499#
3500# Result is now in FP0
3501#
# Reload d0-d1/a0-a1, FPCR/FPSR (from the frame copy, presumably picking
# up the FPSR image edited above) and fp1; fp0 is not reloaded.
3502 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3503 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3504 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3505 unlk %a6
3506 rts
3507
#
# _fcoshx_: entry point for an extended-precision operand passed on the
# stack (three longwords starting at 0x8(%a6)). The operand is copied to
# FP_SRC, classified by tag(), and passed to exactly one handler:
#   NORM -> scosh, ZERO -> ld_pone, INF -> ld_pinf, QNAN -> src_qnan,
#   any other tag -> scoshd (DENORM, per the comment on that call).
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left alone because it holds the result.
#
3508 global _fcoshx_
3509_fcoshx_:
3510 link %a6,&-LOCAL_SIZE
3511
3512 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3513 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3514 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3515
3516 fmov.l &0x0,%fpcr # zero FPCR
3517
3518#
3519# copy, convert, and tag input argument
3520#
# a0 -> FP_SRC; the 12-byte operand is copied there a longword at a time
# and a0 still points at it when tag() is called.
3521 lea FP_SRC(%a6),%a0
3522 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
3523 mov.l 0x8+0x4(%a6),0x4(%a0)
3524 mov.l 0x8+0x8(%a6),0x8(%a0)
3525 bsr.l tag # fetch operand type
# d0.b = tag from tag(); keep it in STAG and d1 since d0 is reused below.
3526 mov.b %d0,STAG(%a6)
3527 mov.b %d0,%d1
3528
# Clear bits 31-24 and 15-8 of the saved FPSR image, keeping bits 23-16
# and 7-0 (M68k FPSR layout: drop the condition-code and exception-status
# bytes, keep the quotient and accrued-exception bytes).
3529 andi.l &0x00ff00ff,USER_FPSR(%a6)
3530
# d0 = FPCR_MODE byte, zero-extended to a longword.
3531 clr.l %d0
3532 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3533
# Dispatch on the tag in d1: zero selects the NORM handler, otherwise
# test ZERO, INF and QNAN in turn and fall through to the last handler.
3534 tst.b %d1
3535 bne.b _L16_2x
3536 bsr.l scosh # operand is a NORM
3537 bra.b _L16_6x
3538_L16_2x:
3539 cmpi.b %d1,&ZERO # is operand a ZERO?
3540 bne.b _L16_3x # no
3541 bsr.l ld_pone # yes
3542 bra.b _L16_6x
3543_L16_3x:
3544 cmpi.b %d1,&INF # is operand an INF?
3545 bne.b _L16_4x # no
3546 bsr.l ld_pinf # yes
3547 bra.b _L16_6x
3548_L16_4x:
3549 cmpi.b %d1,&QNAN # is operand a QNAN?
3550 bne.b _L16_5x # no
3551 bsr.l src_qnan # yes
3552 bra.b _L16_6x
3553_L16_5x:
3554 bsr.l scoshd # operand is a DENORM
3555_L16_6x:
3556
3557#
3558# Result is now in FP0
3559#
# Reload d0-d1/a0-a1, FPCR/FPSR (from the frame copy, presumably picking
# up the FPSR image edited above) and fp1; fp0 is not reloaded.
3560 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3561 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3562 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3563 unlk %a6
3564 rts
3565
3566
3567#########################################################################
3568# MONADIC TEMPLATE #
3569#########################################################################
#
# _facoss_: entry point for a single-precision operand passed on the
# stack at 0x8(%a6). The operand is widened through fp0, stored to FP_SRC
# in extended format, classified by tag(), and passed to one handler:
#   NORM -> sacos, ZERO -> ld_ppi2, INF -> t_operr, QNAN -> src_qnan,
#   any other tag -> sacosd (DENORM, per the comment on that call).
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left alone because it holds the result.
#
3570 global _facoss_
3571_facoss_:
3572 link %a6,&-LOCAL_SIZE
3573
3574 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3575 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3576 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3577
3578 fmov.l &0x0,%fpcr # zero FPCR
3579
3580#
3581# copy, convert, and tag input argument
3582#
# fp0 is a staging register here: the stack operand is loaded in its own
# format and stored to FP_SRC in extended format; a0 -> FP_SRC for tag().
3583 fmov.s 0x8(%a6),%fp0 # load sgl input
3584 fmov.x %fp0,FP_SRC(%a6)
3585 lea FP_SRC(%a6),%a0
3586 bsr.l tag # fetch operand type
# d0.b = tag from tag(); keep it in STAG and d1 since d0 is reused below.
3587 mov.b %d0,STAG(%a6)
3588 mov.b %d0,%d1
3589
# Clear bits 31-24 and 15-8 of the saved FPSR image, keeping bits 23-16
# and 7-0 (M68k FPSR layout: drop the condition-code and exception-status
# bytes, keep the quotient and accrued-exception bytes).
3590 andi.l &0x00ff00ff,USER_FPSR(%a6)
3591
# d0 = FPCR_MODE byte, zero-extended to a longword.
3592 clr.l %d0
3593 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3594
# Dispatch on the tag in d1: zero selects the NORM handler, otherwise
# test ZERO, INF and QNAN in turn and fall through to the last handler.
3595 tst.b %d1
3596 bne.b _L17_2s
3597 bsr.l sacos # operand is a NORM
3598 bra.b _L17_6s
3599_L17_2s:
3600 cmpi.b %d1,&ZERO # is operand a ZERO?
3601 bne.b _L17_3s # no
3602 bsr.l ld_ppi2 # yes
3603 bra.b _L17_6s
3604_L17_3s:
3605 cmpi.b %d1,&INF # is operand an INF?
3606 bne.b _L17_4s # no
3607 bsr.l t_operr # yes
3608 bra.b _L17_6s
3609_L17_4s:
3610 cmpi.b %d1,&QNAN # is operand a QNAN?
3611 bne.b _L17_5s # no
3612 bsr.l src_qnan # yes
3613 bra.b _L17_6s
3614_L17_5s:
3615 bsr.l sacosd # operand is a DENORM
3616_L17_6s:
3617
3618#
3619# Result is now in FP0
3620#
# Reload d0-d1/a0-a1, FPCR/FPSR (from the frame copy, presumably picking
# up the FPSR image edited above) and fp1; fp0 is not reloaded.
3621 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3622 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3623 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3624 unlk %a6
3625 rts
3626
#
# _facosd_: entry point for a double-precision operand passed on the
# stack at 0x8(%a6). The operand is widened through fp0, stored to FP_SRC
# in extended format, classified by tag(), and passed to one handler:
#   NORM -> sacos, ZERO -> ld_ppi2, INF -> t_operr, QNAN -> src_qnan,
#   any other tag -> sacosd (DENORM, per the comment on that call).
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left alone because it holds the result.
#
3627 global _facosd_
3628_facosd_:
3629 link %a6,&-LOCAL_SIZE
3630
3631 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3632 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3633 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3634
3635 fmov.l &0x0,%fpcr # zero FPCR
3636
3637#
3638# copy, convert, and tag input argument
3639#
# fp0 is a staging register here: the stack operand is loaded in its own
# format and stored to FP_SRC in extended format; a0 -> FP_SRC for tag().
3640 fmov.d 0x8(%a6),%fp0 # load dbl input
3641 fmov.x %fp0,FP_SRC(%a6)
3642 lea FP_SRC(%a6),%a0
3643 bsr.l tag # fetch operand type
# d0.b = tag from tag(); keep it in STAG and d1 since d0 is reused below.
3644 mov.b %d0,STAG(%a6)
3645 mov.b %d0,%d1
3646
# Clear bits 31-24 and 15-8 of the saved FPSR image, keeping bits 23-16
# and 7-0 (M68k FPSR layout: drop the condition-code and exception-status
# bytes, keep the quotient and accrued-exception bytes).
3647 andi.l &0x00ff00ff,USER_FPSR(%a6)
3648
# d0 = FPCR_MODE byte, zero-extended to a longword.
3649 clr.l %d0
3650 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3651
# NOTE(review): STAG was already written with this same tag right after
# the tag() call; this store looks redundant -- confirm before removing.
3652 mov.b %d1,STAG(%a6)
# Dispatch on the tag in d1: zero selects the NORM handler, otherwise
# test ZERO, INF and QNAN in turn and fall through to the last handler.
3653 tst.b %d1
3654 bne.b _L17_2d
3655 bsr.l sacos # operand is a NORM
3656 bra.b _L17_6d
3657_L17_2d:
3658 cmpi.b %d1,&ZERO # is operand a ZERO?
3659 bne.b _L17_3d # no
3660 bsr.l ld_ppi2 # yes
3661 bra.b _L17_6d
3662_L17_3d:
3663 cmpi.b %d1,&INF # is operand an INF?
3664 bne.b _L17_4d # no
3665 bsr.l t_operr # yes
3666 bra.b _L17_6d
3667_L17_4d:
3668 cmpi.b %d1,&QNAN # is operand a QNAN?
3669 bne.b _L17_5d # no
3670 bsr.l src_qnan # yes
3671 bra.b _L17_6d
3672_L17_5d:
3673 bsr.l sacosd # operand is a DENORM
3674_L17_6d:
3675
3676#
3677# Result is now in FP0
3678#
# Reload d0-d1/a0-a1, FPCR/FPSR (from the frame copy, presumably picking
# up the FPSR image edited above) and fp1; fp0 is not reloaded.
3679 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3680 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3681 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3682 unlk %a6
3683 rts
3684
#
# _facosx_: entry point for an extended-precision operand passed on the
# stack (three longwords starting at 0x8(%a6)). The operand is copied to
# FP_SRC, classified by tag(), and passed to exactly one handler:
#   NORM -> sacos, ZERO -> ld_ppi2, INF -> t_operr, QNAN -> src_qnan,
#   any other tag -> sacosd (DENORM, per the comment on that call).
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left alone because it holds the result.
#
3685 global _facosx_
3686_facosx_:
3687 link %a6,&-LOCAL_SIZE
3688
3689 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3690 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3691 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3692
3693 fmov.l &0x0,%fpcr # zero FPCR
3694
3695#
3696# copy, convert, and tag input argument
3697#
# a0 -> FP_SRC; the 12-byte operand is copied there a longword at a time
# and a0 still points at it when tag() is called.
3698 lea FP_SRC(%a6),%a0
3699 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
3700 mov.l 0x8+0x4(%a6),0x4(%a0)
3701 mov.l 0x8+0x8(%a6),0x8(%a0)
3702 bsr.l tag # fetch operand type
# d0.b = tag from tag(); keep it in STAG and d1 since d0 is reused below.
3703 mov.b %d0,STAG(%a6)
3704 mov.b %d0,%d1
3705
# Clear bits 31-24 and 15-8 of the saved FPSR image, keeping bits 23-16
# and 7-0 (M68k FPSR layout: drop the condition-code and exception-status
# bytes, keep the quotient and accrued-exception bytes).
3706 andi.l &0x00ff00ff,USER_FPSR(%a6)
3707
# d0 = FPCR_MODE byte, zero-extended to a longword.
3708 clr.l %d0
3709 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3710
# Dispatch on the tag in d1: zero selects the NORM handler, otherwise
# test ZERO, INF and QNAN in turn and fall through to the last handler.
3711 tst.b %d1
3712 bne.b _L17_2x
3713 bsr.l sacos # operand is a NORM
3714 bra.b _L17_6x
3715_L17_2x:
3716 cmpi.b %d1,&ZERO # is operand a ZERO?
3717 bne.b _L17_3x # no
3718 bsr.l ld_ppi2 # yes
3719 bra.b _L17_6x
3720_L17_3x:
3721 cmpi.b %d1,&INF # is operand an INF?
3722 bne.b _L17_4x # no
3723 bsr.l t_operr # yes
3724 bra.b _L17_6x
3725_L17_4x:
3726 cmpi.b %d1,&QNAN # is operand a QNAN?
3727 bne.b _L17_5x # no
3728 bsr.l src_qnan # yes
3729 bra.b _L17_6x
3730_L17_5x:
3731 bsr.l sacosd # operand is a DENORM
3732_L17_6x:
3733
3734#
3735# Result is now in FP0
3736#
# Reload d0-d1/a0-a1, FPCR/FPSR (from the frame copy, presumably picking
# up the FPSR image edited above) and fp1; fp0 is not reloaded.
3737 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3738 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3739 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3740 unlk %a6
3741 rts
3742
3743
3744#########################################################################
3745# MONADIC TEMPLATE #
3746#########################################################################
#
# _fgetexps_: entry point for a single-precision operand passed on the
# stack at 0x8(%a6). The operand is widened through fp0, stored to FP_SRC
# in extended format, classified by tag(), and passed to one handler:
#   NORM -> sgetexp, ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan,
#   any other tag -> sgetexpd (DENORM, per the comment on that call).
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left alone because it holds the result.
#
3747 global _fgetexps_
3748_fgetexps_:
3749 link %a6,&-LOCAL_SIZE
3750
3751 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3752 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3753 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3754
3755 fmov.l &0x0,%fpcr # zero FPCR
3756
3757#
3758# copy, convert, and tag input argument
3759#
# fp0 is a staging register here: the stack operand is loaded in its own
# format and stored to FP_SRC in extended format; a0 -> FP_SRC for tag().
3760 fmov.s 0x8(%a6),%fp0 # load sgl input
3761 fmov.x %fp0,FP_SRC(%a6)
3762 lea FP_SRC(%a6),%a0
3763 bsr.l tag # fetch operand type
# d0.b = tag from tag(); keep it in STAG and d1 since d0 is reused below.
3764 mov.b %d0,STAG(%a6)
3765 mov.b %d0,%d1
3766
# Clear bits 31-24 and 15-8 of the saved FPSR image, keeping bits 23-16
# and 7-0 (M68k FPSR layout: drop the condition-code and exception-status
# bytes, keep the quotient and accrued-exception bytes).
3767 andi.l &0x00ff00ff,USER_FPSR(%a6)
3768
# d0 = FPCR_MODE byte, zero-extended to a longword.
3769 clr.l %d0
3770 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3771
# Dispatch on the tag in d1: zero selects the NORM handler, otherwise
# test ZERO, INF and QNAN in turn and fall through to the last handler.
3772 tst.b %d1
3773 bne.b _L18_2s
3774 bsr.l sgetexp # operand is a NORM
3775 bra.b _L18_6s
3776_L18_2s:
3777 cmpi.b %d1,&ZERO # is operand a ZERO?
3778 bne.b _L18_3s # no
3779 bsr.l src_zero # yes
3780 bra.b _L18_6s
3781_L18_3s:
3782 cmpi.b %d1,&INF # is operand an INF?
3783 bne.b _L18_4s # no
3784 bsr.l t_operr # yes
3785 bra.b _L18_6s
3786_L18_4s:
3787 cmpi.b %d1,&QNAN # is operand a QNAN?
3788 bne.b _L18_5s # no
3789 bsr.l src_qnan # yes
3790 bra.b _L18_6s
3791_L18_5s:
3792 bsr.l sgetexpd # operand is a DENORM
3793_L18_6s:
3794
3795#
3796# Result is now in FP0
3797#
# Reload d0-d1/a0-a1, FPCR/FPSR (from the frame copy, presumably picking
# up the FPSR image edited above) and fp1; fp0 is not reloaded.
3798 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3799 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3800 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3801 unlk %a6
3802 rts
3803
#
# _fgetexpd_: entry point for a double-precision operand passed on the
# stack at 0x8(%a6). The operand is widened through fp0, stored to FP_SRC
# in extended format, classified by tag(), and passed to one handler:
#   NORM -> sgetexp, ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan,
#   any other tag -> sgetexpd (DENORM, per the comment on that call).
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left alone because it holds the result.
#
3804 global _fgetexpd_
3805_fgetexpd_:
3806 link %a6,&-LOCAL_SIZE
3807
3808 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3809 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3810 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3811
3812 fmov.l &0x0,%fpcr # zero FPCR
3813
3814#
3815# copy, convert, and tag input argument
3816#
# fp0 is a staging register here: the stack operand is loaded in its own
# format and stored to FP_SRC in extended format; a0 -> FP_SRC for tag().
3817 fmov.d 0x8(%a6),%fp0 # load dbl input
3818 fmov.x %fp0,FP_SRC(%a6)
3819 lea FP_SRC(%a6),%a0
3820 bsr.l tag # fetch operand type
# d0.b = tag from tag(); keep it in STAG and d1 since d0 is reused below.
3821 mov.b %d0,STAG(%a6)
3822 mov.b %d0,%d1
3823
# Clear bits 31-24 and 15-8 of the saved FPSR image, keeping bits 23-16
# and 7-0 (M68k FPSR layout: drop the condition-code and exception-status
# bytes, keep the quotient and accrued-exception bytes).
3824 andi.l &0x00ff00ff,USER_FPSR(%a6)
3825
# d0 = FPCR_MODE byte, zero-extended to a longword.
3826 clr.l %d0
3827 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3828
# NOTE(review): STAG was already written with this same tag right after
# the tag() call; this store looks redundant -- confirm before removing.
3829 mov.b %d1,STAG(%a6)
# Dispatch on the tag in d1: zero selects the NORM handler, otherwise
# test ZERO, INF and QNAN in turn and fall through to the last handler.
3830 tst.b %d1
3831 bne.b _L18_2d
3832 bsr.l sgetexp # operand is a NORM
3833 bra.b _L18_6d
3834_L18_2d:
3835 cmpi.b %d1,&ZERO # is operand a ZERO?
3836 bne.b _L18_3d # no
3837 bsr.l src_zero # yes
3838 bra.b _L18_6d
3839_L18_3d:
3840 cmpi.b %d1,&INF # is operand an INF?
3841 bne.b _L18_4d # no
3842 bsr.l t_operr # yes
3843 bra.b _L18_6d
3844_L18_4d:
3845 cmpi.b %d1,&QNAN # is operand a QNAN?
3846 bne.b _L18_5d # no
3847 bsr.l src_qnan # yes
3848 bra.b _L18_6d
3849_L18_5d:
3850 bsr.l sgetexpd # operand is a DENORM
3851_L18_6d:
3852
3853#
3854# Result is now in FP0
3855#
# Reload d0-d1/a0-a1, FPCR/FPSR (from the frame copy, presumably picking
# up the FPSR image edited above) and fp1; fp0 is not reloaded.
3856 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3857 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3858 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3859 unlk %a6
3860 rts
3861
# _fgetexpx_ -- entry point taking an extended-precision argument
# (three longwords starting at 0x8(%a6)).
# The argument is copied unchanged into FP_SRC, classified by tag(), and
# handed to the handler that matches its type:
#   NORM -> sgetexp, ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan,
#   any other tag (DENORM per the original comments) -> sgetexpd.
# The handler's result is returned in fp0. d0-d1/a0-a1, FPCR/FPSR and fp1
# are reloaded from the frame on exit; fp0 is not, since it carries the result.
3862 global _fgetexpx_
3863_fgetexpx_:
3864 link %a6,&-LOCAL_SIZE # open a LOCAL_SIZE-byte frame for the save/scratch slots
3865
3866 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3867 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3868 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3869
3870 fmov.l &0x0,%fpcr # zero FPCR
3871
3872#
3873# copy, convert, and tag input argument
3874#
3875 lea FP_SRC(%a6),%a0 # a0 -> FP_SRC, destination of the raw 12-byte copy
3876 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
3877 mov.l 0x8+0x4(%a6),0x4(%a0)
3878 mov.l 0x8+0x8(%a6),0x8(%a0)
3879 bsr.l tag # fetch operand type
3880 mov.b %d0,STAG(%a6) # record the source tag in the frame
3881 mov.b %d0,%d1 # d1 = tag used by the dispatch below
3882
3883 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero bytes 0 and 2 of the saved FPSR image (cc / exception status)
3884
3885 clr.l %d0 # d0 = 0 before the mode byte is inserted
3886 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3887
3888 tst.b %d1 # a zero tag means NORM
3889 bne.b _L18_2x # not a NORM
3890 bsr.l sgetexp # operand is a NORM
3891 bra.b _L18_6x
3892_L18_2x:
3893 cmpi.b %d1,&ZERO # is operand a ZERO?
3894 bne.b _L18_3x # no
3895 bsr.l src_zero # yes
3896 bra.b _L18_6x
3897_L18_3x:
3898 cmpi.b %d1,&INF # is operand an INF?
3899 bne.b _L18_4x # no
3900 bsr.l t_operr # yes
3901 bra.b _L18_6x
3902_L18_4x:
3903 cmpi.b %d1,&QNAN # is operand a QNAN?
3904 bne.b _L18_5x # no
3905 bsr.l src_qnan # yes
3906 bra.b _L18_6x
3907_L18_5x:
3908 bsr.l sgetexpd # operand is a DENORM
3909_L18_6x:
3910
3911#
3912# Result is now in FP0
3913#
3914 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3915 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3916 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3917 unlk %a6
3918 rts
3919
3920
3921#########################################################################
3922# MONADIC TEMPLATE #
3923#########################################################################
# _fgetmans_ -- entry point taking a single-precision argument at 0x8(%a6).
# The argument is widened to extended precision in FP_SRC, classified by
# tag(), and handed to the handler that matches its type:
#   NORM -> sgetman, ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan,
#   any other tag (DENORM per the original comments) -> sgetmand.
# The handler's result is returned in fp0. d0-d1/a0-a1, FPCR/FPSR and fp1
# are reloaded from the frame on exit; fp0 is not, since it carries the result.
3924 global _fgetmans_
3925_fgetmans_:
3926 link %a6,&-LOCAL_SIZE # open a LOCAL_SIZE-byte frame for the save/scratch slots
3927
3928 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3929 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3930 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3931
3932 fmov.l &0x0,%fpcr # zero FPCR
3933
3934#
3935# copy, convert, and tag input argument
3936#
3937 fmov.s 0x8(%a6),%fp0 # load sgl input
3938 fmov.x %fp0,FP_SRC(%a6) # keep the extended-precision copy in FP_SRC
3939 lea FP_SRC(%a6),%a0 # a0 -> operand to classify
3940 bsr.l tag # fetch operand type
3941 mov.b %d0,STAG(%a6) # record the source tag in the frame
3942 mov.b %d0,%d1 # d1 = tag used by the dispatch below
3943
3944 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero bytes 0 and 2 of the saved FPSR image (cc / exception status)
3945
3946 clr.l %d0 # d0 = 0 before the mode byte is inserted
3947 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3948
3949 tst.b %d1 # a zero tag means NORM
3950 bne.b _L19_2s # not a NORM
3951 bsr.l sgetman # operand is a NORM
3952 bra.b _L19_6s
3953_L19_2s:
3954 cmpi.b %d1,&ZERO # is operand a ZERO?
3955 bne.b _L19_3s # no
3956 bsr.l src_zero # yes
3957 bra.b _L19_6s
3958_L19_3s:
3959 cmpi.b %d1,&INF # is operand an INF?
3960 bne.b _L19_4s # no
3961 bsr.l t_operr # yes
3962 bra.b _L19_6s
3963_L19_4s:
3964 cmpi.b %d1,&QNAN # is operand a QNAN?
3965 bne.b _L19_5s # no
3966 bsr.l src_qnan # yes
3967 bra.b _L19_6s
3968_L19_5s:
3969 bsr.l sgetmand # operand is a DENORM
3970_L19_6s:
3971
3972#
3973# Result is now in FP0
3974#
3975 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3976 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3977 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3978 unlk %a6
3979 rts
3980
# _fgetmand_ -- entry point taking a double-precision argument at 0x8(%a6).
# The argument is widened to extended precision in FP_SRC, classified by
# tag(), and handed to the handler that matches its type:
#   NORM -> sgetman, ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan,
#   any other tag (DENORM per the original comments) -> sgetmand.
# The handler's result is returned in fp0. d0-d1/a0-a1, FPCR/FPSR and fp1
# are reloaded from the frame on exit; fp0 is not, since it carries the result.
3981 global _fgetmand_
3982_fgetmand_:
3983 link %a6,&-LOCAL_SIZE # open a LOCAL_SIZE-byte frame for the save/scratch slots
3984
3985 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3986 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3987 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3988
3989 fmov.l &0x0,%fpcr # zero FPCR
3990
3991#
3992# copy, convert, and tag input argument
3993#
3994 fmov.d 0x8(%a6),%fp0 # load dbl input
3995 fmov.x %fp0,FP_SRC(%a6) # keep the extended-precision copy in FP_SRC
3996 lea FP_SRC(%a6),%a0 # a0 -> operand to classify
3997 bsr.l tag # fetch operand type
3998 mov.b %d0,STAG(%a6) # record the source tag in the frame
3999 mov.b %d0,%d1 # d1 = tag used by the dispatch below
4000
4001 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero bytes 0 and 2 of the saved FPSR image (cc / exception status)
4002
4003 clr.l %d0 # d0 = 0 before the mode byte is inserted
4004 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4005
# STAG already holds this tag (stored right after tag() above), so it is
# not written a second time here; this matches the sgl/ext entry points.
4007 tst.b %d1 # a zero tag means NORM
4008 bne.b _L19_2d # not a NORM
4009 bsr.l sgetman # operand is a NORM
4010 bra.b _L19_6d
4011_L19_2d:
4012 cmpi.b %d1,&ZERO # is operand a ZERO?
4013 bne.b _L19_3d # no
4014 bsr.l src_zero # yes
4015 bra.b _L19_6d
4016_L19_3d:
4017 cmpi.b %d1,&INF # is operand an INF?
4018 bne.b _L19_4d # no
4019 bsr.l t_operr # yes
4020 bra.b _L19_6d
4021_L19_4d:
4022 cmpi.b %d1,&QNAN # is operand a QNAN?
4023 bne.b _L19_5d # no
4024 bsr.l src_qnan # yes
4025 bra.b _L19_6d
4026_L19_5d:
4027 bsr.l sgetmand # operand is a DENORM
4028_L19_6d:
4029
4030#
4031# Result is now in FP0
4032#
4033 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4034 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4035 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
4036 unlk %a6
4037 rts
4038
# _fgetmanx_ -- entry point taking an extended-precision argument
# (three longwords starting at 0x8(%a6)).
# The argument is copied unchanged into FP_SRC, classified by tag(), and
# handed to the handler that matches its type:
#   NORM -> sgetman, ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan,
#   any other tag (DENORM per the original comments) -> sgetmand.
# The handler's result is returned in fp0. d0-d1/a0-a1, FPCR/FPSR and fp1
# are reloaded from the frame on exit; fp0 is not, since it carries the result.
4039 global _fgetmanx_
4040_fgetmanx_:
4041 link %a6,&-LOCAL_SIZE # open a LOCAL_SIZE-byte frame for the save/scratch slots
4042
4043 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4044 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4045 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4046
4047 fmov.l &0x0,%fpcr # zero FPCR
4048
4049#
4050# copy, convert, and tag input argument
4051#
4052 lea FP_SRC(%a6),%a0 # a0 -> FP_SRC, destination of the raw 12-byte copy
4053 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
4054 mov.l 0x8+0x4(%a6),0x4(%a0)
4055 mov.l 0x8+0x8(%a6),0x8(%a0)
4056 bsr.l tag # fetch operand type
4057 mov.b %d0,STAG(%a6) # record the source tag in the frame
4058 mov.b %d0,%d1 # d1 = tag used by the dispatch below
4059
4060 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero bytes 0 and 2 of the saved FPSR image (cc / exception status)
4061
4062 clr.l %d0 # d0 = 0 before the mode byte is inserted
4063 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4064
4065 tst.b %d1 # a zero tag means NORM
4066 bne.b _L19_2x # not a NORM
4067 bsr.l sgetman # operand is a NORM
4068 bra.b _L19_6x
4069_L19_2x:
4070 cmpi.b %d1,&ZERO # is operand a ZERO?
4071 bne.b _L19_3x # no
4072 bsr.l src_zero # yes
4073 bra.b _L19_6x
4074_L19_3x:
4075 cmpi.b %d1,&INF # is operand an INF?
4076 bne.b _L19_4x # no
4077 bsr.l t_operr # yes
4078 bra.b _L19_6x
4079_L19_4x:
4080 cmpi.b %d1,&QNAN # is operand a QNAN?
4081 bne.b _L19_5x # no
4082 bsr.l src_qnan # yes
4083 bra.b _L19_6x
4084_L19_5x:
4085 bsr.l sgetmand # operand is a DENORM
4086_L19_6x:
4087
4088#
4089# Result is now in FP0
4090#
4091 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4092 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4093 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
4094 unlk %a6
4095 rts
4096
4097
4098#########################################################################
4099# MONADIC TEMPLATE #
4100#########################################################################
# _fsincoss_ -- entry point taking a single-precision argument at 0x8(%a6).
# The argument is widened to extended precision in FP_SRC, classified by
# tag(), and handed to the handler that matches its type:
#   NORM -> ssincos, ZERO -> ssincosz, INF -> ssincosi, QNAN -> ssincosqnan,
#   any other tag (DENORM per the original comments) -> ssincosd.
# Two values come back, in fp0 and fp1; the epilogue exchanges the two
# registers through the stack before returning. Unlike the single-result
# entry points, the caller's fp1 is therefore NOT restored from EXC_FP1.
# NOTE(review): the ssincos header documents fp0 = sin(X), fp1 = cos(X), so
# after the exchange the caller presumably sees cos in fp0 and sin in fp1 --
# confirm against the library's documented calling convention.
4101 global _fsincoss_
4102_fsincoss_:
4103 link %a6,&-LOCAL_SIZE # open a LOCAL_SIZE-byte frame for the save/scratch slots
4104
4105 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4106 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4107 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4108
4109 fmov.l &0x0,%fpcr # zero FPCR
4110
4111#
4112# copy, convert, and tag input argument
4113#
4114 fmov.s 0x8(%a6),%fp0 # load sgl input
4115 fmov.x %fp0,FP_SRC(%a6) # keep the extended-precision copy in FP_SRC
4116 lea FP_SRC(%a6),%a0 # a0 -> operand to classify
4117 bsr.l tag # fetch operand type
4118 mov.b %d0,STAG(%a6) # record the source tag in the frame
4119 mov.b %d0,%d1 # d1 = tag used by the dispatch below
4120
4121 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero bytes 0 and 2 of the saved FPSR image (cc / exception status)
4122
4123 clr.l %d0 # d0 = 0 before the mode byte is inserted
4124 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4125
4126 tst.b %d1 # a zero tag means NORM
4127 bne.b _L20_2s # not a NORM
4128 bsr.l ssincos # operand is a NORM
4129 bra.b _L20_6s
4130_L20_2s:
4131 cmpi.b %d1,&ZERO # is operand a ZERO?
4132 bne.b _L20_3s # no
4133 bsr.l ssincosz # yes
4134 bra.b _L20_6s
4135_L20_3s:
4136 cmpi.b %d1,&INF # is operand an INF?
4137 bne.b _L20_4s # no
4138 bsr.l ssincosi # yes
4139 bra.b _L20_6s
4140_L20_4s:
4141 cmpi.b %d1,&QNAN # is operand a QNAN?
4142 bne.b _L20_5s # no
4143 bsr.l ssincosqnan # yes
4144 bra.b _L20_6s
4145_L20_5s:
4146 bsr.l ssincosd # operand is a DENORM
4147_L20_6s:
4148
4149#
4150# Results are now in FP0 and FP1; swap them for the caller
4151#
4152 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4153 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4154 fmovm.x &0x03,-(%sp) # store off fp0/fp1
4155 fmovm.x (%sp)+,&0x40 # fp0 now in fp1
4156 fmovm.x (%sp)+,&0x80 # fp1 now in fp0
4157 unlk %a6
4158 rts
4159
# _fsincosd_ -- entry point taking a double-precision argument at 0x8(%a6).
# The argument is widened to extended precision in FP_SRC, classified by
# tag(), and handed to the handler that matches its type:
#   NORM -> ssincos, ZERO -> ssincosz, INF -> ssincosi, QNAN -> ssincosqnan,
#   any other tag (DENORM per the original comments) -> ssincosd.
# Two values come back, in fp0 and fp1; the epilogue exchanges the two
# registers through the stack before returning. Unlike the single-result
# entry points, the caller's fp1 is therefore NOT restored from EXC_FP1.
# NOTE(review): the ssincos header documents fp0 = sin(X), fp1 = cos(X), so
# after the exchange the caller presumably sees cos in fp0 and sin in fp1 --
# confirm against the library's documented calling convention.
4160 global _fsincosd_
4161_fsincosd_:
4162 link %a6,&-LOCAL_SIZE # open a LOCAL_SIZE-byte frame for the save/scratch slots
4163
4164 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4165 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4166 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4167
4168 fmov.l &0x0,%fpcr # zero FPCR
4169
4170#
4171# copy, convert, and tag input argument
4172#
4173 fmov.d 0x8(%a6),%fp0 # load dbl input
4174 fmov.x %fp0,FP_SRC(%a6) # keep the extended-precision copy in FP_SRC
4175 lea FP_SRC(%a6),%a0 # a0 -> operand to classify
4176 bsr.l tag # fetch operand type
4177 mov.b %d0,STAG(%a6) # record the source tag in the frame
4178 mov.b %d0,%d1 # d1 = tag used by the dispatch below
4179
4180 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero bytes 0 and 2 of the saved FPSR image (cc / exception status)
4181
4182 clr.l %d0 # d0 = 0 before the mode byte is inserted
4183 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4184
# STAG already holds this tag (stored right after tag() above), so it is
# not written a second time here; this matches the sgl/ext entry points.
4186 tst.b %d1 # a zero tag means NORM
4187 bne.b _L20_2d # not a NORM
4188 bsr.l ssincos # operand is a NORM
4189 bra.b _L20_6d
4190_L20_2d:
4191 cmpi.b %d1,&ZERO # is operand a ZERO?
4192 bne.b _L20_3d # no
4193 bsr.l ssincosz # yes
4194 bra.b _L20_6d
4195_L20_3d:
4196 cmpi.b %d1,&INF # is operand an INF?
4197 bne.b _L20_4d # no
4198 bsr.l ssincosi # yes
4199 bra.b _L20_6d
4200_L20_4d:
4201 cmpi.b %d1,&QNAN # is operand a QNAN?
4202 bne.b _L20_5d # no
4203 bsr.l ssincosqnan # yes
4204 bra.b _L20_6d
4205_L20_5d:
4206 bsr.l ssincosd # operand is a DENORM
4207_L20_6d:
4208
4209#
4210# Results are now in FP0 and FP1; swap them for the caller
4211#
4212 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4213 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4214 fmovm.x &0x03,-(%sp) # store off fp0/fp1
4215 fmovm.x (%sp)+,&0x40 # fp0 now in fp1
4216 fmovm.x (%sp)+,&0x80 # fp1 now in fp0
4217 unlk %a6
4218 rts
4219
# _fsincosx_ -- entry point taking an extended-precision argument
# (three longwords starting at 0x8(%a6)).
# The argument is copied unchanged into FP_SRC, classified by tag(), and
# handed to the handler that matches its type:
#   NORM -> ssincos, ZERO -> ssincosz, INF -> ssincosi, QNAN -> ssincosqnan,
#   any other tag (DENORM per the original comments) -> ssincosd.
# Two values come back, in fp0 and fp1; the epilogue exchanges the two
# registers through the stack before returning. Unlike the single-result
# entry points, the caller's fp1 is therefore NOT restored from EXC_FP1.
# NOTE(review): the ssincos header documents fp0 = sin(X), fp1 = cos(X), so
# after the exchange the caller presumably sees cos in fp0 and sin in fp1 --
# confirm against the library's documented calling convention.
4220 global _fsincosx_
4221_fsincosx_:
4222 link %a6,&-LOCAL_SIZE # open a LOCAL_SIZE-byte frame for the save/scratch slots
4223
4224 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4225 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4226 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4227
4228 fmov.l &0x0,%fpcr # zero FPCR
4229
4230#
4231# copy, convert, and tag input argument
4232#
4233 lea FP_SRC(%a6),%a0 # a0 -> FP_SRC, destination of the raw 12-byte copy
4234 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
4235 mov.l 0x8+0x4(%a6),0x4(%a0)
4236 mov.l 0x8+0x8(%a6),0x8(%a0)
4237 bsr.l tag # fetch operand type
4238 mov.b %d0,STAG(%a6) # record the source tag in the frame
4239 mov.b %d0,%d1 # d1 = tag used by the dispatch below
4240
4241 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero bytes 0 and 2 of the saved FPSR image (cc / exception status)
4242
4243 clr.l %d0 # d0 = 0 before the mode byte is inserted
4244 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4245
4246 tst.b %d1 # a zero tag means NORM
4247 bne.b _L20_2x # not a NORM
4248 bsr.l ssincos # operand is a NORM
4249 bra.b _L20_6x
4250_L20_2x:
4251 cmpi.b %d1,&ZERO # is operand a ZERO?
4252 bne.b _L20_3x # no
4253 bsr.l ssincosz # yes
4254 bra.b _L20_6x
4255_L20_3x:
4256 cmpi.b %d1,&INF # is operand an INF?
4257 bne.b _L20_4x # no
4258 bsr.l ssincosi # yes
4259 bra.b _L20_6x
4260_L20_4x:
4261 cmpi.b %d1,&QNAN # is operand a QNAN?
4262 bne.b _L20_5x # no
4263 bsr.l ssincosqnan # yes
4264 bra.b _L20_6x
4265_L20_5x:
4266 bsr.l ssincosd # operand is a DENORM
4267_L20_6x:
4268
4269#
4270# Results are now in FP0 and FP1; swap them for the caller
4271#
4272 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4273 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4274 fmovm.x &0x03,-(%sp) # store off fp0/fp1
4275 fmovm.x (%sp)+,&0x40 # fp0 now in fp1
4276 fmovm.x (%sp)+,&0x80 # fp1 now in fp0
4277 unlk %a6
4278 rts
4279
4280
4281#########################################################################
4282# DYADIC TEMPLATE #
4283#########################################################################
# _frems_ -- two-operand entry point, single-precision arguments:
#   dst at 0x8(%a6), src at 0xc(%a6).
# Both are widened to extended precision (FP_DST / FP_SRC) and classified
# by tag(); the tags are stored in DTAG and STAG. Dispatch is on the SRC
# tag only:
#   NORM -> srem_snorm, ZERO -> srem_szero, INF -> srem_sinf,
#   QNAN -> sop_sqnan, any other tag (DENORM per the comments) -> srem_sdnrm.
# Handlers get a0 -> FP_SRC, a1 -> FP_DST, d0 = rounding mode/precision byte.
# (Presumably they consult DTAG for the dst type -- not visible here.)
# Result is returned in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded
# from the frame on exit.
4284 global _frems_
4285_frems_:
4286 link %a6,&-LOCAL_SIZE # open a LOCAL_SIZE-byte frame for the save/scratch slots
4287
4288 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4289 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4290 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4291
4292 fmov.l &0x0,%fpcr # zero FPCR
4293
4294#
4295# copy, convert, and tag input argument
4296#
4297 fmov.s 0x8(%a6),%fp0 # load sgl dst
4298 fmov.x %fp0,FP_DST(%a6) # keep the extended-precision dst in FP_DST
4299 lea FP_DST(%a6),%a0 # a0 -> dst operand to classify
4300 bsr.l tag # fetch operand type
4301 mov.b %d0,DTAG(%a6) # record the dst tag in the frame
4302
4303 fmov.s 0xc(%a6),%fp0 # load sgl src
4304 fmov.x %fp0,FP_SRC(%a6) # keep the extended-precision src in FP_SRC
4305 lea FP_SRC(%a6),%a0 # a0 -> src operand to classify
4306 bsr.l tag # fetch operand type
4307 mov.b %d0,STAG(%a6) # record the src tag in the frame
4308 mov.l %d0,%d1 # d1 = src tag used by the dispatch below
4309
4310 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero bytes 0 and 2 of the saved FPSR image (cc / exception status)
4311
4312 clr.l %d0 # d0 = 0 before the mode byte is inserted
4313 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4314
4315 lea FP_SRC(%a6),%a0 # pass ptr to src
4316 lea FP_DST(%a6),%a1 # pass ptr to dst
4317
4318 tst.b %d1 # a zero src tag means NORM
4319 bne.b _L21_2s # src is not a NORM
4320 bsr.l srem_snorm # operand is a NORM
4321 bra.b _L21_6s
4322_L21_2s:
4323 cmpi.b %d1,&ZERO # is operand a ZERO?
4324 bne.b _L21_3s # no
4325 bsr.l srem_szero # yes
4326 bra.b _L21_6s
4327_L21_3s:
4328 cmpi.b %d1,&INF # is operand an INF?
4329 bne.b _L21_4s # no
4330 bsr.l srem_sinf # yes
4331 bra.b _L21_6s
4332_L21_4s:
4333 cmpi.b %d1,&QNAN # is operand a QNAN?
4334 bne.b _L21_5s # no
4335 bsr.l sop_sqnan # yes
4336 bra.b _L21_6s
4337_L21_5s:
4338 bsr.l srem_sdnrm # operand is a DENORM
4339_L21_6s:
4340
4341#
4342# Result is now in FP0
4343#
4344 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4345 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4346 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
4347 unlk %a6
4348 rts
4349
# _fremd_ -- two-operand entry point, double-precision arguments:
#   dst at 0x8(%a6), src at 0x10(%a6).
# Both are widened to extended precision (FP_DST / FP_SRC) and classified
# by tag(); the tags are stored in DTAG and STAG. Dispatch is on the SRC
# tag only:
#   NORM -> srem_snorm, ZERO -> srem_szero, INF -> srem_sinf,
#   QNAN -> sop_sqnan, any other tag (DENORM per the comments) -> srem_sdnrm.
# Handlers get a0 -> FP_SRC, a1 -> FP_DST, d0 = rounding mode/precision byte.
# (Presumably they consult DTAG for the dst type -- not visible here.)
# Result is returned in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded
# from the frame on exit.
4350 global _fremd_
4351_fremd_:
4352 link %a6,&-LOCAL_SIZE # open a LOCAL_SIZE-byte frame for the save/scratch slots
4353
4354 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4355 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4356 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4357
4358 fmov.l &0x0,%fpcr # zero FPCR
4359
4360#
4361# copy, convert, and tag input argument
4362#
4363 fmov.d 0x8(%a6),%fp0 # load dbl dst
4364 fmov.x %fp0,FP_DST(%a6) # keep the extended-precision dst in FP_DST
4365 lea FP_DST(%a6),%a0 # a0 -> dst operand to classify
4366 bsr.l tag # fetch operand type
4367 mov.b %d0,DTAG(%a6) # record the dst tag in the frame
4368
4369 fmov.d 0x10(%a6),%fp0 # load dbl src
4370 fmov.x %fp0,FP_SRC(%a6) # keep the extended-precision src in FP_SRC
4371 lea FP_SRC(%a6),%a0 # a0 -> src operand to classify
4372 bsr.l tag # fetch operand type
4373 mov.b %d0,STAG(%a6) # record the src tag in the frame
4374 mov.l %d0,%d1 # d1 = src tag used by the dispatch below
4375
4376 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero bytes 0 and 2 of the saved FPSR image (cc / exception status)
4377
4378 clr.l %d0 # d0 = 0 before the mode byte is inserted
4379 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4380
4381 lea FP_SRC(%a6),%a0 # pass ptr to src
4382 lea FP_DST(%a6),%a1 # pass ptr to dst
4383
4384 tst.b %d1 # a zero src tag means NORM
4385 bne.b _L21_2d # src is not a NORM
4386 bsr.l srem_snorm # operand is a NORM
4387 bra.b _L21_6d
4388_L21_2d:
4389 cmpi.b %d1,&ZERO # is operand a ZERO?
4390 bne.b _L21_3d # no
4391 bsr.l srem_szero # yes
4392 bra.b _L21_6d
4393_L21_3d:
4394 cmpi.b %d1,&INF # is operand an INF?
4395 bne.b _L21_4d # no
4396 bsr.l srem_sinf # yes
4397 bra.b _L21_6d
4398_L21_4d:
4399 cmpi.b %d1,&QNAN # is operand a QNAN?
4400 bne.b _L21_5d # no
4401 bsr.l sop_sqnan # yes
4402 bra.b _L21_6d
4403_L21_5d:
4404 bsr.l srem_sdnrm # operand is a DENORM
4405_L21_6d:
4406
4407#
4408# Result is now in FP0
4409#
4410 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4411 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4412 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
4413 unlk %a6
4414 rts
4415
# _fremx_ -- two-operand entry point, extended-precision arguments:
#   dst = three longwords at 0x8(%a6), src = three longwords at 0x14(%a6).
# Both are copied unchanged into FP_DST / FP_SRC and classified by tag();
# the tags are stored in DTAG and STAG. Dispatch is on the SRC tag only:
#   NORM -> srem_snorm, ZERO -> srem_szero, INF -> srem_sinf,
#   QNAN -> sop_sqnan, any other tag (DENORM per the comments) -> srem_sdnrm.
# Handlers get a0 -> FP_SRC, a1 -> FP_DST, d0 = rounding mode/precision byte.
# (Presumably they consult DTAG for the dst type -- not visible here.)
# Result is returned in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded
# from the frame on exit.
4416 global _fremx_
4417_fremx_:
4418 link %a6,&-LOCAL_SIZE # open a LOCAL_SIZE-byte frame for the save/scratch slots
4419
4420 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4421 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4422 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4423
4424 fmov.l &0x0,%fpcr # zero FPCR
4425
4426#
4427# copy, convert, and tag input argument
4428#
4429 lea FP_DST(%a6),%a0 # a0 -> FP_DST, destination of the raw 12-byte copy
4430 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext dst
4431 mov.l 0x8+0x4(%a6),0x4(%a0)
4432 mov.l 0x8+0x8(%a6),0x8(%a0)
4433 bsr.l tag # fetch operand type
4434 mov.b %d0,DTAG(%a6) # record the dst tag in the frame
4435
4436 lea FP_SRC(%a6),%a0 # a0 -> FP_SRC, destination of the raw 12-byte copy
4437 mov.l 0x14+0x0(%a6),0x0(%a0) # load ext src
4438 mov.l 0x14+0x4(%a6),0x4(%a0)
4439 mov.l 0x14+0x8(%a6),0x8(%a0)
4440 bsr.l tag # fetch operand type
4441 mov.b %d0,STAG(%a6) # record the src tag in the frame
4442 mov.l %d0,%d1 # d1 = src tag used by the dispatch below
4443
4444 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero bytes 0 and 2 of the saved FPSR image (cc / exception status)
4445
4446 clr.l %d0 # d0 = 0 before the mode byte is inserted
4447 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4448
4449 lea FP_SRC(%a6),%a0 # pass ptr to src
4450 lea FP_DST(%a6),%a1 # pass ptr to dst
4451
4452 tst.b %d1 # a zero src tag means NORM
4453 bne.b _L21_2x # src is not a NORM
4454 bsr.l srem_snorm # operand is a NORM
4455 bra.b _L21_6x
4456_L21_2x:
4457 cmpi.b %d1,&ZERO # is operand a ZERO?
4458 bne.b _L21_3x # no
4459 bsr.l srem_szero # yes
4460 bra.b _L21_6x
4461_L21_3x:
4462 cmpi.b %d1,&INF # is operand an INF?
4463 bne.b _L21_4x # no
4464 bsr.l srem_sinf # yes
4465 bra.b _L21_6x
4466_L21_4x:
4467 cmpi.b %d1,&QNAN # is operand a QNAN?
4468 bne.b _L21_5x # no
4469 bsr.l sop_sqnan # yes
4470 bra.b _L21_6x
4471_L21_5x:
4472 bsr.l srem_sdnrm # operand is a DENORM
4473_L21_6x:
4474
4475#
4476# Result is now in FP0
4477#
4478 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4479 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4480 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
4481 unlk %a6
4482 rts
4483
4484
4485#########################################################################
4486# DYADIC TEMPLATE #
4487#########################################################################
# _fmods_ -- two-operand entry point, single-precision arguments:
#   dst at 0x8(%a6), src at 0xc(%a6).
# Both are widened to extended precision (FP_DST / FP_SRC) and classified
# by tag(); the tags are stored in DTAG and STAG. Dispatch is on the SRC
# tag only:
#   NORM -> smod_snorm, ZERO -> smod_szero, INF -> smod_sinf,
#   QNAN -> sop_sqnan, any other tag (DENORM per the comments) -> smod_sdnrm.
# Handlers get a0 -> FP_SRC, a1 -> FP_DST, d0 = rounding mode/precision byte.
# (Presumably they consult DTAG for the dst type -- not visible here.)
# Result is returned in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded
# from the frame on exit.
4488 global _fmods_
4489_fmods_:
4490 link %a6,&-LOCAL_SIZE # open a LOCAL_SIZE-byte frame for the save/scratch slots
4491
4492 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4493 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4494 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4495
4496 fmov.l &0x0,%fpcr # zero FPCR
4497
4498#
4499# copy, convert, and tag input argument
4500#
4501 fmov.s 0x8(%a6),%fp0 # load sgl dst
4502 fmov.x %fp0,FP_DST(%a6) # keep the extended-precision dst in FP_DST
4503 lea FP_DST(%a6),%a0 # a0 -> dst operand to classify
4504 bsr.l tag # fetch operand type
4505 mov.b %d0,DTAG(%a6) # record the dst tag in the frame
4506
4507 fmov.s 0xc(%a6),%fp0 # load sgl src
4508 fmov.x %fp0,FP_SRC(%a6) # keep the extended-precision src in FP_SRC
4509 lea FP_SRC(%a6),%a0 # a0 -> src operand to classify
4510 bsr.l tag # fetch operand type
4511 mov.b %d0,STAG(%a6) # record the src tag in the frame
4512 mov.l %d0,%d1 # d1 = src tag used by the dispatch below
4513
4514 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero bytes 0 and 2 of the saved FPSR image (cc / exception status)
4515
4516 clr.l %d0 # d0 = 0 before the mode byte is inserted
4517 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4518
4519 lea FP_SRC(%a6),%a0 # pass ptr to src
4520 lea FP_DST(%a6),%a1 # pass ptr to dst
4521
4522 tst.b %d1 # a zero src tag means NORM
4523 bne.b _L22_2s # src is not a NORM
4524 bsr.l smod_snorm # operand is a NORM
4525 bra.b _L22_6s
4526_L22_2s:
4527 cmpi.b %d1,&ZERO # is operand a ZERO?
4528 bne.b _L22_3s # no
4529 bsr.l smod_szero # yes
4530 bra.b _L22_6s
4531_L22_3s:
4532 cmpi.b %d1,&INF # is operand an INF?
4533 bne.b _L22_4s # no
4534 bsr.l smod_sinf # yes
4535 bra.b _L22_6s
4536_L22_4s:
4537 cmpi.b %d1,&QNAN # is operand a QNAN?
4538 bne.b _L22_5s # no
4539 bsr.l sop_sqnan # yes
4540 bra.b _L22_6s
4541_L22_5s:
4542 bsr.l smod_sdnrm # operand is a DENORM
4543_L22_6s:
4544
4545#
4546# Result is now in FP0
4547#
4548 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4549 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4550 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
4551 unlk %a6
4552 rts
4553
# _fmodd_ -- two-operand entry point, double-precision arguments:
#   dst at 0x8(%a6), src at 0x10(%a6).
# Both are widened to extended precision (FP_DST / FP_SRC) and classified
# by tag(); the tags are stored in DTAG and STAG. Dispatch is on the SRC
# tag only:
#   NORM -> smod_snorm, ZERO -> smod_szero, INF -> smod_sinf,
#   QNAN -> sop_sqnan, any other tag (DENORM per the comments) -> smod_sdnrm.
# Handlers get a0 -> FP_SRC, a1 -> FP_DST, d0 = rounding mode/precision byte.
# (Presumably they consult DTAG for the dst type -- not visible here.)
# Result is returned in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded
# from the frame on exit.
4554 global _fmodd_
4555_fmodd_:
4556 link %a6,&-LOCAL_SIZE # open a LOCAL_SIZE-byte frame for the save/scratch slots
4557
4558 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4559 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4560 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4561
4562 fmov.l &0x0,%fpcr # zero FPCR
4563
4564#
4565# copy, convert, and tag input argument
4566#
4567 fmov.d 0x8(%a6),%fp0 # load dbl dst
4568 fmov.x %fp0,FP_DST(%a6) # keep the extended-precision dst in FP_DST
4569 lea FP_DST(%a6),%a0 # a0 -> dst operand to classify
4570 bsr.l tag # fetch operand type
4571 mov.b %d0,DTAG(%a6) # record the dst tag in the frame
4572
4573 fmov.d 0x10(%a6),%fp0 # load dbl src
4574 fmov.x %fp0,FP_SRC(%a6) # keep the extended-precision src in FP_SRC
4575 lea FP_SRC(%a6),%a0 # a0 -> src operand to classify
4576 bsr.l tag # fetch operand type
4577 mov.b %d0,STAG(%a6) # record the src tag in the frame
4578 mov.l %d0,%d1 # d1 = src tag used by the dispatch below
4579
4580 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero bytes 0 and 2 of the saved FPSR image (cc / exception status)
4581
4582 clr.l %d0 # d0 = 0 before the mode byte is inserted
4583 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4584
4585 lea FP_SRC(%a6),%a0 # pass ptr to src
4586 lea FP_DST(%a6),%a1 # pass ptr to dst
4587
4588 tst.b %d1 # a zero src tag means NORM
4589 bne.b _L22_2d # src is not a NORM
4590 bsr.l smod_snorm # operand is a NORM
4591 bra.b _L22_6d
4592_L22_2d:
4593 cmpi.b %d1,&ZERO # is operand a ZERO?
4594 bne.b _L22_3d # no
4595 bsr.l smod_szero # yes
4596 bra.b _L22_6d
4597_L22_3d:
4598 cmpi.b %d1,&INF # is operand an INF?
4599 bne.b _L22_4d # no
4600 bsr.l smod_sinf # yes
4601 bra.b _L22_6d
4602_L22_4d:
4603 cmpi.b %d1,&QNAN # is operand a QNAN?
4604 bne.b _L22_5d # no
4605 bsr.l sop_sqnan # yes
4606 bra.b _L22_6d
4607_L22_5d:
4608 bsr.l smod_sdnrm # operand is a DENORM
4609_L22_6d:
4610
4611#
4612# Result is now in FP0
4613#
4614 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4615 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4616 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
4617 unlk %a6
4618 rts
4619
# _fmodx_ -- two-operand entry point, extended-precision arguments:
#   dst = three longwords at 0x8(%a6), src = three longwords at 0x14(%a6).
# Both are copied unchanged into FP_DST / FP_SRC and classified by tag();
# the tags are stored in DTAG and STAG. Dispatch is on the SRC tag only:
#   NORM -> smod_snorm, ZERO -> smod_szero, INF -> smod_sinf,
#   QNAN -> sop_sqnan, any other tag (DENORM per the comments) -> smod_sdnrm.
# Handlers get a0 -> FP_SRC, a1 -> FP_DST, d0 = rounding mode/precision byte.
# (Presumably they consult DTAG for the dst type -- not visible here.)
# Result is returned in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded
# from the frame on exit.
4620 global _fmodx_
4621_fmodx_:
4622 link %a6,&-LOCAL_SIZE # open a LOCAL_SIZE-byte frame for the save/scratch slots
4623
4624 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4625 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4626 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4627
4628 fmov.l &0x0,%fpcr # zero FPCR
4629
4630#
4631# copy, convert, and tag input argument
4632#
4633 lea FP_DST(%a6),%a0 # a0 -> FP_DST, destination of the raw 12-byte copy
4634 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext dst
4635 mov.l 0x8+0x4(%a6),0x4(%a0)
4636 mov.l 0x8+0x8(%a6),0x8(%a0)
4637 bsr.l tag # fetch operand type
4638 mov.b %d0,DTAG(%a6) # record the dst tag in the frame
4639
4640 lea FP_SRC(%a6),%a0 # a0 -> FP_SRC, destination of the raw 12-byte copy
4641 mov.l 0x14+0x0(%a6),0x0(%a0) # load ext src
4642 mov.l 0x14+0x4(%a6),0x4(%a0)
4643 mov.l 0x14+0x8(%a6),0x8(%a0)
4644 bsr.l tag # fetch operand type
4645 mov.b %d0,STAG(%a6) # record the src tag in the frame
4646 mov.l %d0,%d1 # d1 = src tag used by the dispatch below
4647
4648 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero bytes 0 and 2 of the saved FPSR image (cc / exception status)
4649
4650 clr.l %d0 # d0 = 0 before the mode byte is inserted
4651 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4652
4653 lea FP_SRC(%a6),%a0 # pass ptr to src
4654 lea FP_DST(%a6),%a1 # pass ptr to dst
4655
4656 tst.b %d1 # a zero src tag means NORM
4657 bne.b _L22_2x # src is not a NORM
4658 bsr.l smod_snorm # operand is a NORM
4659 bra.b _L22_6x
4660_L22_2x:
4661 cmpi.b %d1,&ZERO # is operand a ZERO?
4662 bne.b _L22_3x # no
4663 bsr.l smod_szero # yes
4664 bra.b _L22_6x
4665_L22_3x:
4666 cmpi.b %d1,&INF # is operand an INF?
4667 bne.b _L22_4x # no
4668 bsr.l smod_sinf # yes
4669 bra.b _L22_6x
4670_L22_4x:
4671 cmpi.b %d1,&QNAN # is operand a QNAN?
4672 bne.b _L22_5x # no
4673 bsr.l sop_sqnan # yes
4674 bra.b _L22_6x
4675_L22_5x:
4676 bsr.l smod_sdnrm # operand is a DENORM
4677_L22_6x:
4678
4679#
4680# Result is now in FP0
4681#
4682 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4683 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4684 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
4685 unlk %a6
4686 rts
4687
4688
4689#########################################################################
4690# DYADIC TEMPLATE #
4691#########################################################################
# _fscales_ -- two-operand entry point, single-precision arguments:
#   dst at 0x8(%a6), src at 0xc(%a6).
# Both are widened to extended precision (FP_DST / FP_SRC) and classified
# by tag(); the tags are stored in DTAG and STAG. Dispatch is on the SRC
# tag only:
#   NORM -> sscale_snorm, ZERO -> sscale_szero, INF -> sscale_sinf,
#   QNAN -> sop_sqnan, any other tag (DENORM per the comments) -> sscale_sdnrm.
# Handlers get a0 -> FP_SRC, a1 -> FP_DST, d0 = rounding mode/precision byte.
# (Presumably they consult DTAG for the dst type -- not visible here.)
# Result is returned in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded
# from the frame on exit.
4692 global _fscales_
4693_fscales_:
4694 link %a6,&-LOCAL_SIZE # open a LOCAL_SIZE-byte frame for the save/scratch slots
4695
4696 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4697 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4698 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4699
4700 fmov.l &0x0,%fpcr # zero FPCR
4701
4702#
4703# copy, convert, and tag input argument
4704#
4705 fmov.s 0x8(%a6),%fp0 # load sgl dst
4706 fmov.x %fp0,FP_DST(%a6) # keep the extended-precision dst in FP_DST
4707 lea FP_DST(%a6),%a0 # a0 -> dst operand to classify
4708 bsr.l tag # fetch operand type
4709 mov.b %d0,DTAG(%a6) # record the dst tag in the frame
4710
4711 fmov.s 0xc(%a6),%fp0 # load sgl src
4712 fmov.x %fp0,FP_SRC(%a6) # keep the extended-precision src in FP_SRC
4713 lea FP_SRC(%a6),%a0 # a0 -> src operand to classify
4714 bsr.l tag # fetch operand type
4715 mov.b %d0,STAG(%a6) # record the src tag in the frame
4716 mov.l %d0,%d1 # d1 = src tag used by the dispatch below
4717
4718 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero bytes 0 and 2 of the saved FPSR image (cc / exception status)
4719
4720 clr.l %d0 # d0 = 0 before the mode byte is inserted
4721 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4722
4723 lea FP_SRC(%a6),%a0 # pass ptr to src
4724 lea FP_DST(%a6),%a1 # pass ptr to dst
4725
4726 tst.b %d1 # a zero src tag means NORM
4727 bne.b _L23_2s # src is not a NORM
4728 bsr.l sscale_snorm # operand is a NORM
4729 bra.b _L23_6s
4730_L23_2s:
4731 cmpi.b %d1,&ZERO # is operand a ZERO?
4732 bne.b _L23_3s # no
4733 bsr.l sscale_szero # yes
4734 bra.b _L23_6s
4735_L23_3s:
4736 cmpi.b %d1,&INF # is operand an INF?
4737 bne.b _L23_4s # no
4738 bsr.l sscale_sinf # yes
4739 bra.b _L23_6s
4740_L23_4s:
4741 cmpi.b %d1,&QNAN # is operand a QNAN?
4742 bne.b _L23_5s # no
4743 bsr.l sop_sqnan # yes
4744 bra.b _L23_6s
4745_L23_5s:
4746 bsr.l sscale_sdnrm # operand is a DENORM
4747_L23_6s:
4748
4749#
4750# Result is now in FP0
4751#
4752 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4753 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4754 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
4755 unlk %a6
4756 rts
4757
# _fscaled_ -- two-operand entry point, double-precision arguments:
#   dst at 0x8(%a6), src at 0x10(%a6).
# Both are widened to extended precision (FP_DST / FP_SRC) and classified
# by tag(); the tags are stored in DTAG and STAG. Dispatch is on the SRC
# tag only:
#   NORM -> sscale_snorm, ZERO -> sscale_szero, INF -> sscale_sinf,
#   QNAN -> sop_sqnan, any other tag (DENORM per the comments) -> sscale_sdnrm.
# Handlers get a0 -> FP_SRC, a1 -> FP_DST, d0 = rounding mode/precision byte.
# (Presumably they consult DTAG for the dst type -- not visible here.)
# Result is returned in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded
# from the frame on exit.
4758 global _fscaled_
4759_fscaled_:
4760 link %a6,&-LOCAL_SIZE # open a LOCAL_SIZE-byte frame for the save/scratch slots
4761
4762 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4763 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4764 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4765
4766 fmov.l &0x0,%fpcr # zero FPCR
4767
4768#
4769# copy, convert, and tag input argument
4770#
4771 fmov.d 0x8(%a6),%fp0 # load dbl dst
4772 fmov.x %fp0,FP_DST(%a6) # keep the extended-precision dst in FP_DST
4773 lea FP_DST(%a6),%a0 # a0 -> dst operand to classify
4774 bsr.l tag # fetch operand type
4775 mov.b %d0,DTAG(%a6) # record the dst tag in the frame
4776
4777 fmov.d 0x10(%a6),%fp0 # load dbl src
4778 fmov.x %fp0,FP_SRC(%a6) # keep the extended-precision src in FP_SRC
4779 lea FP_SRC(%a6),%a0 # a0 -> src operand to classify
4780 bsr.l tag # fetch operand type
4781 mov.b %d0,STAG(%a6) # record the src tag in the frame
4782 mov.l %d0,%d1 # d1 = src tag used by the dispatch below
4783
4784 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero bytes 0 and 2 of the saved FPSR image (cc / exception status)
4785
4786 clr.l %d0 # d0 = 0 before the mode byte is inserted
4787 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4788
4789 lea FP_SRC(%a6),%a0 # pass ptr to src
4790 lea FP_DST(%a6),%a1 # pass ptr to dst
4791
4792 tst.b %d1 # a zero src tag means NORM
4793 bne.b _L23_2d # src is not a NORM
4794 bsr.l sscale_snorm # operand is a NORM
4795 bra.b _L23_6d
4796_L23_2d:
4797 cmpi.b %d1,&ZERO # is operand a ZERO?
4798 bne.b _L23_3d # no
4799 bsr.l sscale_szero # yes
4800 bra.b _L23_6d
4801_L23_3d:
4802 cmpi.b %d1,&INF # is operand an INF?
4803 bne.b _L23_4d # no
4804 bsr.l sscale_sinf # yes
4805 bra.b _L23_6d
4806_L23_4d:
4807 cmpi.b %d1,&QNAN # is operand a QNAN?
4808 bne.b _L23_5d # no
4809 bsr.l sop_sqnan # yes
4810 bra.b _L23_6d
4811_L23_5d:
4812 bsr.l sscale_sdnrm # operand is a DENORM
4813_L23_6d:
4814
4815#
4816# Result is now in FP0
4817#
4818 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4819 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4820 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
4821 unlk %a6
4822 rts
4823
# _fscalex_ -- two-operand entry point, extended-precision arguments:
#   dst = three longwords at 0x8(%a6), src = three longwords at 0x14(%a6).
# Both are copied unchanged into FP_DST / FP_SRC and classified by tag();
# the tags are stored in DTAG and STAG. Dispatch is on the SRC tag only:
#   NORM -> sscale_snorm, ZERO -> sscale_szero, INF -> sscale_sinf,
#   QNAN -> sop_sqnan, any other tag (DENORM per the comments) -> sscale_sdnrm.
# Handlers get a0 -> FP_SRC, a1 -> FP_DST, d0 = rounding mode/precision byte.
# (Presumably they consult DTAG for the dst type -- not visible here.)
# Result is returned in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded
# from the frame on exit.
4824 global _fscalex_
4825_fscalex_:
4826 link %a6,&-LOCAL_SIZE # open a LOCAL_SIZE-byte frame for the save/scratch slots
4827
4828 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4829 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4830 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4831
4832 fmov.l &0x0,%fpcr # zero FPCR
4833
4834#
4835# copy, convert, and tag input argument
4836#
4837 lea FP_DST(%a6),%a0 # a0 -> FP_DST, destination of the raw 12-byte copy
4838 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext dst
4839 mov.l 0x8+0x4(%a6),0x4(%a0)
4840 mov.l 0x8+0x8(%a6),0x8(%a0)
4841 bsr.l tag # fetch operand type
4842 mov.b %d0,DTAG(%a6) # record the dst tag in the frame
4843
4844 lea FP_SRC(%a6),%a0 # a0 -> FP_SRC, destination of the raw 12-byte copy
4845 mov.l 0x14+0x0(%a6),0x0(%a0) # load ext src
4846 mov.l 0x14+0x4(%a6),0x4(%a0)
4847 mov.l 0x14+0x8(%a6),0x8(%a0)
4848 bsr.l tag # fetch operand type
4849 mov.b %d0,STAG(%a6) # record the src tag in the frame
4850 mov.l %d0,%d1 # d1 = src tag used by the dispatch below
4851
4852 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero bytes 0 and 2 of the saved FPSR image (cc / exception status)
4853
4854 clr.l %d0 # d0 = 0 before the mode byte is inserted
4855 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4856
4857 lea FP_SRC(%a6),%a0 # pass ptr to src
4858 lea FP_DST(%a6),%a1 # pass ptr to dst
4859
4860 tst.b %d1 # a zero src tag means NORM
4861 bne.b _L23_2x # src is not a NORM
4862 bsr.l sscale_snorm # operand is a NORM
4863 bra.b _L23_6x
4864_L23_2x:
4865 cmpi.b %d1,&ZERO # is operand a ZERO?
4866 bne.b _L23_3x # no
4867 bsr.l sscale_szero # yes
4868 bra.b _L23_6x
4869_L23_3x:
4870 cmpi.b %d1,&INF # is operand an INF?
4871 bne.b _L23_4x # no
4872 bsr.l sscale_sinf # yes
4873 bra.b _L23_6x
4874_L23_4x:
4875 cmpi.b %d1,&QNAN # is operand a QNAN?
4876 bne.b _L23_5x # no
4877 bsr.l sop_sqnan # yes
4878 bra.b _L23_6x
4879_L23_5x:
4880 bsr.l sscale_sdnrm # operand is a DENORM
4881_L23_6x:
4882
4883#
4884# Result is now in FP0
4885#
4886 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4887 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4888 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
4889 unlk %a6
4890 rts
4891
4892
4893#########################################################################
4894# ssin(): computes the sine of a normalized input #
4895# ssind(): computes the sine of a denormalized input #
4896# scos(): computes the cosine of a normalized input #
4897# scosd(): computes the cosine of a denormalized input #
4898# ssincos(): computes the sine and cosine of a normalized input #
4899# ssincosd(): computes the sine and cosine of a denormalized input #
4900# #
4901# INPUT *************************************************************** #
4902# a0 = pointer to extended precision input #
4903# d0 = round precision,mode #
4904# #
4905# OUTPUT ************************************************************** #
4906# fp0 = sin(X) or cos(X) #
4907# #
4908# For ssincos(X): #
4909# fp0 = sin(X) #
4910# fp1 = cos(X) #
4911# #
4912# ACCURACY and MONOTONICITY ******************************************* #
4913# The returned result is within 1 ulp in 64 significant bits, i.e. #
4914# within 0.5001 ulp to 53 bits if the result is subsequently #
4915# rounded to double precision. The result is provably monotonic #
4916# in double precision. #
4917# #
4918# ALGORITHM *********************************************************** #
4919# #
4920# SIN and COS: #
4921# 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. #
4922# #
4923# 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. #
4924# #
4925# 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
4926# k = N mod 4, so in particular, k = 0,1,2,or 3. #
4927# Overwrite k by k := k + AdjN. #
4928# #
4929# 4. If k is even, go to 6. #
4930# #
4931# 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. #
4932# Return sgn*cos(r) where cos(r) is approximated by an #
4933# even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)), #
4934# s = r*r. #
4935# Exit. #
4936# #
4937# 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) #
4938# where sin(r) is approximated by an odd polynomial in r #
4939# r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. #
4940# Exit. #
4941# #
4942# 7. If |X| > 1, go to 9. #
4943# #
4944# 8. (|X|<2**(-40)) If SIN is invoked, return X; #
4945# otherwise return 1. #
4946# #
4947# 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
4948# go back to 3. #
4949# #
4950# SINCOS: #
4951# 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
4952# #
4953# 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
4954# k = N mod 4, so in particular, k = 0,1,2,or 3. #
4955# #
4956# 3. If k is even, go to 5. #
4957# #
4958# 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e. #
4959# j1 exclusive or with the l.s.b. of k. #
4960# sgn1 := (-1)**j1, sgn2 := (-1)**j2. #
4961# SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where #
4962# sin(r) and cos(r) are computed as odd and even #
4963# polynomials in r, respectively. Exit #
4964# #
4965# 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. #
4966# SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where #
4967# sin(r) and cos(r) are computed as odd and even #
4968# polynomials in r, respectively. Exit #
4969# #
4970# 6. If |X| > 1, go to 8. #
4971# #
4972# 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. #
4973# #
4974# 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
4975# go back to 2. #
4976# #
4977#########################################################################
4978
# Coefficient tables for the sine/cosine polynomials (A1..A7 and B1..B8 in
# the algorithm notes above). Storage formats, as the longword counts and
# bit patterns show:
#   SINA7..SINA4 : two longwords each (IEEE double)
#   SINA3        : a double followed by two zero longwords of padding
#   SINA2, SINA1 : extended precision (three longwords) plus one zero pad
4979SINA7: long 0xBD6AAA77,0xCCC994F5
4980SINA6: long 0x3DE61209,0x7AAE8DA1
4981SINA5: long 0xBE5AE645,0x2A118AE4
4982SINA4: long 0x3EC71DE3,0xA5341531
4983SINA3: long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
4984SINA2: long 0x3FF80000,0x88888888,0x888859AF,0x00000000
4985SINA1: long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000
4986
#   COSB8..COSB5 : two longwords each (IEEE double)
#   COSB4        : a double followed by two zero longwords of padding
#   COSB3        : extended precision plus one zero pad
#   COSB2        : extended precision, no pad
#   COSB1        : one longword, the single-precision pattern for -0.5
4987COSB8: long 0x3D2AC4D0,0xD6011EE3
4988COSB7: long 0xBDA9396F,0x9F45AC19
4989COSB6: long 0x3E21EED9,0x0612C972
4990COSB5: long 0xBE927E4F,0xB79D9FCF
4991COSB4: long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
4992COSB3: long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
4993COSB2: long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
4994COSB1: long 0xBF000000
4995
# Frame-slot aliases used by the sin/cos code. Several names map onto the
# same scratch slot (INARG/X/RPRIME -> FP_SCR0, POSNEG1/TWOTO63 -> L_SCR1,
# ENDFLAG/INT -> L_SCR2), so writing one overwrites the others.
4996 set INARG,FP_SCR0
4997
4998 set X,FP_SCR0
4999# set XDCARE,X+2
5000 set XFRAC,X+4
5001
5002 set RPRIME,FP_SCR0
5003 set SPRIME,FP_SCR1
5004
5005 set POSNEG1,L_SCR1
5006 set TWOTO63,L_SCR1
5007
5008 set ENDFLAG,L_SCR2
5009 set INT,L_SCR2
5010
5011 set ADJN,L_SCR3
5012
5013############################################
#--ssin and scos share one body.  ADJN records which function was asked
#--for and is added to the quadrant count N in SINCONT to choose between
#--the sin(r) and cos(r) polynomials.
#--a0 points at the extended-precision input; d0 is moved into FPCR just
#--before the final instruction as the user's rounding mode/precision.
5014 global ssin
5015ssin:
5016 mov.l &0,ADJN(%a6) # ADJN = 0 for sin(X)
5017 bra.b SINBGN
5018
5019############################################
5020 global scos
5021scos:
5022 mov.l &1,ADJN(%a6) # ADJN = 1 for cos(X); falls through to SINBGN
5023
5024############################################
5025SINBGN:
5026#--LOAD X AND CHECK IF |X| IS TOO SMALL OR TOO LARGE
5027
5028 fmov.x (%a0),%fp0 # LOAD INPUT
5029 fmov.x %fp0,X(%a6) # save input at X
5030
5031# "COMPACTIFY" X: d1 = sign/exponent word in the high half, top 16
# mantissa bits in the low half, sign bit cleared.
5032 mov.l (%a0),%d1 # put exp in hi word
5033 mov.w 4(%a0),%d1 # fetch hi(man)
5034 and.l &0x7FFFFFFF,%d1 # strip sign
5035
5036 cmpi.l %d1,&0x3FD78000 # is |X| >= 2**(-40)?
5037 bge.b SOK1 # yes; go check the upper bound
5038 bra.w SINSM # no; input is very small
5039
5040SOK1:
5041 cmp.l %d1,&0x4004BC7E # is |X| < 15 PI?
5042 blt.b SINMAIN # yes; table-driven reduction
5043 bra.w SREDUCEX # no; input is very large
5044
5045#--THIS IS THE USUAL CASE, |X| <= 15 PI.
5046#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5047SINMAIN:
5048 fmov.x %fp0,%fp1
5049 fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
5050
# PITBL+0x200 is the N = 0 entry (32 entries of 16 bytes precede it).
5051 lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32
5052
5053 fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER
5054
5055 mov.l INT(%a6),%d1 # make a copy of N
5056 asl.l &4,%d1 # N *= 16
5057 add.l %d1,%a1 # tbl_addr = a1 + (N*16)
5058
5059# A1 IS THE ADDRESS OF N*PIBY2
5060# ...WHICH IS IN TWO PIECES Y1 & Y2
5061 fsub.x (%a1)+,%fp0 # X-Y1 (Y1 is the extended leading term)
5062 fsub.s (%a1),%fp0 # fp0 = R = (X-Y1)-Y2 (Y2 is the single trailing term)
5063
5064SINCONT:
5065#--continuation from REDUCEX
#--On entry fp0 = R and INT(%a6) = N.  The running value is kept in d1;
#--where the comments below say "D" they mean d1.
5066
5067#--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
5068 mov.l INT(%a6),%d1
5069 add.l ADJN(%a6),%d1 # d1 = N+ADJN
5070 ror.l &1,%d1 # bit 0 moves to the sign bit: d1 < 0 iff N+ADJN was odd
5071 cmp.l %d1,&0
5072 blt.w COSPOLY # odd: cos(r) is needed
5073
5074#--LET J BE THE LEAST SIG. BIT OF D1 (I.E. BIT 1 OF N+ADJN), LET SGN := (-1)**J.
5075#--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
5076#--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
5077#--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
5078#--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
5079#--WHERE T=S*S.
5080#--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
5081#--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
5082SINPOLY:
5083 fmovm.x &0x0c,-(%sp) # save fp2/fp3
5084
5085 fmov.x %fp0,X(%a6) # X IS R
5086 fmul.x %fp0,%fp0 # FP0 IS S
5087
5088 fmov.d SINA7(%pc),%fp3
5089 fmov.d SINA6(%pc),%fp2
5090
5091 fmov.x %fp0,%fp1
5092 fmul.x %fp1,%fp1 # FP1 IS T
5093
5094 ror.l &1,%d1
5095 and.l &0x80000000,%d1
5096# ...BIT 1 OF THE ORIGINAL N+ADJN IS NOW ALONE IN THE SIGN POSITION OF D1
5097 eor.l %d1,X(%a6) # X IS NOW R'= SGN*R (sign bit of the stored R flipped)
5098
5099 fmul.x %fp1,%fp3 # TA7
5100 fmul.x %fp1,%fp2 # TA6
5101
5102 fadd.d SINA5(%pc),%fp3 # A5+TA7
5103 fadd.d SINA4(%pc),%fp2 # A4+TA6
5104
5105 fmul.x %fp1,%fp3 # T(A5+TA7)
5106 fmul.x %fp1,%fp2 # T(A4+TA6)
5107
5108 fadd.d SINA3(%pc),%fp3 # A3+T(A5+TA7)
5109 fadd.x SINA2(%pc),%fp2 # A2+T(A4+TA6)
5110
5111 fmul.x %fp3,%fp1 # T(A3+T(A5+TA7))
5112
5113 fmul.x %fp0,%fp2 # S(A2+T(A4+TA6))
5114 fadd.x SINA1(%pc),%fp1 # A1+T(A3+T(A5+TA7))
5115 fmul.x X(%a6),%fp0 # R'*S
5116
5117 fadd.x %fp2,%fp1 # [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
5118
5119 fmul.x %fp1,%fp0 # SIN(R')-R'
5120
5121 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
5122
# The user's FPCR is installed only now, so just the final add is done
# under the caller's rounding mode/precision.
5123 fmov.l %d0,%fpcr # restore users round mode,prec
5124 fadd.x X(%a6),%fp0 # last inst - possible exception set
5125 bra t_inx2
5126
5127#--LET J BE THE LEAST SIG. BIT OF D1 (I.E. BIT 1 OF N+ADJN), LET SGN := (-1)**J.
5128#--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
5129#--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
5130#--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
5131#--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
5132#--WHERE T=S*S.
5133#--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
5134#--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
5135#--AND IS THEREFORE STORED AS SINGLE PRECISION.
5136COSPOLY:
5137 fmovm.x &0x0c,-(%sp) # save fp2/fp3
5138
5139 fmul.x %fp0,%fp0 # FP0 IS S
5140
5141 fmov.d COSB8(%pc),%fp2
5142 fmov.d COSB7(%pc),%fp3
5143
5144 fmov.x %fp0,%fp1
5145 fmul.x %fp1,%fp1 # FP1 IS T
5146
5147 fmov.x %fp0,X(%a6) # X IS S
5148 ror.l &1,%d1
5149 and.l &0x80000000,%d1
5150# ...BIT 1 OF THE ORIGINAL N+ADJN IS NOW ALONE IN THE SIGN POSITION OF D1
5151
5152 fmul.x %fp1,%fp2 # TB8
5153
5154 eor.l %d1,X(%a6) # X IS NOW S'= SGN*S
5155 and.l &0x80000000,%d1 # repeats the mask; d1 already holds only the sign bit
5156
5157 fmul.x %fp1,%fp3 # TB7
5158
5159 or.l &0x3F800000,%d1 # D1 IS SGN (+-1.0) IN SINGLE
5160 mov.l %d1,POSNEG1(%a6)
5161
5162 fadd.d COSB6(%pc),%fp2 # B6+TB8
5163 fadd.d COSB5(%pc),%fp3 # B5+TB7
5164
5165 fmul.x %fp1,%fp2 # T(B6+TB8)
5166 fmul.x %fp1,%fp3 # T(B5+TB7)
5167
5168 fadd.d COSB4(%pc),%fp2 # B4+T(B6+TB8)
5169 fadd.x COSB3(%pc),%fp3 # B3+T(B5+TB7)
5170
5171 fmul.x %fp1,%fp2 # T(B4+T(B6+TB8))
5172 fmul.x %fp3,%fp1 # T(B3+T(B5+TB7))
5173
5174 fadd.x COSB2(%pc),%fp2 # B2+T(B4+T(B6+TB8))
5175 fadd.s COSB1(%pc),%fp1 # B1+T(B3+T(B5+TB7))
5176
5177 fmul.x %fp2,%fp0 # S(B2+T(B4+T(B6+TB8)))
5178
5179 fadd.x %fp1,%fp0 # [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))]
5180
5181 fmul.x X(%a6),%fp0 # S'*(...) = SGN*COS(R) - SGN
5182
5183 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
5184
5185 fmov.l %d0,%fpcr # restore users round mode,prec
5186 fadd.s POSNEG1(%a6),%fp0 # last inst - possible exception set
5187 bra t_inx2
5188
5189##############################################
5190
5191# SINe: Big OR Small?
5192#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
5193#--IF |X| < 2**(-40), RETURN X OR 1.
# NOTE(review): no branch to SINBORS appears in the sin/cos code in this
# part of the file; SINBGN branches straight to SINSM or SREDUCEX.
5194SINBORS:
5195 cmp.l %d1,&0x3FFF8000 # compact |X| against 1.0
5196 bgt.l SREDUCEX
5197
5198SINSM:
5199 mov.l ADJN(%a6),%d1
5200 cmp.l %d1,&0
5201 bgt.b COSTINY # ADJN > 0: cosine was requested
5202
5203# here, the operation may underflow iff the precision is sgl or dbl.
5204# extended denorms are handled through another entry point.
5205SINTINY:
5206# mov.w &0x0000,XDCARE(%a6) # JUST IN CASE
5207
# sin(X) = X: reload the saved input under the user's FPCR.
5208 fmov.l %d0,%fpcr # restore users round mode,prec
5209 mov.b &FMOV_OP,%d1 # last inst is MOVE
5210 fmov.x X(%a6),%fp0 # last inst - possible exception set
5211 bra t_catch
5212
# cos(X) = 1: computed as 1.0 + (-2**(-126)) under the user's FPCR, so the
# add is inexact and the result is rounded in the user's mode.
5213COSTINY:
5214 fmov.s &0x3F800000,%fp0 # fp0 = 1.0
5215 fmov.l %d0,%fpcr # restore users round mode,prec
5216 fadd.s &0x80800000,%fp0 # last inst - possible exception set
5217 bra t_pinx2
5218
5219################################################
5220 global ssind
5221#--SIN(X) = X FOR DENORMALIZED X
#--No computation is done here; control transfers to the handler
#--t_extdnrm, which is defined elsewhere.
5222ssind:
5223 bra t_extdnrm
5224
5225############################################
5226 global scosd
5227#--COS(X) = 1 FOR DENORMALIZED X
#--Loads 1.0 into fp0 and leaves through t_pinx2 (defined elsewhere).
5228scosd:
5229 fmov.s &0x3F800000,%fp0 # fp0 = 1.0
5230 bra t_pinx2
5231
5232##################################################
5233
#--ssincos: computes sin(X) and cos(X) together.  a0 points at the
#--extended-precision input.  The cosine is handed to sto_cos in fp1 and
#--the sine is left in fp0 (see SNODD/NEVEN).
5234 global ssincos
5235ssincos:
5236#--SET ADJN TO 4 (SRESTORE uses ADJN >= 4 to return to SCCONT)
5237 mov.l &4,ADJN(%a6)
5238
5239 fmov.x (%a0),%fp0 # LOAD INPUT
5240 fmov.x %fp0,X(%a6) # save input at X
5241
# d1 = sign/exponent word in the high half, top 16 mantissa bits in the
# low half, sign bit cleared.
5242 mov.l (%a0),%d1
5243 mov.w 4(%a0),%d1
5244 and.l &0x7FFFFFFF,%d1 # COMPACTIFY X
5245
5246 cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)?
5247 bge.b SCOK1 # yes; go check the upper bound
5248 bra.w SCSM # no; input is very small
5249
5250SCOK1:
5251 cmp.l %d1,&0x4004BC7E # |X| < 15 PI?
5252 blt.b SCMAIN # yes; table-driven reduction
5253 bra.w SREDUCEX # no; input is very large
5254
5255
5256#--THIS IS THE USUAL CASE, |X| <= 15 PI.
5257#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5258SCMAIN:
5259 fmov.x %fp0,%fp1
5260
5261 fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
5262
# PITBL+0x200 is the N = 0 entry (32 entries of 16 bytes precede it).
5263 lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32
5264
5265 fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER
5266
5267 mov.l INT(%a6),%d1
5268 asl.l &4,%d1 # N *= 16 (bytes per table entry)
5269 add.l %d1,%a1 # ADDRESS OF N*PIBY2, IN Y1, Y2
5270
5271 fsub.x (%a1)+,%fp0 # X-Y1
5272 fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2
5273
5274SCCONT:
5275#--continuation point from REDUCEX
#--On entry fp0 = R and INT(%a6) = N.
5276
5277 mov.l INT(%a6),%d1
5278 ror.l &1,%d1 # bit 0 of N moves to the sign bit
5279 cmp.l %d1,&0 # D1 < 0 IFF N IS ODD
5280 bge.w NEVEN
5281
#--N odd: SIN(X) = sgn1*cos(r) and COS(X) = sgn2*sin(r).
#--Both polynomials are evaluated by interleaved Horner steps in S:
#--fp1 carries the sin(r) chain (A coefficients), fp2 the cos(r) chain (B).
5282SNODD:
5283#--fp2 is saved here; d2 is pushed and popped around its short use below.
5284 fmovm.x &0x04,-(%sp) # save fp2
5285
5286 fmov.x %fp0,RPRIME(%a6)
5287 fmul.x %fp0,%fp0 # FP0 IS S = R*R
5288 fmov.d SINA7(%pc),%fp1 # A7
5289 fmov.d COSB8(%pc),%fp2 # B8
5290 fmul.x %fp0,%fp1 # SA7
5291 fmul.x %fp0,%fp2 # SB8
5292
# d1 currently has N bit 0 in bit 31 and N bit 1 in bit 0.  The sequence
# below leaves (N bit 1) XOR (N bit 0) alone in the sign bit of d2; that
# is the sign applied to R for the cosine result.
5293 mov.l %d2,-(%sp)
5294 mov.l %d1,%d2
5295 ror.l &1,%d2 # N bit 1 -> sign bit of d2
5296 and.l &0x80000000,%d2
5297 eor.l %d1,%d2 # XOR in N bit 0 (sign bit of d1)
5298 and.l &0x80000000,%d2
5299
5300 fadd.d SINA6(%pc),%fp1 # A6+SA7
5301 fadd.d COSB7(%pc),%fp2 # B7+SB8
5302
5303 fmul.x %fp0,%fp1 # S(A6+SA7)
5304 eor.l %d2,RPRIME(%a6) # R' = sgn2*R
5305 mov.l (%sp)+,%d2
5306 fmul.x %fp0,%fp2 # S(B7+SB8)
5307 ror.l &1,%d1 # N bit 1 -> sign bit of d1
5308 and.l &0x80000000,%d1
5309 mov.l &0x3F800000,POSNEG1(%a6)
5310 eor.l %d1,POSNEG1(%a6) # POSNEG1 = sgn1 (+-1.0) in single
5311
5312 fadd.d SINA5(%pc),%fp1 # A5+S(A6+SA7)
5313 fadd.d COSB6(%pc),%fp2 # B6+S(B7+SB8)
5314
5315 fmul.x %fp0,%fp1 # S(A5+S(A6+SA7))
5316 fmul.x %fp0,%fp2 # S(B6+S(B7+SB8))
5317 fmov.x %fp0,SPRIME(%a6)
5318
5319 fadd.d SINA4(%pc),%fp1 # A4+S(A5+S(A6+SA7))
5320 eor.l %d1,SPRIME(%a6) # S' = sgn1*S
5321 fadd.d COSB5(%pc),%fp2 # B5+S(B6+S(B7+SB8))
5322
5323 fmul.x %fp0,%fp1 # S(A4+...)
5324 fmul.x %fp0,%fp2 # S(B5+...)
5325
5326 fadd.d SINA3(%pc),%fp1 # A3+S(A4+...)
5327 fadd.d COSB4(%pc),%fp2 # B4+S(B5+...)
5328
5329 fmul.x %fp0,%fp1 # S(A3+...)
5330 fmul.x %fp0,%fp2 # S(B4+...)
5331
5332 fadd.x SINA2(%pc),%fp1 # A2+S(A3+...)
5333 fadd.x COSB3(%pc),%fp2 # B3+S(B4+...)
5334
5335 fmul.x %fp0,%fp1 # S(A2+...)
5336 fmul.x %fp0,%fp2 # S(B3+...)
5337
5338 fadd.x SINA1(%pc),%fp1 # A1+S(A2+...)
5339 fadd.x COSB2(%pc),%fp2 # B2+S(B3+...)
5340
5341 fmul.x %fp0,%fp1 # S(A1+...)
5342 fmul.x %fp2,%fp0 # S(B2+...)
5343
5344 fmul.x RPRIME(%a6),%fp1 # R'S(A1+...)
5345 fadd.s COSB1(%pc),%fp0 # B1+S(B2...)
5346 fmul.x SPRIME(%a6),%fp0 # S'(B1+S(B2+...))
5347
5348 fmovm.x (%sp)+,&0x20 # restore fp2
5349
# Both final adds run under the user's FPCR.
5350 fmov.l %d0,%fpcr
5351 fadd.x RPRIME(%a6),%fp1 # COS(X)
5352 bsr sto_cos # store cosine result
5353 fadd.s POSNEG1(%a6),%fp0 # SIN(X)
5354 bra t_inx2
5355
#--N even: SIN(X) = sgn1*sin(r) and COS(X) = sgn1*cos(r), sgn1 = (-1)**(N/2).
#--On entry fp0 = R and d1 = N rotated right by one (so bit 0 of d1 is
#--bit 1 of N).  fp1 carries the cos(r) chain (B coefficients) and fp2
#--the sin(r) chain (A coefficients).
5356NEVEN:
5357#--fp2 is saved here and restored before the final adds.
5358 fmovm.x &0x04,-(%sp) # save fp2
5359
5360 fmov.x %fp0,RPRIME(%a6)
5361 fmul.x %fp0,%fp0 # FP0 IS S = R*R
5362
5363 fmov.d COSB8(%pc),%fp1 # B8
5364 fmov.d SINA7(%pc),%fp2 # A7
5365
5366 fmul.x %fp0,%fp1 # SB8
5367 fmov.x %fp0,SPRIME(%a6)
5368 fmul.x %fp0,%fp2 # SA7
5369
5370 ror.l &1,%d1 # N bit 1 -> sign bit of d1
5371 and.l &0x80000000,%d1
5372
5373 fadd.d COSB7(%pc),%fp1 # B7+SB8
5374 fadd.d SINA6(%pc),%fp2 # A6+SA7
5375
5376 eor.l %d1,RPRIME(%a6) # R' = sgn1*R
5377 eor.l %d1,SPRIME(%a6) # S' = sgn1*S
5378
5379 fmul.x %fp0,%fp1 # S(B7+SB8)
5380
5381 or.l &0x3F800000,%d1 # d1 = sgn1 (+-1.0) in single
5382 mov.l %d1,POSNEG1(%a6)
5383
5384 fmul.x %fp0,%fp2 # S(A6+SA7)
5385
5386 fadd.d COSB6(%pc),%fp1 # B6+S(B7+SB8)
5387 fadd.d SINA5(%pc),%fp2 # A5+S(A6+SA7)
5388
5389 fmul.x %fp0,%fp1 # S(B6+S(B7+SB8))
5390 fmul.x %fp0,%fp2 # S(A5+S(A6+SA7))
5391
5392 fadd.d COSB5(%pc),%fp1 # B5+S(B6+S(B7+SB8))
5393 fadd.d SINA4(%pc),%fp2 # A4+S(A5+S(A6+SA7))
5394
5395 fmul.x %fp0,%fp1 # S(B5+...)
5396 fmul.x %fp0,%fp2 # S(A4+...)
5397
5398 fadd.d COSB4(%pc),%fp1 # B4+S(B5+...)
5399 fadd.d SINA3(%pc),%fp2 # A3+S(A4+...)
5400
5401 fmul.x %fp0,%fp1 # S(B4+...)
5402 fmul.x %fp0,%fp2 # S(A3+...)
5403
5404 fadd.x COSB3(%pc),%fp1 # B3+S(B4+...)
5405 fadd.x SINA2(%pc),%fp2 # A2+S(A3+...)
5406
5407 fmul.x %fp0,%fp1 # S(B3+...)
5408 fmul.x %fp0,%fp2 # S(A2+...)
5409
5410 fadd.x COSB2(%pc),%fp1 # B2+S(B3+...)
5411 fadd.x SINA1(%pc),%fp2 # A1+S(A2+...)
5412
5413 fmul.x %fp0,%fp1 # S(B2+...)
5414 fmul.x %fp2,%fp0 # S(A1+...)
5415
5416
5417 fadd.s COSB1(%pc),%fp1 # B1+S(B2...)
5418 fmul.x RPRIME(%a6),%fp0 # R'S(A1+...)
5419 fmul.x SPRIME(%a6),%fp1 # S'(B1+S(B2+...))
5420
5421 fmovm.x (%sp)+,&0x20 # restore fp2
5422
# Both final adds run under the user's FPCR.
5423 fmov.l %d0,%fpcr
5424 fadd.s POSNEG1(%a6),%fp1 # COS(X)
5425 bsr sto_cos # store cosine result
5426 fadd.x RPRIME(%a6),%fp0 # SIN(X)
5427 bra t_inx2
5428
5429################################################
5430
# NOTE(review): no branch to SCBORS appears in the ssincos code in this
# part of the file; ssincos branches straight to SCSM or SREDUCEX.
5431SCBORS:
5432 cmp.l %d1,&0x3FFF8000 # compact |X| against 1.0
5433 bgt.w SREDUCEX
5434
5435################################################
5436
# Tiny |X| (< 2**(-40)): COS(X) = 1 and SIN(X) = X.
5437SCSM:
5438# mov.w &0x0000,XDCARE(%a6)
5439 fmov.s &0x3F800000,%fp1 # fp1 = 1.0
5440
# The cosine is formed as 1.0 - 2**(-126) under the user's FPCR, so the
# subtract is inexact and the result is rounded in the user's mode.
5441 fmov.l %d0,%fpcr
5442 fsub.s &0x00800000,%fp1
5443 bsr sto_cos # store cosine result
# NOTE(review): d0 is reloaded from FPCR after the call, presumably because
# sto_cos does not preserve d0 (ssincosd also saves d0 around it) - confirm.
5444 fmov.l %fpcr,%d0 # d0 must have fpcr,too
5445 mov.b &FMOV_OP,%d1 # last inst is MOVE
5446 fmov.x X(%a6),%fp0 # SIN(X) = X: reload the saved input
5447 bra t_catch
5448
5449##############################################
5450
5451 global ssincosd
5452#--SIN AND COS OF X FOR DENORMALIZED X
#--The cosine result is 1.0, handed to sto_cos in fp1; the sine is left to
#--the handler t_extdnrm (defined elsewhere).  d0 is preserved across the
#--sto_cos call.
5453ssincosd:
5454 mov.l %d0,-(%sp) # save d0
5455 fmov.s &0x3F800000,%fp1 # fp1 = 1.0
5456 bsr sto_cos # store cosine result
5457 mov.l (%sp)+,%d0 # restore d0
5458 bra t_extdnrm
5459
5460############################################
5461
5462#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
5463#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
5464#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
#--On entry fp0 = X and d1 = compact form of |X|.  fp2-fp5 and d2 are
#--pushed here and popped in SRESTORE.
5465SREDUCEX:
5466 fmovm.x &0x3c,-(%sp) # save {fp2-fp5}
5467 mov.l %d2,-(%sp) # save d2
5468 fmov.s &0x00000000,%fp1 # fp1 = 0 (low part r of the remainder)
5469
5470#--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
5471#--there is a danger of unwanted overflow in first LOOP iteration. In this
5472#--case, reduce argument by one remainder step to make subsequent reduction
5473#--safe.
5474 cmp.l %d1,&0x7ffeffff # is arg dangerously large?
5475 bne.b SLOOP # no
5476
5477# yes; create 2**16383*PI/2
5478 mov.w &0x7ffe,FP_SCR0_EX(%a6)
5479 mov.l &0xc90fdaa2,FP_SCR0_HI(%a6)
5480 clr.l FP_SCR0_LO(%a6)
5481
5482# create low half of 2**16383*PI/2 at FP_SCR1
5483 mov.w &0x7fdc,FP_SCR1_EX(%a6)
5484 mov.l &0x85a308d3,FP_SCR1_HI(%a6)
5485 clr.l FP_SCR1_LO(%a6)
5486
5487 ftest.x %fp0 # test sign of argument
5488 fblt.w sred_neg # negative arg: add the positive constants
5489
# Positive arg: set the sign bit of both constants so the adds below
# subtract them.
5490 or.b &0x80,FP_SCR0_EX(%a6) # positive arg
5491 or.b &0x80,FP_SCR1_EX(%a6)
5492sred_neg:
5493 fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact
5494 fmov.x %fp0,%fp1 # save high result in fp1
5495 fadd.x FP_SCR1(%a6),%fp0 # low part of reduction
5496 fsub.x %fp0,%fp1 # determine low component of result
5497 fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument.
5498
5499#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
5500#--integer quotient will be stored in N
5501#--Intermeditate remainder is 66-bit long; (R,r) in (FP0,FP1)
#--Where the comments below say "d1 = K" or "d1 = L", the value lives in d1.
5502SLOOP:
5503 fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2
5504 mov.w INARG(%a6),%d1 # low word of d1 = sign/exponent word of R
5505 mov.l %d1,%a1 # save a copy of d1 (the sign is taken from it below)
5506 and.l &0x00007FFF,%d1
5507 sub.l &0x00003FFF,%d1 # d1 = K
5508 cmp.l %d1,&28
5509 ble.b SLASTLOOP # K <= 28: this is the final pass
5510SCONTLOOP:
5511 sub.l &27,%d1 # d1 = L := K-27
5512 mov.b &0,ENDFLAG(%a6)
5513 bra.b SWORK
5514SLASTLOOP:
5515 clr.l %d1 # d1 = L := 0
5516 mov.b &1,ENDFLAG(%a6)
5517
5518SWORK:
5519#--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
5520#--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
5521
5522#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
5523#--2**L * (PIby2_1), 2**L * (PIby2_2)
5524
5525 mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI
5526 sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI)
5527
5528 mov.l &0xA2F9836E,FP_SCR0_HI(%a6)
5529 mov.l &0x4E44152A,FP_SCR0_LO(%a6)
5530 mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI)
5531
5532 fmov.x %fp0,%fp2
5533 fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI)
5534
5535#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
5536#--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
5537#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
5538#--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
5539#--US THE DESIRED VALUE IN FLOATING POINT.
5540 mov.l %a1,%d2 # recover the saved sign/exponent word
5541 swap %d2 # move it to the high half
5542 and.l &0x80000000,%d2 # keep only the sign of R
5543 or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL
5544 mov.l %d2,TWOTO63(%a6)
5545 fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP2 IS ROUNDED
5546 fsub.s TWOTO63(%a6),%fp2 # fp2 = N
5547# fint.x %fp2
5548
5549#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
5550 mov.l %d1,%d2 # d2 = L
5551
5552 add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2)
5553 mov.w %d2,FP_SCR0_EX(%a6)
5554 mov.l &0xC90FDAA2,FP_SCR0_HI(%a6)
5555 clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1
5556
5557 add.l &0x00003FDD,%d1 # BIASED EXP OF 2**L * Piby2_2
5558 mov.w %d1,FP_SCR1_EX(%a6)
5559 mov.l &0x85A308D3,FP_SCR1_HI(%a6)
5560 clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2
5561
5562 mov.b ENDFLAG(%a6),%d1 # d1 low byte = 1 on the final pass, else 0
5563
5564#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
5565#--P2 = 2**(L) * Piby2_2
5566 fmov.x %fp2,%fp4 # fp4 = N
5567 fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1
5568 fmov.x %fp2,%fp5 # fp5 = N
5569 fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2
5570 fmov.x %fp4,%fp3 # fp3 = W = N*P1
5571
5572#--we want P+p = W+w but |p| <= half ulp of P
5573#--Then, we need to compute A := R-P and a := r-p
5574 fadd.x %fp5,%fp3 # fp3 = P
5575 fsub.x %fp3,%fp4 # fp4 = W-P
5576
5577 fsub.x %fp3,%fp0 # fp0 = A := R - P
5578 fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w
5579
5580 fmov.x %fp0,%fp3 # fp3 = A
5581 fsub.x %fp4,%fp1 # fp1 = a := r - p
5582
5583#--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
5584#--|r| <= half ulp of R.
5585 fadd.x %fp1,%fp0 # fp0 = R := A+a
5586#--No need to calculate r if this is the last loop
5587 cmp.b %d1,&0
5588 bgt.w SRESTORE # ENDFLAG set: done
5589
5590#--Need to calculate r
5591 fsub.x %fp0,%fp3 # fp3 = A-R
5592 fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a
5593 bra.w SLOOP
5594
#--End of the general reduction: fp0 = reduced argument R, fp2 = N.
#--Store N as an integer, pop what SREDUCEX pushed, and resume in the
#--caller's polynomial code.
5595SRESTORE:
5596 fmov.l %fp2,INT(%a6) # INT = N (overwrites ENDFLAG, which shares the slot)
5597 mov.l (%sp)+,%d2 # restore d2
5598 fmovm.x (%sp)+,&0x3c # restore {fp2-fp5}
5599
5600 mov.l ADJN(%a6),%d1
5601 cmp.l %d1,&4
5602
5603 blt.w SINCONT # ADJN < 4: entered from ssin/scos
5604 bra.w SCCONT # ADJN = 4: entered from ssincos
5605
5606#########################################################################
5607# stan(): computes the tangent of a normalized input #
5608# stand(): computes the tangent of a denormalized input #
5609# #
5610# INPUT *************************************************************** #
5611# a0 = pointer to extended precision input #
5612# d0 = round precision,mode #
5613# #
5614# OUTPUT ************************************************************** #
5615# fp0 = tan(X) #
5616# #
5617# ACCURACY and MONOTONICITY ******************************************* #
5618# The returned result is within 3 ulp in 64 significant bit, i.e. #
5619# within 0.5001 ulp to 53 bits if the result is subsequently #
5620# rounded to double precision. The result is provably monotonic #
5621# in double precision. #
5622# #
5623# ALGORITHM *********************************************************** #
5624# #
5625# 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
5626# #
5627# 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
5628# k = N mod 2, so in particular, k = 0 or 1. #
5629# #
5630# 3. If k is odd, go to 5. #
5631# #
5632# 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a #
5633# rational function U/V where #
5634# U = r + r*s*(P1 + s*(P2 + s*P3)), and #
5635# V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r. #
5636# Exit. #
5637# #
5638# 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by #
5639# a rational function U/V where #
5640# U = r + r*s*(P1 + s*(P2 + s*P3)), and #
5641# V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r, #
5642# -Cot(r) = -V/U. Exit. #
5643# #
5644# 6. If |X| > 1, go to 8. #
5645# #
5646# 7. (|X|<2**(-40)) Tan(X) = X. Exit. #
5647# #
5648# 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back #
5649# to 2. #
5650# #
5651#########################################################################
5652
# Coefficients of the rational approximation tan(r) ~ U/V used by stan:
#   U = r + r*s*(P1 + s*(P2 + s*P3)),  V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4)))
# TANQ4, TANP3 and TANQ3 are read as doubles (fmov.d/fadd.d); TANP2,
# TANQ2, TANP1 and TANQ1 are read as extended (fadd.x).
5653TANQ4:
5654 long 0x3EA0B759,0xF50F8688
5655TANP3:
5656 long 0xBEF2BAA5,0xA8924F04
5657
5658TANQ3:
5659 long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000
5660
5661TANP2:
5662 long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
5663
5664TANQ2:
5665 long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
5666
5667TANP1:
5668 long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
5669
5670TANQ1:
5671 long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
5672
# INVTWOPI, TWOPI1 and TWOPI2 are not referenced by the sin/cos/tan code
# in this part of the file.
5673INVTWOPI:
5674 long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
5675
5676TWOPI1:
5677 long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
5678TWOPI2:
5679 long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
5680
5681#--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
5682#--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
5683#--MOST 69 BITS LONG.
#--Each entry is 16 bytes: three longwords hold the extended leading term
#--(Y1) and the fourth holds the single-precision trailing term (Y2).
#--Users index the table as PITBL+0x200 + N*16 and read it with
#--fsub.x (%a1)+ followed by fsub.s (%a1).
5684# global PITBL
5685PITBL:
5686 long 0xC0040000,0xC90FDAA2,0x2168C235,0x21800000 # N = -32
5687 long 0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
5688 long 0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
5689 long 0xC0040000,0xB6365E22,0xEE46F000,0x21480000
5690 long 0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
5691 long 0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
5692 long 0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
5693 long 0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
5694 long 0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
5695 long 0xC0040000,0x90836524,0x88034B96,0x20B00000
5696 long 0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
5697 long 0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
5698 long 0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
5699 long 0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
5700 long 0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
5701 long 0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
5702 long 0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
5703 long 0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
5704 long 0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
5705 long 0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
5706 long 0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
5707 long 0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
5708 long 0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
5709 long 0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
5710 long 0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
5711 long 0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
5712 long 0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
5713 long 0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
5714 long 0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
5715 long 0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
5716 long 0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
5717 long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
5718 long 0x00000000,0x00000000,0x00000000,0x00000000 # N = 0 (PITBL+0x200)
5719 long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
5720 long 0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
5721 long 0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
5722 long 0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
5723 long 0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
5724 long 0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
5725 long 0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
5726 long 0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
5727 long 0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
5728 long 0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
5729 long 0x40030000,0x8A3AE64F,0x76F80584,0x21080000
5730 long 0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
5731 long 0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
5732 long 0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
5733 long 0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
5734 long 0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
5735 long 0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
5736 long 0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
5737 long 0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
5738 long 0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
5739 long 0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
5740 long 0x40040000,0x8A3AE64F,0x76F80584,0x21880000
5741 long 0x40040000,0x90836524,0x88034B96,0xA0B00000
5742 long 0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
5743 long 0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
5744 long 0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
5745 long 0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
5746 long 0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
5747 long 0x40040000,0xB6365E22,0xEE46F000,0xA1480000
5748 long 0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
5749 long 0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
5750 long 0x40040000,0xC90FDAA2,0x2168C235,0xA1800000 # N = 32
5751
# Scratch-area names used by stan (addressed off %a6).  Unlike the sin/cos
# equates earlier in the file, INT here shares L_SCR1 with TWOTO63 rather
# than L_SCR2 with ENDFLAG; RESTORE writes INT only after the last use
# of TWOTO63.
5752 set INARG,FP_SCR0
5753
5754 set TWOTO63,L_SCR1
5755 set INT,L_SCR1
5756 set ENDFLAG,L_SCR2
5757
#--stan: tan(X) for a normalized input.  a0 points at the extended
#--precision input, d0 holds the user's round precision/mode; the result
#--is produced in fp0.
5758 global stan
5759stan:
5760 fmov.x (%a0),%fp0 # LOAD INPUT
5761
# Compact form of |X|: sign/exponent word in the high half of d1, top 16
# mantissa bits in the low half, sign bit cleared.
5762 mov.l (%a0),%d1
5763 mov.w 4(%a0),%d1
5764 and.l &0x7FFFFFFF,%d1
5765
5766 cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)?
5767 bge.b TANOK1 # yes; go check the upper bound
5768 bra.w TANSM # no; input is very small
5769TANOK1:
5770 cmp.l %d1,&0x4004BC7E # |X| < 15 PI?
5771 blt.b TANMAIN # yes; table-driven reduction
5772 bra.w REDUCEX # no; input is very large
5773
5774TANMAIN:
5775#--THIS IS THE USUAL CASE, |X| <= 15 PI.
5776#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5777 fmov.x %fp0,%fp1
5778 fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
5779
# PITBL+0x200 is the N = 0 entry (32 entries of 16 bytes precede it).
5780 lea.l PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32
5781
5782 fmov.l %fp1,%d1 # CONVERT TO INTEGER
5783
5784 asl.l &4,%d1 # d1 = N*16 (bytes per table entry)
5785 add.l %d1,%a1 # ADDRESS N*PIBY2 IN Y1, Y2
5786
5787 fsub.x (%a1)+,%fp0 # X-Y1
5788
5789 fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2
5790
# d1 holds N*16, so bit 0 of N sits in bit 4; rotating right by 5 moves it
# into the sign bit.
5791 ror.l &5,%d1
5792 and.l &0x80000000,%d1 # d1 < 0 IFF N IS ODD
5793
#--On entry fp0 = R and the sign bit of d1 is set iff N is odd.
#--fp2/fp3 are pushed here; both the even path below and NODD pop them.
5794TANCONT:
5795 fmovm.x &0x0c,-(%sp) # save fp2,fp3
5796
5797 cmp.l %d1,&0
5798 blt.w NODD # N odd: compute -cot(r)
5799
#--N even: tan(X) = tan(r) = U/V.  fp2 carries the P chain (numerator),
#--fp3 the Q chain (denominator), fp1 = S.
5800 fmov.x %fp0,%fp1
5801 fmul.x %fp1,%fp1 # S = R*R
5802
5803 fmov.d TANQ4(%pc),%fp3
5804 fmov.d TANP3(%pc),%fp2
5805
5806 fmul.x %fp1,%fp3 # SQ4
5807 fmul.x %fp1,%fp2 # SP3
5808
5809 fadd.d TANQ3(%pc),%fp3 # Q3+SQ4
5810 fadd.x TANP2(%pc),%fp2 # P2+SP3
5811
5812 fmul.x %fp1,%fp3 # S(Q3+SQ4)
5813 fmul.x %fp1,%fp2 # S(P2+SP3)
5814
5815 fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4)
5816 fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3)
5817
5818 fmul.x %fp1,%fp3 # S(Q2+S(Q3+SQ4))
5819 fmul.x %fp1,%fp2 # S(P1+S(P2+SP3))
5820
5821 fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4))
5822 fmul.x %fp0,%fp2 # RS(P1+S(P2+SP3))
5823
5824 fmul.x %fp3,%fp1 # S(Q1+S(Q2+S(Q3+SQ4)))
5825
5826 fadd.x %fp2,%fp0 # U = R+RS(P1+S(P2+SP3))
5827
5828 fadd.s &0x3F800000,%fp1 # V = 1+S(Q1+...)
5829
5830 fmovm.x (%sp)+,&0x30 # restore fp2,fp3
5831
# Only the final divide runs under the user's FPCR.
5832 fmov.l %d0,%fpcr # restore users round mode,prec
5833 fdiv.x %fp1,%fp0 # last inst - possible exception set
5834 bra t_inx2
5835
#--N odd: tan(X) = -cot(r) = V/(-U).  Register roles are swapped relative
#--to the even path: fp1 = R (becomes U), fp0 = S (becomes V).
5836NODD:
5837 fmov.x %fp0,%fp1
5838 fmul.x %fp0,%fp0 # S = R*R
5839
5840 fmov.d TANQ4(%pc),%fp3
5841 fmov.d TANP3(%pc),%fp2
5842
5843 fmul.x %fp0,%fp3 # SQ4
5844 fmul.x %fp0,%fp2 # SP3
5845
5846 fadd.d TANQ3(%pc),%fp3 # Q3+SQ4
5847 fadd.x TANP2(%pc),%fp2 # P2+SP3
5848
5849 fmul.x %fp0,%fp3 # S(Q3+SQ4)
5850 fmul.x %fp0,%fp2 # S(P2+SP3)
5851
5852 fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4)
5853 fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3)
5854
5855 fmul.x %fp0,%fp3 # S(Q2+S(Q3+SQ4))
5856 fmul.x %fp0,%fp2 # S(P1+S(P2+SP3))
5857
5858 fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4))
5859 fmul.x %fp1,%fp2 # RS(P1+S(P2+SP3))
5860
5861 fmul.x %fp3,%fp0 # S(Q1+S(Q2+S(Q3+SQ4)))
5862
5863 fadd.x %fp2,%fp1 # U = R+RS(P1+S(P2+SP3))
5864 fadd.s &0x3F800000,%fp0 # V = 1+S(Q1+...)
5865
5866 fmovm.x (%sp)+,&0x30 # restore fp2,fp3
5867
# Push U and flip its sign bit in memory, so the divide below yields V/(-U).
5868 fmov.x %fp1,-(%sp)
5869 eor.l &0x80000000,(%sp)
5870
5871 fmov.l %d0,%fpcr # restore users round mode,prec
5872 fdiv.x (%sp)+,%fp0 # last inst - possible exception set
5873 bra t_inx2
5874
# NOTE(review): no branch to TANBORS appears in the tan code in this part
# of the file; stan branches straight to TANSM or REDUCEX.
5875TANBORS:
5876#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
5877#--IF |X| < 2**(-40), RETURN X.
5878 cmp.l %d1,&0x3FFF8000 # compact |X| against 1.0
5879 bgt.b REDUCEX
5880
# Tiny |X|: tan(X) = X.  The input is bounced through the stack so that the
# final fmov runs under the user's FPCR.
5881TANSM:
5882 fmov.x %fp0,-(%sp)
5883 fmov.l %d0,%fpcr # restore users round mode,prec
5884 mov.b &FMOV_OP,%d1 # last inst is MOVE
5885 fmov.x (%sp)+,%fp0 # last inst - possible exception set
5886 bra t_catch
5887
5888 global stand
5889#--TAN(X) = X FOR DENORMALIZED X
#--No computation is done here; control transfers to the handler
#--t_extdnrm, which is defined elsewhere.
5890stand:
5891 bra t_extdnrm
5892
5893#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
5894#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
5895#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
#--On entry fp0 = X and d1 = compact form of |X|.  fp2-fp5 and d2 are
#--pushed here and popped in RESTORE.
5896REDUCEX:
5897 fmovm.x &0x3c,-(%sp) # save {fp2-fp5}
5898 mov.l %d2,-(%sp) # save d2
5899 fmov.s &0x00000000,%fp1 # fp1 = 0 (low part r of the remainder)
5900
5901#--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
5902#--there is a danger of unwanted overflow in first LOOP iteration. In this
5903#--case, reduce argument by one remainder step to make subsequent reduction
5904#--safe.
5905 cmp.l %d1,&0x7ffeffff # is arg dangerously large?
5906 bne.b LOOP # no
5907
5908# yes; create 2**16383*PI/2
5909 mov.w &0x7ffe,FP_SCR0_EX(%a6)
5910 mov.l &0xc90fdaa2,FP_SCR0_HI(%a6)
5911 clr.l FP_SCR0_LO(%a6)
5912
5913# create low half of 2**16383*PI/2 at FP_SCR1
5914 mov.w &0x7fdc,FP_SCR1_EX(%a6)
5915 mov.l &0x85a308d3,FP_SCR1_HI(%a6)
5916 clr.l FP_SCR1_LO(%a6)
5917
5918 ftest.x %fp0 # test sign of argument
5919 fblt.w red_neg # negative arg: add the positive constants
5920
# Positive arg: set the sign bit of both constants so the adds below
# subtract them.
5921 or.b &0x80,FP_SCR0_EX(%a6) # positive arg
5922 or.b &0x80,FP_SCR1_EX(%a6)
5923red_neg:
5924 fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact
5925 fmov.x %fp0,%fp1 # save high result in fp1
5926 fadd.x FP_SCR1(%a6),%fp0 # low part of reduction
5927 fsub.x %fp0,%fp1 # determine low component of result
5928 fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument.
5929
5930#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
5931#--integer quotient will be stored in N
5932#--Intermeditate remainder is 66-bit long; (R,r) in (FP0,FP1)
#--Where the comments below say "d1 = K" or "d1 = L", the value lives in d1.
5933LOOP:
5934 fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2
5935 mov.w INARG(%a6),%d1 # low word of d1 = sign/exponent word of R
5936 mov.l %d1,%a1 # save a copy of d1 (the sign is taken from it below)
5937 and.l &0x00007FFF,%d1
5938 sub.l &0x00003FFF,%d1 # d1 = K
5939 cmp.l %d1,&28
5940 ble.b LASTLOOP # K <= 28: this is the final pass
5941CONTLOOP:
5942 sub.l &27,%d1 # d1 = L := K-27
5943 mov.b &0,ENDFLAG(%a6)
5944 bra.b WORK
5945LASTLOOP:
5946 clr.l %d1 # d1 = L := 0
5947 mov.b &1,ENDFLAG(%a6)
5948
5949WORK:
5950#--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
5951#--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
5952
5953#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
5954#--2**L * (PIby2_1), 2**L * (PIby2_2)
5955
5956 mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI
5957 sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI)
5958
5959 mov.l &0xA2F9836E,FP_SCR0_HI(%a6)
5960 mov.l &0x4E44152A,FP_SCR0_LO(%a6)
5961 mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI)
5962
5963 fmov.x %fp0,%fp2
5964 fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI)
5965
5966#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
5967#--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
5968#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
5969#--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
5970#--US THE DESIRED VALUE IN FLOATING POINT.
5971 mov.l %a1,%d2 # recover the saved sign/exponent word
5972 swap %d2 # move it to the high half
5973 and.l &0x80000000,%d2 # keep only the sign of R
5974 or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL
5975 mov.l %d2,TWOTO63(%a6)
5976 fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP2 IS ROUNDED
5977 fsub.s TWOTO63(%a6),%fp2 # fp2 = N
5978# fintrz.x %fp2,%fp2
5979
5980#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
5981 mov.l %d1,%d2 # d2 = L
5982
5983 add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2)
5984 mov.w %d2,FP_SCR0_EX(%a6)
5985 mov.l &0xC90FDAA2,FP_SCR0_HI(%a6)
5986 clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1
5987
5988 add.l &0x00003FDD,%d1 # BIASED EXP OF 2**L * Piby2_2
5989 mov.w %d1,FP_SCR1_EX(%a6)
5990 mov.l &0x85A308D3,FP_SCR1_HI(%a6)
5991 clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2
5992
5993 mov.b ENDFLAG(%a6),%d1 # d1 low byte = 1 on the final pass, else 0
5994
5995#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
5996#--P2 = 2**(L) * Piby2_2
5997 fmov.x %fp2,%fp4 # fp4 = N
5998 fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1
5999 fmov.x %fp2,%fp5 # fp5 = N
6000 fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2
6001 fmov.x %fp4,%fp3 # fp3 = W = N*P1
6002
6003#--we want P+p = W+w but |p| <= half ulp of P
6004#--Then, we need to compute A := R-P and a := r-p
6005 fadd.x %fp5,%fp3 # fp3 = P
6006 fsub.x %fp3,%fp4 # fp4 = W-P
6007
6008 fsub.x %fp3,%fp0 # fp0 = A := R - P
6009 fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w
6010
6011 fmov.x %fp0,%fp3 # fp3 = A
6012 fsub.x %fp4,%fp1 # fp1 = a := r - p
6013
6014#--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
6015#--|r| <= half ulp of R.
6016 fadd.x %fp1,%fp0 # fp0 = R := A+a
6017#--No need to calculate r if this is the last loop
6018 cmp.b %d1,&0
6019 bgt.w RESTORE # ENDFLAG set: done
6020
6021#--Need to calculate r
6022 fsub.x %fp0,%fp3 # fp3 = A-R
6023 fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a
6024 bra.w LOOP
6025
#--End of the general reduction for tan: fp0 = reduced argument R, fp2 = N.
#--Store N as an integer, pop what REDUCEX pushed, and rejoin TANCONT with
#--the parity of N in the sign bit of d1.
6026RESTORE:
6027 fmov.l %fp2,INT(%a6) # INT = N (overwrites TWOTO63, which shares the slot)
6028 mov.l (%sp)+,%d2 # restore d2
6029 fmovm.x (%sp)+,&0x3c # restore {fp2-fp5}
6030
6031 mov.l INT(%a6),%d1
6032 ror.l &1,%d1 # bit 0 of N -> sign bit: d1 < 0 iff N is odd
6033
6034 bra.w TANCONT
6035
6036#########################################################################
6037# satan(): computes the arctangent of a normalized number #
6038# satand(): computes the arctangent of a denormalized number #
6039# #
6040# INPUT *************************************************************** #
6041# a0 = pointer to extended precision input #
6042# d0 = round precision,mode #
6043# #
6044# OUTPUT ************************************************************** #
6045# fp0 = arctan(X) #
6046# #
6047# ACCURACY and MONOTONICITY ******************************************* #
6048# The returned result is within 2 ulps in 64 significant bit, #
6049# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6050# rounded to double precision. The result is provably monotonic #
6051# in double precision. #
6052# #
6053# ALGORITHM *********************************************************** #
6054# Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. #
6055# #
6056# Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. #
6057# Note that k = -4, -3,..., or 3. #
6058# Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 #
6059# significant bits of X with a bit-1 attached at the 6-th #
6060# bit position. Define u to be u = (X-F) / (1 + X*F). #
6061# #
6062# Step 3. Approximate arctan(u) by a polynomial poly. #
6063# #
6064# Step 4. Return arctan(F) + poly, arctan(F) is fetched from a #
6065# table of values calculated beforehand. Exit. #
6066# #
6067# Step 5. If |X| >= 16, go to Step 7. #
6068# #
6069# Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. #
6070# #
6071# Step 7. Define X' = -1/X. Approximate arctan(X') by an odd #
6072# polynomial in X'. #
6073# Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. #
6074# #
6075#########################################################################
6076
# Polynomial coefficients for satan(). Each is two longs (64 bits) and is
# read with fmov.d/fadd.d/fmul.d, i.e. as a double-precision value.
# ATANA1-ATANA3 are used by ATANMAIN (1/16 <= |X| < 16, rearranged form
# U + A1*U*V*(A2 + V*(A3 + V))).
6077ATANA3: long 0xBFF6687E,0x314987D8
6078ATANA2: long 0x4002AC69,0x34A26DB3
6079ATANA1: long 0xBFC2476F,0x4E1DA28E
6080
# ATANB1-ATANB6 are used by ATANSM (|X| < 1/16, odd polynomial in X).
6081ATANB6: long 0x3FB34444,0x7F876989
6082ATANB5: long 0xBFB744EE,0x7FAF45DB
6083ATANB4: long 0x3FBC71C6,0x46940220
6084ATANB3: long 0xBFC24924,0x921872F9
6085ATANB2: long 0x3FC99999,0x99998FA9
6086ATANB1: long 0xBFD55555,0x55555555
6087
# ATANC1-ATANC5 are used by ATANBIG (|X| >= 16, odd polynomial in X' = -1/X).
6088ATANC5: long 0xBFB70BF3,0x98539E6A
6089ATANC4: long 0x3FBC7187,0x962D1D7D
6090ATANC3: long 0xBFC24924,0x827107B8
6091ATANC2: long 0x3FC99999,0x9996263E
6092ATANC1: long 0xBFD55555,0x55555536
6093
# +Pi/2 and -Pi/2, read with fmov.x/fadd.x (extended precision) by
# pos_big/neg_big and pos_huge/neg_huge. Only the first three longs form the
# extended value; the fourth long is presumably padding to 16 bytes.
6094PPIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
6095NPIBY2: long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
6096
# +/- tiny extended values (biased exponent 0x0001, mantissa 0x80000000_00000000).
# ATANHUGE adds the one of opposite sign to +/-Pi/2.
6097PTINY: long 0x00010000,0x80000000,0x00000000,0x00000000
6098NTINY: long 0x80010000,0x80000000,0x00000000,0x00000000
6099
# ATANTBL: table of ATAN(|F|) used by ATANMAIN. There are 128 entries of
# four longs (16 bytes) each. ATANMAIN forms a 7-bit identifier from K+4
# (3 bits, K = -4..3) followed by the 4 varying fraction bits of F, scales
# it by 16 and adds it to the table base. It copies only the first three
# longs of an entry (the extended-precision value); the fourth long is zero
# in every entry.
6100ATANTBL:
6101 long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
6102 long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
6103 long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
6104 long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
6105 long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
6106 long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000
6107 long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
6108 long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
6109 long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
6110 long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
6111 long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
6112 long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
6113 long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
6114 long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
6115 long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
6116 long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
6117 long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
6118 long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
6119 long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
6120 long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
6121 long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
6122 long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
6123 long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
6124 long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
6125 long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
6126 long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
6127 long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
6128 long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
6129 long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
6130 long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
6131 long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
6132 long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
6133 long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
6134 long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
6135 long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
6136 long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
6137 long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
6138 long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
6139 long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
6140 long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
6141 long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
6142 long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
6143 long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
6144 long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
6145 long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
6146 long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
6147 long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
6148 long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
6149 long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
6150 long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
6151 long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
6152 long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
6153 long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
6154 long 0x3FFE0000,0x97731420,0x365E538C,0x00000000
6155 long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
6156 long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
6157 long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
6158 long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
6159 long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
6160 long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
6161 long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
6162 long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
6163 long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
6164 long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
6165 long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
6166 long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
6167 long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
6168 long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
6169 long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000
6170 long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
6171 long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
6172 long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
6173 long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
6174 long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
6175 long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
6176 long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
6177 long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
6178 long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
6179 long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
6180 long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
6181 long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
6182 long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
6183 long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
6184 long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
6185 long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
6186 long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
6187 long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
6188 long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000
6189 long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
6190 long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
6191 long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
6192 long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
6193 long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
6194 long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
6195 long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
6196 long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
6197 long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
6198 long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
6199 long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
6200 long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
6201 long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
6202 long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
6203 long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
6204 long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
6205 long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
6206 long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000
6207 long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
6208 long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
6209 long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
6210 long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
6211 long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
6212 long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
6213 long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
6214 long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
6215 long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
6216 long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
6217 long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
6218 long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
6219 long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
6220 long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
6221 long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
6222 long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
6223 long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
6224 long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
6225 long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
6226 long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
6227 long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
6228 long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
6229
# Scratch-area aliases used by satan(). X names the FP_SCR0 slot, which
# holds a copy of the input and is overwritten with F (ATANMAIN) or with
# X' = -1/X (ATANBIG). FP_SCR0/FP_SCR1 themselves are defined elsewhere.
6230 set X,FP_SCR0
# XDCARE is not referenced in the visible code; presumably the unused
# ("don't care") word after the sign/exponent word -- TODO confirm.
6231 set XDCARE,X+2
# XFRAC is the long at offset 4: ATANMAIN keeps its top 5 bits and sets the 6th.
6232 set XFRAC,X+4
# XFRACLO is the long at offset 8: ATANMAIN clears it.
6233 set XFRACLO,X+8
6234
# ATANF names the FP_SCR1 slot; ATANMAIN fills its three longs with
# SIGN(F)*ATAN(|F|) copied from ATANTBL.
6235 set ATANF,FP_SCR1
6236 set ATANFHI,ATANF+4
6237 set ATANFLO,ATANF+8
6238
6239 global satan
6240#--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
#--IN:  a0 = POINTER TO EXTENDED PRECISION INPUT, d0 = ROUND PRECISION,MODE
#--OUT: fp0 = ARCTAN(X)
#--THIS ENTRY CODE BUILDS |X| IN "COMPACT FORM" IN d1 (BIASED EXPONENT IN
#--THE UPPER WORD, TOP 16 MANTISSA BITS IN THE LOWER WORD) SO THAT THE
#--RANGE TESTS BELOW ARE PLAIN INTEGER COMPARES, THEN DISPATCHES TO
#--ATANSM (|X| < 1/16), ATANMAIN (1/16 <= |X| < 16) OR ATANBIG (|X| >= 16).
6241satan:
6242 fmov.x (%a0),%fp0 # LOAD INPUT
6243
6244 mov.l (%a0),%d1 # upper word of d1 = sign and exponent
6245 mov.w 4(%a0),%d1 # lower word of d1 = top 16 mantissa bits
6246 fmov.x %fp0,X(%a6) # keep a copy of X in the scratch slot
6247 and.l &0x7FFFFFFF,%d1 # drop the sign: d1 = |X| in compact form
6248
6249 cmp.l %d1,&0x3FFB8000 # |X| >= 1/16?
6250 bge.b ATANOK1
6251 bra.w ATANSM
6252
6253ATANOK1:
6254 cmp.l %d1,&0x4002FFFF # |X| < 16 ?
6255 ble.b ATANMAIN
6256 bra.w ATANBIG
6257
6258#--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
6259#--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
6260#--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
6261#--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
6262#--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
6263#--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
6264#--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
6265#--FETCH F AND SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE
6266#--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
6267#--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
6268#--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
6269#--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
6270#--WILL INVOLVE A VERY LONG POLYNOMIAL.
6271
6272#--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
6273#--WE CHOOSE F TO BE +-2^K * 1.BBBB1
6274#--THAT IS, IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
6275#--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
6276#--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
6277#-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
6278
6279ATANMAIN:
#--TABLE-DRIVEN PATH FOR 1/16 <= |X| < 16.
#--ON ENTRY: FP0 = X, X(%a6) = COPY OF X, d1 = |X| IN COMPACT FORM
#--(EXPONENT IN THE UPPER WORD, TOP 16 MANTISSA BITS IN THE LOWER WORD),
#--d0 = USER ROUND PRECISION,MODE.
6280
6281 and.l &0xF8000000,XFRAC(%a6) # FIRST 5 BITS
6282 or.l &0x04000000,XFRAC(%a6) # SET 6-TH BIT TO 1
6283 mov.l &0x00000000,XFRACLO(%a6) # LOCATION OF X IS NOW F
6284
6285 fmov.x %fp0,%fp1 # FP1 IS X
6286 fmul.x X(%a6),%fp1 # FP1 IS X*F, NOTE THAT X*F > 0
6287 fsub.x X(%a6),%fp0 # FP0 IS X-F
6288 fadd.s &0x3F800000,%fp1 # FP1 IS 1 + X*F
6289 fdiv.x %fp1,%fp0 # FP0 IS U = (X-F)/(1+X*F)
6290
6291#--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
6292#--CREATE ATAN(F) AND STORE IT IN ATANF, AND
6293#--SAVE REGISTERS FP2.
#--INDEX ARITHMETIC: d2 = (K+4)<<16 IS HALVED TO (K+4)<<15 AND ADDED TO THE
#--4 FRACTION BITS HELD IN BITS 14-11 OF d1, GIVING A 7-BIT VALUE IN BITS
#--17-11. SHIFTING RIGHT BY 7 LEAVES THAT VALUE TIMES 16, WHICH IS THE BYTE
#--OFFSET OF A 16-BYTE ATANTBL ENTRY.
6294
6295 mov.l %d2,-(%sp) # SAVE d2 TEMPORARILY
6296 mov.l %d1,%d2 # THE EXP AND 16 BITS OF X
6297 and.l &0x00007800,%d1 # 4 VARYING BITS OF F'S FRACTION
6298 and.l &0x7FFF0000,%d2 # EXPONENT OF F
6299 sub.l &0x3FFB0000,%d2 # K+4
6300 asr.l &1,%d2
6301 add.l %d2,%d1 # THE 7 BITS IDENTIFYING F
6302 asr.l &7,%d1 # INDEX INTO TBL OF ATAN(|F|)
6303 lea ATANTBL(%pc),%a1
6304 add.l %d1,%a1 # ADDRESS OF ATAN(|F|)
6305 mov.l (%a1)+,ATANF(%a6)
6306 mov.l (%a1)+,ATANFHI(%a6)
6307 mov.l (%a1)+,ATANFLO(%a6) # ATANF IS NOW ATAN(|F|)
6308 mov.l X(%a6),%d1 # LOAD SIGN AND EXPO. AGAIN
6309 and.l &0x80000000,%d1 # SIGN(F)
6310 or.l %d1,ATANF(%a6) # ATANF IS NOW SIGN(F)*ATAN(|F|)
6311 mov.l (%sp)+,%d2 # RESTORE d2
6312
6313#--THAT'S ALL I HAVE TO DO FOR NOW,
6314#--BUT ALAS, THE DIVIDE IS STILL CRANKING!
6315
6316#--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
6317#--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
6318#--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
6319#--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
6320#--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
6321#--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
6322#--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
6323
#--NOTE: THE FMOVEM REGISTER MASK IS BIT-REVERSED BETWEEN THE -(%sp) AND
#--(%sp)+ FORMS, SO &0x04 (SAVE) AND &0x20 (RESTORE) BOTH NAME FP2.
6324 fmovm.x &0x04,-(%sp) # save fp2
6325
6326 fmov.x %fp0,%fp1
6327 fmul.x %fp1,%fp1 # FP1 IS V = U*U
6328 fmov.d ATANA3(%pc),%fp2
6329 fadd.x %fp1,%fp2 # A3+V
6330 fmul.x %fp1,%fp2 # V*(A3+V)
6331 fmul.x %fp0,%fp1 # U*V
6332 fadd.d ATANA2(%pc),%fp2 # A2+V*(A3+V)
6333 fmul.d ATANA1(%pc),%fp1 # A1*U*V
6334 fmul.x %fp2,%fp1 # A1*U*V*(A2+V*(A3+V))
6335 fadd.x %fp1,%fp0 # ATAN(U), FP1 RELEASED
6336
6337 fmovm.x (%sp)+,&0x20 # restore fp2
6338
#--ONLY THE FINAL ADD IS DONE UNDER THE USER'S ROUNDING PRECISION,MODE.
#--t_inx2 IS DEFINED OUTSIDE THIS SECTION (PRESUMABLY THE INEXACT-RESULT
#--EXIT -- CONFIRM AGAINST ITS DEFINITION).
6339 fmov.l %d0,%fpcr # restore users rnd mode,prec
6340 fadd.x ATANF(%a6),%fp0 # ATAN(X)
6341 bra t_inx2
6342
6343ATANBORS:
6344#--|X| IS IN d1 IN COMPACT FORM. FP1, d0 SAVED.
6345#--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
#--NOTE(review): satan BRANCHES TO ATANSM/ATANBIG DIRECTLY AND NO BRANCH TO
#--ATANBORS IS VISIBLE IN THIS SECTION; THE LABEL MAY BE UNUSED -- CONFIRM.
#--0x3FFF8000 IS 1.0 IN COMPACT FORM; GIVEN THE PRECONDITION ABOVE, |X| > 1
#--MEANS |X| >= 16. OTHERWISE EXECUTION FALLS THROUGH INTO ATANSM.
6346 cmp.l %d1,&0x3FFF8000
6347 bgt.w ATANBIG # I.E. |X| >= 16
6348
6349ATANSM:
6350#--|X| <= 1/16
6351#--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
6352#--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
6353#--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
6354#--WHERE Y = X*X, AND Z = Y*Y.
#--ON ENTRY: FP0 = X, X(%a6) = COPY OF X, d1 = |X| IN COMPACT FORM,
#--d0 = USER ROUND PRECISION,MODE.
6355
6356 cmp.l %d1,&0x3FD78000 # |X| < 2^(-40)?
6357 blt.w ATANTINY
6358
6359#--COMPUTE POLYNOMIAL
#--THE TWO BRACKETED SUMS ARE BUILT IN FP2 (EVEN B'S) AND FP3/FP1 (ODD B'S)
#--WITH THEIR INSTRUCTIONS INTERLEAVED.
6360 fmovm.x &0x0c,-(%sp) # save fp2/fp3
6361
6362 fmul.x %fp0,%fp0 # FP0 IS Y = X*X
6363
6364 fmov.x %fp0,%fp1
6365 fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y
6366
6367 fmov.d ATANB6(%pc),%fp2
6368 fmov.d ATANB5(%pc),%fp3
6369
6370 fmul.x %fp1,%fp2 # Z*B6
6371 fmul.x %fp1,%fp3 # Z*B5
6372
6373 fadd.d ATANB4(%pc),%fp2 # B4+Z*B6
6374 fadd.d ATANB3(%pc),%fp3 # B3+Z*B5
6375
6376 fmul.x %fp1,%fp2 # Z*(B4+Z*B6)
6377 fmul.x %fp3,%fp1 # Z*(B3+Z*B5)
6378
6379 fadd.d ATANB2(%pc),%fp2 # B2+Z*(B4+Z*B6)
6380 fadd.d ATANB1(%pc),%fp1 # B1+Z*(B3+Z*B5)
6381
6382 fmul.x %fp0,%fp2 # Y*(B2+Z*(B4+Z*B6))
6383 fmul.x X(%a6),%fp0 # X*Y
6384
6385 fadd.x %fp2,%fp1 # [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
6386
6387 fmul.x %fp1,%fp0 # X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
6388
6389 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
6390
6391 fmov.l %d0,%fpcr # restore users rnd mode,prec
6392 fadd.x X(%a6),%fp0 # ATAN(X) = X + X*Y*(...), under user's mode
6393 bra t_inx2
6394
6395ATANTINY:
6396#--|X| < 2^(-40), ATAN(X) = X
#--THE RESULT IS X ITSELF, RELOADED UNDER THE USER'S ROUNDING PRECISION,MODE.
#--FMOV_OP AND t_catch ARE DEFINED OUTSIDE THIS SECTION; d1 APPARENTLY TELLS
#--t_catch WHICH INSTRUCTION WAS EXECUTED LAST -- CONFIRM AGAINST t_catch.
6397
6398 fmov.l %d0,%fpcr # restore users rnd mode,prec
6399 mov.b &FMOV_OP,%d1 # last inst is MOVE
6400 fmov.x X(%a6),%fp0 # last inst - possible exception set
6401
6402 bra t_catch
6403
6404ATANBIG:
6405#--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
6406#--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
#--ON ENTRY: FP0 = X, d1 = |X| IN COMPACT FORM, a0 = POINTER TO THE INPUT,
#--d0 = USER ROUND PRECISION,MODE.
6407 cmp.l %d1,&0x40638000 # |X| > 2^(100)?
6408 bgt.w ATANHUGE
6409
6410#--APPROXIMATE ATAN(-1/X) BY
6411#--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
6412#--THIS CAN BE RE-WRITTEN AS
6413#--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
6414
6415 fmovm.x &0x0c,-(%sp) # save fp2/fp3
6416
6417 fmov.s &0xBF800000,%fp1 # LOAD -1
6418 fdiv.x %fp0,%fp1 # FP1 IS -1/X
6419
6420#--DIVIDE IS STILL CRANKING
6421
6422 fmov.x %fp1,%fp0 # FP0 IS X'
6423 fmul.x %fp0,%fp0 # FP0 IS Y = X'*X'
6424 fmov.x %fp1,X(%a6) # X IS REALLY X'
6425
6426 fmov.x %fp0,%fp1
6427 fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y
6428
6429 fmov.d ATANC5(%pc),%fp3
6430 fmov.d ATANC4(%pc),%fp2
6431
6432 fmul.x %fp1,%fp3 # Z*C5
6433 fmul.x %fp1,%fp2 # Z*C4
6434
6435 fadd.d ATANC3(%pc),%fp3 # C3+Z*C5
6436 fadd.d ATANC2(%pc),%fp2 # C2+Z*C4
6437
6438 fmul.x %fp3,%fp1 # Z*(C3+Z*C5), FP3 RELEASED
6439 fmul.x %fp0,%fp2 # Y*(C2+Z*C4)
6440
6441 fadd.d ATANC1(%pc),%fp1 # C1+Z*(C3+Z*C5)
6442 fmul.x X(%a6),%fp0 # X'*Y
6443
6444 fadd.x %fp2,%fp1 # [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
6445
6446 fmul.x %fp1,%fp0 # X'*Y*([C1+Z*(C3+Z*C5)]
6447# ... +[Y*(C2+Z*C4)])
6448 fadd.x X(%a6),%fp0 # FP0 IS ATAN(X') = X' + X'*Y*(...)
6449
6450 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
6451
#--ONLY THE FINAL +-PI/2 ADD IS DONE UNDER THE USER'S ROUNDING PRECISION,MODE.
#--THE FIRST BYTE OF THE INPUT CARRIES THE SIGN BIT, SO tst.b/bpl SELECTS ON
#--SIGN(X). t_minx2 AND t_pinx2 ARE DEFINED OUTSIDE THIS SECTION.
6452 fmov.l %d0,%fpcr # restore users rnd mode,prec
6453 tst.b (%a0)
6454 bpl.b pos_big
6455
6456neg_big:
6457 fadd.x NPIBY2(%pc),%fp0 # -PI/2 + ATAN(X')
6458 bra t_minx2
6459
6460pos_big:
6461 fadd.x PPIBY2(%pc),%fp0 # +PI/2 + ATAN(X')
6462 bra t_pinx2
6463
6464ATANHUGE:
6465#--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
#--REACHED FROM ATANBIG WHEN |X| > 2^(100). +-PI/2 IS LOADED FIRST, THEN THE
#--USER'S ROUNDING PRECISION,MODE IS INSTALLED AND A TINY VALUE OF THE
#--OPPOSITE SIGN IS ADDED, SO THE LAST OPERATION IS ROUNDED IN THE USER'S MODE.
#--t_minx2 AND t_pinx2 ARE DEFINED OUTSIDE THIS SECTION.
6466 tst.b (%a0) # sign of X is in the first input byte
6467 bpl.b pos_huge
6468
6469neg_huge:
6470 fmov.x NPIBY2(%pc),%fp0 # -PI/2
6471 fmov.l %d0,%fpcr
6472 fadd.x PTINY(%pc),%fp0 # -PI/2 + TINY
6473 bra t_minx2
6474
6475pos_huge:
6476 fmov.x PPIBY2(%pc),%fp0 # +PI/2
6477 fmov.l %d0,%fpcr
6478 fadd.x NTINY(%pc),%fp0 # +PI/2 - TINY
6479 bra t_pinx2
6480
6481 global satand
6482#--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
#--ALL WORK IS DELEGATED TO t_extdnrm, WHICH IS DEFINED OUTSIDE THIS SECTION
#--(PRESUMABLY IT RETURNS THE DENORMALIZED INPUT ITSELF -- CONFIRM).
6483satand:
6484 bra t_extdnrm
6485
6486#########################################################################
6487# sasin(): computes the inverse sine of a normalized input #
6488# sasind(): computes the inverse sine of a denormalized input #
6489# #
6490# INPUT *************************************************************** #
6491# a0 = pointer to extended precision input #
6492# d0 = round precision,mode #
6493# #
6494# OUTPUT ************************************************************** #
6495# fp0 = arcsin(X) #
6496# #
6497# ACCURACY and MONOTONICITY ******************************************* #
6498# The returned result is within 3 ulps in 64 significant bits, #
6499# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6500# rounded to double precision. The result is provably monotonic #
6501# in double precision. #
6502# #
6503# ALGORITHM *********************************************************** #
6504# #
6505# ASIN #
6506# 1. If |X| >= 1, go to 3. #
6507# #
6508# 2. (|X| < 1) Calculate asin(X) by #
6509# z := sqrt( [1-X][1+X] ) #
6510# asin(X) = atan( x / z ). #
6511# Exit. #
6512# #
6513# 3. If |X| > 1, go to 5. #
6514# #
6515# 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.#
6516# #
6517# 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
6518# Exit. #
6519# #
6520#########################################################################
6521
6522 global sasin
#--ENTRY POINT FOR ASIN(X).
#--IN:  a0 = POINTER TO EXTENDED PRECISION INPUT, d0 = ROUND PRECISION,MODE
#--OUT: fp0 = ARCSIN(X)
#--|X| IS PACKED INTO d1 IN COMPACT FORM (EXPONENT IN THE UPPER WORD, TOP 16
#--MANTISSA BITS IN THE LOWER WORD) SO THE RANGE TESTS ARE INTEGER COMPARES.
6523sasin:
6524 fmov.x (%a0),%fp0 # LOAD INPUT
6525
6526 mov.l (%a0),%d1
6527 mov.w 4(%a0),%d1
6528 and.l &0x7FFFFFFF,%d1 # d1 = |X| in compact form
6529 cmp.l %d1,&0x3FFF8000 # |X| >= 1?
6530 bge.b ASINBIG
6531
6532# This catch is added here for the '060 QSP. Originally, the call to
6533# satan() would handle this case by causing the exception which would
6534# not be caught until gen_except(). Now, with the exceptions being
6535# detected inside of satan(), the exception would have been handled there
6536# instead of inside sasin() as expected.
6537 cmp.l %d1,&0x3FD78000 # |X| < 2^(-40)?
6538 blt.w ASINTINY
6539
6540#--THIS IS THE USUAL CASE, |X| < 1
6541#--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )
6542
6543ASINMAIN:
6544 fmov.s &0x3F800000,%fp1
6545 fsub.x %fp0,%fp1 # 1-X
6546 fmovm.x &0x4,-(%sp) # {fp2}
6547 fmov.s &0x3F800000,%fp2
6548 fadd.x %fp0,%fp2 # 1+X
6549 fmul.x %fp2,%fp1 # (1+X)(1-X)
6550 fmovm.x (%sp)+,&0x20 # {fp2}
6551 fsqrt.x %fp1 # SQRT([1-X][1+X])
6552 fdiv.x %fp1,%fp0 # X/SQRT([1-X][1+X])
#--THE QUOTIENT IS PUSHED AS A 12-BYTE EXTENDED VALUE AND ITS ADDRESS IS
#--PASSED IN a0. d0 STILL HOLDS THE USER'S ROUND PRECISION,MODE, WHICH satan
#--LOADS INTO THE FPCR BEFORE ITS FINAL OPERATION.
6553 fmovm.x &0x01,-(%sp) # save X/SQRT(...)
6554 lea (%sp),%a0 # pass ptr to X/SQRT(...)
6555 bsr satan
6556 add.l &0xc,%sp # clear X/SQRT(...) from stack
6557 bra t_inx2
6558
6559ASINBIG:
6560 fabs.x %fp0 # |X|
6561 fcmp.s %fp0,&0x3F800000 # compare |X| with 1.0
6562 fbgt t_operr # cause an operr exception
6563
6564#--|X| = 1, ASIN(X) = +- PI/2.
#--PIBY2 IS DEFINED OUTSIDE THIS SECTION. THE SIGN IS APPLIED BY MULTIPLYING
#--WITH +-1.0 BUILT IN SINGLE FORMAT, UNDER THE USER'S ROUNDING PRECISION,MODE.
6565ASINONE:
6566 fmov.x PIBY2(%pc),%fp0
6567 mov.l (%a0),%d1
6568 and.l &0x80000000,%d1 # SIGN BIT OF X
6569 or.l &0x3F800000,%d1 # +-1 IN SGL FORMAT
6570 mov.l %d1,-(%sp) # push SIGN(X) IN SGL-FMT
6571 fmov.l %d0,%fpcr
6572 fmul.s (%sp)+,%fp0
6573 bra t_inx2
6574
6575#--|X| < 2^(-40), ASIN(X) = X
6576ASINTINY:
6577 fmov.l %d0,%fpcr # restore users rnd mode,prec
6578 mov.b &FMOV_OP,%d1 # last inst is MOVE
6579 fmov.x (%a0),%fp0 # last inst - possible exception
6580 bra t_catch
6581
6582 global sasind
6583#--ASIN(X) = X FOR DENORMALIZED X
#--ALL WORK IS DELEGATED TO t_extdnrm, WHICH IS DEFINED OUTSIDE THIS SECTION.
6584sasind:
6585 bra t_extdnrm
6586
6587#########################################################################
6588# sacos(): computes the inverse cosine of a normalized input #
6589# sacosd(): computes the inverse cosine of a denormalized input #
6590# #
6591# INPUT *************************************************************** #
6592# a0 = pointer to extended precision input #
6593# d0 = round precision,mode #
6594# #
6595# OUTPUT ************************************************************** #
6596# fp0 = arccos(X) #
6597# #
6598# ACCURACY and MONOTONICITY ******************************************* #
6599# The returned result is within 3 ulps in 64 significant bits, #
6600# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6601# rounded to double precision. The result is provably monotonic #
6602# in double precision. #
6603# #
6604# ALGORITHM *********************************************************** #
6605# #
6606# ACOS #
6607# 1. If |X| >= 1, go to 3. #
6608# #
6609# 2. (|X| < 1) Calculate acos(X) by #
6610# z := (1-X) / (1+X) #
6611# acos(X) = 2 * atan( sqrt(z) ). #
6612# Exit. #
6613# #
6614# 3. If |X| > 1, go to 5. #
6615# #
6616# 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. #
6617# #
6618# 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
6619# Exit. #
6620# #
6621#########################################################################
6622
6623 global sacos
#--ENTRY POINT FOR ACOS(X).
#--IN:  a0 = POINTER TO EXTENDED PRECISION INPUT, d0 = ROUND PRECISION,MODE
#--OUT: fp0 = ARCCOS(X)
6624sacos:
6625 fmov.x (%a0),%fp0 # LOAD INPUT
6626
6627 mov.l (%a0),%d1 # pack exp w/ upper 16 fraction
6628 mov.w 4(%a0),%d1
6629 and.l &0x7FFFFFFF,%d1 # d1 = |X| in compact form
6630 cmp.l %d1,&0x3FFF8000 # |X| >= 1?
6631 bge.b ACOSBIG
6632
6633#--THIS IS THE USUAL CASE, |X| < 1
6634#--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) )
6635
6636ACOSMAIN:
6637 fmov.s &0x3F800000,%fp1
6638 fadd.x %fp0,%fp1 # 1+X
6639 fneg.x %fp0 # -X
6640 fadd.s &0x3F800000,%fp0 # 1-X
6641 fdiv.x %fp1,%fp0 # (1-X)/(1+X)
6642 fsqrt.x %fp0 # SQRT((1-X)/(1+X))
#--satan LOADS d0 INTO THE FPCR BEFORE ITS FINAL OPERATION, SO d0 IS CLEARED
#--FOR THE CALL AND THE USER'S VALUE IS KEPT ON THE STACK. IT IS POPPED INTO
#--THE FPCR AFTERWARDS SO THAT ONLY THE FINAL DOUBLING USES THE USER'S MODE.
6643 mov.l %d0,-(%sp) # save original users fpcr
6644 clr.l %d0
6645 fmovm.x &0x01,-(%sp) # save SQRT(...) to stack
6646 lea (%sp),%a0 # pass ptr to sqrt
6647 bsr satan # ATAN(SQRT([1-X]/[1+X]))
6648 add.l &0xc,%sp # clear SQRT(...) from stack
6649
6650 fmov.l (%sp)+,%fpcr # restore users round prec,mode
6651 fadd.x %fp0,%fp0 # 2 * ATAN( STUFF )
6652 bra t_pinx2
6653
6654ACOSBIG:
6655 fabs.x %fp0 # |X|
6656 fcmp.s %fp0,&0x3F800000 # compare |X| with 1.0
6657 fbgt t_operr # cause an operr exception
6658
6659#--|X| = 1, ACOS(X) = 0 OR PI
6660 tst.b (%a0) # is X positive or negative?
6661 bpl.b ACOSP1
6662
6663#--X = -1
6664#Returns PI and inexact exception
#--PI IS DEFINED OUTSIDE THIS SECTION. THE SMALL SINGLE-PRECISION ADDEND MAKES
#--THE FINAL OPERATION INEXACT, PERFORMED UNDER THE USER'S MODE.
6665ACOSM1:
6666 fmov.x PI(%pc),%fp0 # load PI
6667 fmov.l %d0,%fpcr # load round mode,prec
6668 fadd.s &0x00800000,%fp0 # add a small value
6669 bra t_pinx2
6670
6671ACOSP1:
6672 bra ld_pzero # answer is positive zero
6673
6674 global sacosd
6675#--ACOS(X) = PI/2 FOR DENORMALIZED X
#--THE USER'S MODE IS INSTALLED BEFORE PIBY2 IS LOADED, SO THE LOAD ITSELF IS
#--ROUNDED AS THE USER REQUESTED. PIBY2 AND t_pinx2 ARE DEFINED OUTSIDE THIS
#--SECTION.
6676sacosd:
6677 fmov.l %d0,%fpcr # load user's rnd mode/prec
6678 fmov.x PIBY2(%pc),%fp0
6679 bra t_pinx2
6680
6681#########################################################################
6682# setox(): computes the exponential for a normalized input #
6683# setoxd(): computes the exponential for a denormalized input #
6684# setoxm1(): computes the exponential minus 1 for a normalized input #
6685# setoxm1d(): computes the exponential minus 1 for a denormalized input #
6686# #
6687# INPUT *************************************************************** #
6688# a0 = pointer to extended precision input #
6689# d0 = round precision,mode #
6690# #
6691# OUTPUT ************************************************************** #
6692# fp0 = exp(X) or exp(X)-1 #
6693# #
6694# ACCURACY and MONOTONICITY ******************************************* #
6695# The returned result is within 0.85 ulps in 64 significant bits, #
6696# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6697# rounded to double precision. The result is provably monotonic #
6698# in double precision. #
6699# #
6700# ALGORITHM and IMPLEMENTATION **************************************** #
6701# #
6702# setoxd #
6703# ------ #
6704# Step 1. Set ans := 1.0 #
6705# #
6706# Step 2. Return ans := ans + sign(X)*2^(-126). Exit. #
6707# Notes: This will always generate one exception -- inexact. #
6708# #
6709# #
6710# setox #
6711# ----- #
6712# #
6713# Step 1. Filter out extreme cases of input argument. #
6714# 1.1 If |X| >= 2^(-65), go to Step 1.3. #
6715# 1.2 Go to Step 7. #
6716# 1.3 If |X| < 16380 log(2), go to Step 2. #
6717# 1.4 Go to Step 8. #
6718# Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
6719# To avoid the use of floating-point comparisons, a #
6720# compact representation of |X| is used. This format is a #
6721# 32-bit integer, the upper (more significant) 16 bits #
6722# are the sign and biased exponent field of |X|; the #
6723# lower 16 bits are the 16 most significant fraction #
6724# (including the explicit bit) bits of |X|. Consequently, #
6725# the comparisons in Steps 1.1 and 1.3 can be performed #
6726# by integer comparison. Note also that the constant #
6727# 16380 log(2) used in Step 1.3 is also in the compact #
6728# form. Thus taking the branch to Step 2 guarantees #
6729# |X| < 16380 log(2). There is no harm to have a small #
6730# number of cases where |X| is less than, but close to, #
6731# 16380 log(2) and the branch to Step 8 is taken. #
6732# #
6733# Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
6734# 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 #
6735# was taken) #
6736# 2.2 N := round-to-nearest-integer( X * 64/log2 ). #
6737# 2.3 Calculate J = N mod 64; so J = 0,1,2,..., #
6738# or 63. #
6739# 2.4 Calculate M = (N - J)/64; so N = 64M + J. #
6740# 2.5 Calculate the address of the stored value of #
6741# 2^(J/64). #
6742# 2.6 Create the value Scale = 2^M. #
6743# Notes: The calculation in 2.2 is really performed by #
6744# Z := X * constant #
6745# N := round-to-nearest-integer(Z) #
6746# where #
6747# constant := single-precision( 64/log 2 ). #
6748# #
6749# Using a single-precision constant avoids memory #
6750# access. Another effect of using a single-precision #
6751# "constant" is that the calculated value Z is #
6752# #
6753# Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24). #
6754# #
6755# This error has to be considered later in Steps 3 and 4. #
6756# #
6757# Step 3. Calculate X - N*log2/64. #
6758# 3.1 R := X + N*L1, #
6759# where L1 := single-precision(-log2/64). #
6760# 3.2 R := R + N*L2, #
6761# L2 := extended-precision(-log2/64 - L1).#
6762# Notes: a) The way L1 and L2 are chosen ensures L1+L2 #
6763# approximate the value -log2/64 to 88 bits of accuracy. #
6764# b) N*L1 is exact because N is no longer than 22 bits #
6765# and L1 is no longer than 24 bits. #
6766# c) The calculation X+N*L1 is also exact due to #
6767# cancellation. Thus, R is practically X+N(L1+L2) to full #
6768# 64 bits. #
6769# d) It is important to estimate how large can |R| be #
6770# after Step 3.2. #
6771# #
6772# N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) #
6773# X*64/log2 (1+eps) = N + f, |f| <= 0.5 #
6774# X*64/log2 - N = f - eps*X 64/log2 #
6775# X - N*log2/64 = f*log2/64 - eps*X #
6776# #
6777# #
6778# Now |X| <= 16446 log2, thus #
6779# #
6780# |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 #
6781# <= 0.57 log2/64. #
6782# This bound will be used in Step 4. #
6783# #
6784# Step 4. Approximate exp(R)-1 by a polynomial #
6785# p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) #
6786# Notes: a) In order to reduce memory access, the coefficients #
6787# are made as "short" as possible: A1 (which is 1/2), A4 #
6788# and A5 are single precision; A2 and A3 are double #
6789# precision. #
6790# b) Even with the restrictions above, #
6791# |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. #
6792# Note that 0.0062 is slightly bigger than 0.57 log2/64. #
6793# c) To fully utilize the pipeline, p is separated into #
6794# two independent pieces of roughly equal complexities #
6795# p = [ R + R*S*(A2 + S*A4) ] + #
6796# [ S*(A1 + S*(A3 + S*A5)) ] #
6797# where S = R*R. #
6798# #
6799# Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by #
6800# ans := T + ( T*p + t) #
6801# where T and t are the stored values for 2^(J/64). #
6802# Notes: 2^(J/64) is stored as T and t where T+t approximates #
6803# 2^(J/64) to roughly 85 bits; T is in extended precision #
6804# and t is in single precision. Note also that T is #
6805# rounded to 62 bits so that the last two bits of T are #
6806# zero. The reason for such a special form is that T-1, #
6807# T-2, and T-8 will all be exact --- a property that will #
6808# give much more accurate computation of the function #
6809# EXPM1. #
6810# #
6811# Step 6. Reconstruction of exp(X) #
6812# exp(X) = 2^M * 2^(J/64) * exp(R). #
6813# 6.1 If AdjFlag = 0, go to 6.3 #
6814# 6.2 ans := ans * AdjScale #
6815# 6.3 Restore the user FPCR #
6816# 6.4 Return ans := ans * Scale. Exit. #
6817# Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R, #
6818# |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will #
6819# neither overflow nor underflow. If AdjFlag = 1, that #
6820# means that #
6821# X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380. #
6822# Hence, exp(X) may overflow or underflow or neither. #
6823# When that is the case, AdjScale = 2^(M1) where M1 is #
6824# approximately M. Thus 6.2 will never cause #
6825# over/underflow. Possible exception in 6.4 is overflow #
6826# or underflow. The inexact exception is not generated in #
6827# 6.4. Although one can argue that the inexact flag #
6828# should always be raised, simulating that exception #
6829# costs more than the flag is worth in practical uses. #
6830# #
6831# Step 7. Return 1 + X. #
6832# 7.1 ans := X #
6833# 7.2 Restore user FPCR. #
6834# 7.3 Return ans := 1 + ans. Exit #
6835# Notes: For non-zero X, the inexact exception will always be #
6836# raised by 7.3. That is the only exception raised by 7.3.#
6837# Note also that we use the FMOVEM instruction to move X #
6838# in Step 7.1 to avoid unnecessary trapping. (Although #
6839# the FMOVEM may not seem relevant since X is normalized, #
6840# the precaution will be useful in the library version of #
6841# this code where the separate entry for denormalized #
6842# inputs will be done away with.) #
6843# #
6844# Step 8. Handle exp(X) where |X| >= 16380log2. #
6845# 8.1 If |X| > 16480 log2, go to Step 9. #
6846# (mimic 2.2 - 2.6) #
6847# 8.2 N := round-to-integer( X * 64/log2 ) #
6848# 8.3 Calculate J = N mod 64, J = 0,1,...,63 #
6849# 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, #
6850# AdjFlag := 1. #
6851# 8.5 Calculate the address of the stored value #
6852# 2^(J/64). #
6853# 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. #
6854# 8.7 Go to Step 3. #
6855# Notes: Refer to notes for 2.2 - 2.6. #
6856# #
6857# Step 9. Handle exp(X), |X| > 16480 log2. #
6858# 9.1 If X < 0, go to 9.3 #
6859# 9.2 ans := Huge, go to 9.4 #
6860# 9.3 ans := Tiny. #
6861# 9.4 Restore user FPCR. #
6862# 9.5 Return ans := ans * ans. Exit. #
6863# Notes: Exp(X) will surely overflow or underflow, depending on #
6864# X's sign. "Huge" and "Tiny" are respectively large/tiny #
6865# extended-precision numbers whose square over/underflow #
6866# with an inexact result. Thus, 9.5 always raises the #
6867# inexact together with either overflow or underflow. #
6868# #
6869# setoxm1d #
6870# -------- #
6871# #
6872# Step 1. Set ans := 0 #
6873# #
6874# Step 2. Return ans := X + ans. Exit. #
6875# Notes: This will return X with the appropriate rounding #
6876# precision prescribed by the user FPCR. #
6877# #
6878# setoxm1 #
6879# ------- #
6880# #
6881# Step 1. Check |X| #
6882# 1.1 If |X| >= 1/4, go to Step 1.3. #
6883# 1.2 Go to Step 7. #
6884# 1.3 If |X| < 70 log(2), go to Step 2. #
6885# 1.4 Go to Step 10. #
6886# Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
6887# However, it is conceivable |X| can be small very often #
6888# because EXPM1 is intended to evaluate exp(X)-1 #
6889# accurately when |X| is small. For further details on #
6890# the comparisons, see the notes on Step 1 of setox. #
6891# #
6892# Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
6893# 2.1 N := round-to-nearest-integer( X * 64/log2 ). #
6894# 2.2 Calculate J = N mod 64; so J = 0,1,2,..., #
6895# or 63. #
6896# 2.3 Calculate M = (N - J)/64; so N = 64M + J. #
6897# 2.4 Calculate the address of the stored value of #
6898# 2^(J/64). #
6899# 2.5 Create the values Sc = 2^M and #
6900# OnebySc := -2^(-M). #
6901# Notes: See the notes on Step 2 of setox. #
6902# #
6903# Step 3. Calculate X - N*log2/64. #
6904# 3.1 R := X + N*L1, #
6905# where L1 := single-precision(-log2/64). #
6906# 3.2 R := R + N*L2, #
6907# L2 := extended-precision(-log2/64 - L1).#
6908# Notes: Applying the analysis of Step 3 of setox in this case #
6909# shows that |R| <= 0.0055 (note that |X| <= 70 log2 in #
6910# this case). #
6911# #
6912# Step 4. Approximate exp(R)-1 by a polynomial #
6913# p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) #
6914# Notes: a) In order to reduce memory access, the coefficients #
6915# are made as "short" as possible: A1 (which is 1/2), A5 #
6916# and A6 are single precision; A2, A3 and A4 are double #
6917# precision. #
6918# b) Even with the restriction above, #
6919# |p - (exp(R)-1)| < |R| * 2^(-72.7) #
6920# for all |R| <= 0.0055. #
6921# c) To fully utilize the pipeline, p is separated into #
6922# two independent pieces of roughly equal complexity #
6923# p = [ R*S*(A2 + S*(A4 + S*A6)) ] + #
6924# [ R + S*(A1 + S*(A3 + S*A5)) ] #
6925# where S = R*R. #
6926# #
6927# Step 5. Compute 2^(J/64)*p by #
6928# p := T*p #
6929# where T and t are the stored values for 2^(J/64). #
6930# Notes: 2^(J/64) is stored as T and t where T+t approximates #
6931# 2^(J/64) to roughly 85 bits; T is in extended precision #
6932# and t is in single precision. Note also that T is #
6933# rounded to 62 bits so that the last two bits of T are #
6934# zero. The reason for such a special form is that T-1, #
6935# T-2, and T-8 will all be exact --- a property that will #
6936# be exploited in Step 6 below. The total relative error #
6937# in p is no bigger than 2^(-67.7) compared to the final #
6938# result. #
6939# #
6940# Step 6. Reconstruction of exp(X)-1 #
6941# exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). #
6942# 6.1 If M <= 63, go to Step 6.3. #
6943# 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 #
6944# 6.3 If M >= -3, go to 6.5. #
6945# 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 #
6946# 6.5 ans := (T + OnebySc) + (p + t). #
6947# 6.6 Restore user FPCR. #
6948# 6.7 Return ans := Sc * ans. Exit. #
6949# Notes: The various arrangements of the expressions give #
6950# accurate evaluations. #
6951# #
6952# Step 7. exp(X)-1 for |X| < 1/4. #
6953# 7.1 If |X| >= 2^(-65), go to Step 9. #
6954# 7.2 Go to Step 8. #
6955# #
6956# Step 8. Calculate exp(X)-1, |X| < 2^(-65). #
6957# 8.1 If |X| < 2^(-16312), goto 8.3 #
6958# 8.2 Restore FPCR; return ans := X - 2^(-16382). #
6959# Exit. #
6960# 8.3 X := X * 2^(140). #
6961# 8.4 Restore FPCR; ans := ans - 2^(-16382). #
6962# Return ans := ans*2^(140). Exit #
6963# Notes: The idea is to return "X - tiny" under the user #
6964# precision and rounding modes. To avoid unnecessary #
6965# inefficiency, we stay away from denormalized numbers #
6966# the best we can. For |X| >= 2^(-16312), the #
6967# straightforward 8.2 generates the inexact exception as #
6968# the case warrants. #
6969# #
6970# Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial #
6971# p = X + X*X*(B1 + X*(B2 + ... + X*B12)) #
6972# Notes: a) In order to reduce memory access, the coefficients #
6973# are made as "short" as possible: B1 (which is 1/2), B9 #
6974# to B12 are single precision; B3 to B8 are double #
6975# precision; and B2 is double extended. #
6976# b) Even with the restriction above, #
6977# |p - (exp(X)-1)| < |X| 2^(-70.6) #
6978# for all |X| <= 0.251. #
6979# Note that 0.251 is slightly bigger than 1/4. #
6980# c) To fully preserve accuracy, the polynomial is #
6981# computed as #
6982# X + ( S*B1 + Q ) where S = X*X and #
6983# Q = X*S*(B2 + X*(B3 + ... + X*B12)) #
6984# d) To fully utilize the pipeline, Q is separated into #
6985# two independent pieces of roughly equal complexity #
6986# Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + #
6987# [ S*S*(B3 + S*(B5 + ... + S*B11)) ] #
6988# #
6989# Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. #
6990# 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all #
6991# practical purposes. Therefore, go to Step 1 of setox. #
6992# 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical #
6993# purposes. #
6994# ans := -1 #
6995# Restore user FPCR #
6996# Return ans := ans + 2^(-126). Exit. #
6997# Notes: 10.2 will always create an inexact and return -1 + tiny #
6998# in the user rounding precision and mode. #
6999# #
7000#########################################################################
7001
#
# Constants shared by setox (exp) and setoxm1 (expm1).
#   L2       - read with fmul.x: low-order part of -log2/64 (L1 + L2 = -log2/64,
#              L1 being the single-precision immediate used in the code).
#   EEXPA2/3 - read with fadd.d: double-precision coefficients A2, A3 of the
#              setox polynomial.
#   EM1A2..4 - read with fadd.d: double-precision coefficients A2..A4 of the
#              setoxm1 polynomial (only the first two longs of EM1A2 are read).
#   EM1B3..8 - read with fadd.d: double-precision coefficients B3..B8 of the
#              small-argument setoxm1 polynomial.
#   EM1B2    - read with fadd.x: extended-precision coefficient B2.
#   TWO140 / TWON140 - read with fmul.d: 2^140 and 2^(-140) scale factors.
#
7002L2: long 0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000
7003
7004EEXPA3: long 0x3FA55555,0x55554CC1
7005EEXPA2: long 0x3FC55555,0x55554A54
7006
7007EM1A4: long 0x3F811111,0x11174385
7008EM1A3: long 0x3FA55555,0x55554F5A
7009
7010EM1A2: long 0x3FC55555,0x55555555,0x00000000,0x00000000
7011
7012EM1B8: long 0x3EC71DE3,0xA5774682
7013EM1B7: long 0x3EFA01A0,0x19D7CB68
7014
7015EM1B6: long 0x3F2A01A0,0x1A019DF3
7016EM1B5: long 0x3F56C16C,0x16C170E2
7017
7018EM1B4: long 0x3F811111,0x11111111
7019EM1B3: long 0x3FA55555,0x55555555
7020
7021EM1B2: long 0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB
7022 long 0x00000000
7023
7024TWO140: long 0x48B00000,0x00000000
7025TWON140:
7026 long 0x37300000,0x00000000
7027
#
# EEXPTBL: table of 2^(J/64) for J = 0..63, one 16-byte entry per J (the code
# indexes it with J shifted left by 4).
#   longs 0-2 : T, the leading part, read as an extended-precision value
#   long  3   : t, the correction term, read as a single-precision value
# T + t together approximate 2^(J/64); see the Step 5 notes in the setoxm1
# algorithm description.
#
7028EEXPTBL:
7029 long 0x3FFF0000,0x80000000,0x00000000,0x00000000
7030 long 0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
7031 long 0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
7032 long 0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
7033 long 0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
7034 long 0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
7035 long 0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
7036 long 0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
7037 long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
7038 long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
7039 long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
7040 long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
7041 long 0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
7042 long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
7043 long 0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
7044 long 0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
7045 long 0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
7046 long 0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
7047 long 0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
7048 long 0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
7049 long 0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
7050 long 0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
7051 long 0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
7052 long 0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
7053 long 0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
7054 long 0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
7055 long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
7056 long 0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
7057 long 0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
7058 long 0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
7059 long 0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
7060 long 0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
7061 long 0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
7062 long 0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
7063 long 0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
7064 long 0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
7065 long 0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
7066 long 0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
7067 long 0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
7068 long 0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
7069 long 0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
7070 long 0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
7071 long 0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
7072 long 0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
7073 long 0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
7074 long 0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
7075 long 0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
7076 long 0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
7077 long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
7078 long 0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
7079 long 0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
7080 long 0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
7081 long 0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
7082 long 0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
7083 long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
7084 long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
7085 long 0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
7086 long 0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
7087 long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
7088 long 0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
7089 long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
7090 long 0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
7091 long 0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
7092 long 0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A
7093
# Frame-relative scratch names used by setox and setoxm1 (all addressed off a6).
# SCALE/SC and ADJSCALE/ONEBYSC alias the same two scratch slots: setox uses
# the first name of each pair, setoxm1 the second.
7094 set ADJFLAG,L_SCR2
7095 set SCALE,FP_SCR0
7096 set ADJSCALE,FP_SCR1
7097 set SC,FP_SCR0
7098 set ONEBYSC,FP_SCR1
7099
#
# setox(): exp(X) for a finite, non-zero, non-NaN extended-precision X.
#   In:  a0 = pointer to X
#        d0 = user rounding precision/mode; it is written to FPCR just before
#             the last FP operation on the paths that compute a result here.
#   Out: fp0 = result. Every path leaves by branching to one of t_catch,
#        t_pinx2, t_unfl2 or t_ovfl2, which are defined outside this section.
#   Scratch: d1, a1, fp1, L_SCR1, ADJFLAG, SCALE, ADJSCALE. fp2/fp3 are pushed
#        before and popped after the polynomial evaluation.
#   NOTE: label EXPC1 is also a branch target of setoxm1 (from EM1BIG).
#
7100 global setox
7101setox:
7102#--entry point for EXP(X), here X is finite, non-zero, and not NaN's
7103
7104#--Step 1.
7105 mov.l (%a0),%d1 # load part of input X
7106 and.l &0x7FFF0000,%d1 # biased expo. of X
7107 cmp.l %d1,&0x3FBE0000 # 2^(-65)
7108 bge.b EXPC1 # normal case
7109 bra EXPSM
7110
7111EXPC1:
7112#--The case |X| >= 2^(-65)
7113 mov.w 4(%a0),%d1 # expo. and partial sig. of |X|
7114 cmp.l %d1,&0x400CB167 # 16380 log2 trunc. 16 bits
7115 blt.b EXPMAIN # normal case
7116 bra EEXPBIG
7117
7118EXPMAIN:
7119#--Step 2.
7120#--This is the normal branch: 2^(-65) <= |X| < 16380 log2.
7121 fmov.x (%a0),%fp0 # load input from (a0)
7122
7123 fmov.x %fp0,%fp1
7124 fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7125 fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3}
7126 mov.l &0,ADJFLAG(%a6) # no extra 2^(M1) scaling on this path
7127 fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7128 lea EEXPTBL(%pc),%a1
7129 fmov.l %d1,%fp0 # convert to floating-format
7130
7131 mov.l %d1,L_SCR1(%a6) # save N temporarily
7132 and.l &0x3F,%d1 # d1 is J = N mod 64
7133 lsl.l &4,%d1 # J * 16 = byte offset of table entry
7134 add.l %d1,%a1 # address of 2^(J/64)
7135 mov.l L_SCR1(%a6),%d1
7136 asr.l &6,%d1 # d1 is M
7137 add.w &0x3FFF,%d1 # biased expo. of 2^(M)
7138 mov.w L2(%pc),L_SCR1(%a6) # prefetch L2, no need in CB
7139
7140EXPCONT1:
7141#--Step 3.
7142#--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
7143#--a1 points to 2^(J/64), d1 is biased expo. of 2^(M)
7144 fmov.x %fp0,%fp2
7145 fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64)
7146 fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64
7147 fadd.x %fp1,%fp0 # X + N*L1
7148 fadd.x %fp2,%fp0 # fp0 is R, reduced arg.
7149
7150#--Step 4.
7151#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7152#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
7153#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
7154#--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
7155
7156 fmov.x %fp0,%fp1
7157 fmul.x %fp1,%fp1 # fp1 IS S = R*R
7158
7159 fmov.s &0x3AB60B70,%fp2 # fp2 IS A5
7160
7161 fmul.x %fp1,%fp2 # fp2 IS S*A5
7162 fmov.x %fp1,%fp3
7163 fmul.s &0x3C088895,%fp3 # fp3 IS S*A4
7164
7165 fadd.d EEXPA3(%pc),%fp2 # fp2 IS A3+S*A5
7166 fadd.d EEXPA2(%pc),%fp3 # fp3 IS A2+S*A4
7167
7168 fmul.x %fp1,%fp2 # fp2 IS S*(A3+S*A5)
7169 mov.w %d1,SCALE(%a6) # SCALE is 2^(M) in extended
7170 mov.l &0x80000000,SCALE+4(%a6)
7171 clr.l SCALE+8(%a6)
7172
7173 fmul.x %fp1,%fp3 # fp3 IS S*(A2+S*A4)
7174
7175 fadd.s &0x3F000000,%fp2 # fp2 IS A1+S*(A3+S*A5)
7176 fmul.x %fp0,%fp3 # fp3 IS R*S*(A2+S*A4)
7177
7178 fmul.x %fp1,%fp2 # fp2 IS S*(A1+S*(A3+S*A5))
7179 fadd.x %fp3,%fp0 # fp0 IS R+R*S*(A2+S*A4),
7180
7181 fmov.x (%a1)+,%fp1 # fp1 is lead. pt. of 2^(J/64)
7182 fadd.x %fp2,%fp0 # fp0 is EXP(R) - 1
7183
7184#--Step 5
7185#--final reconstruction process
7186#--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
7187
7188 fmul.x %fp1,%fp0 # 2^(J/64)*(Exp(R)-1)
7189 fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3}
7190 fadd.s (%a1),%fp0 # accurate 2^(J/64)
7191
7192 fadd.x %fp1,%fp0 # 2^(J/64) + 2^(J/64)*...
7193 mov.l ADJFLAG(%a6),%d1
7194
7195#--Step 6
7196 tst.l %d1
7197 beq.b NORMAL
7198ADJUST:
7199 fmul.x ADJSCALE(%a6),%fp0 # ADJFLAG set (Step 8 path): apply 2^(M1) first
7200NORMAL:
7201 fmov.l %d0,%fpcr # restore user FPCR
7202 mov.b &FMUL_OP,%d1 # last inst is MUL
7203 fmul.x SCALE(%a6),%fp0 # multiply 2^(M)
7204 bra t_catch
7205
7206EXPSM:
7207#--Step 7
7208 fmovm.x (%a0),&0x80 # load X
7209 fmov.l %d0,%fpcr
7210 fadd.s &0x3F800000,%fp0 # 1+X in user mode
7211 bra t_pinx2
7212
7213EEXPBIG:
7214#--Step 8
7215 cmp.l %d1,&0x400CB27C # 16480 log2
7216 bgt.b EXP2BIG
7217#--Steps 8.2 -- 8.6
7218 fmov.x (%a0),%fp0 # load input from (a0)
7219
7220 fmov.x %fp0,%fp1
7221 fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7222 fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3}
7223 mov.l &1,ADJFLAG(%a6) # result needs the extra 2^(M1) factor
7224 fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7225 lea EEXPTBL(%pc),%a1
7226 fmov.l %d1,%fp0 # convert to floating-format
7227 mov.l %d1,L_SCR1(%a6) # save N temporarily
7228 and.l &0x3F,%d1 # d1 is J = N mod 64
7229 lsl.l &4,%d1 # J * 16 = byte offset of table entry
7230 add.l %d1,%a1 # address of 2^(J/64)
7231 mov.l L_SCR1(%a6),%d1
7232 asr.l &6,%d1 # d1 is K
7233 mov.l %d1,L_SCR1(%a6) # save K temporarily
7234 asr.l &1,%d1 # d1 is M1
7235 sub.l %d1,L_SCR1(%a6) # L_SCR1 is M = K - M1
7236 add.w &0x3FFF,%d1 # biased expo. of 2^(M1)
7237 mov.w %d1,ADJSCALE(%a6) # ADJSCALE := 2^(M1)
7238 mov.l &0x80000000,ADJSCALE+4(%a6)
7239 clr.l ADJSCALE+8(%a6)
7240 mov.l L_SCR1(%a6),%d1 # d1 is M
7241 add.w &0x3FFF,%d1 # biased expo. of 2^(M)
7242 bra.w EXPCONT1 # go back to Step 3
7243
7244EXP2BIG:
7245#--Step 9
7246 tst.b (%a0) # is X positive or negative?
7247 bmi t_unfl2
7248 bra t_ovfl2
7249
#
# setoxd(): exp(X) for a denormalized X.
#   In:  a0 = pointer to X, d0 = user rounding precision/mode.
#   Out: fp0 = 1 + sign(X)*2^(-126), added after the user FPCR is loaded;
#        exits through t_pinx2 (defined outside this section).
#   A single-precision stand-in for X is built on the stack and popped by
#   the fadd.s.
#
7250 global setoxd
7251setoxd:
7252#--entry point for EXP(X), X is denormalized
7253 mov.l (%a0),-(%sp)
7254 andi.l &0x80000000,(%sp) # keep only the sign of X
7255 ori.l &0x00800000,(%sp) # sign(X)*2^(-126)
7256
7257 fmov.s &0x3F800000,%fp0 # fp0 is 1.0
7258
7259 fmov.l %d0,%fpcr
7260 fadd.s (%sp)+,%fp0 # 1 + sign(X)*2^(-126) in user mode
7261 bra t_pinx2
7262
#
# setoxm1(): exp(X)-1 for a finite, non-zero, non-NaN extended-precision X.
#   In:  a0 = pointer to X
#        d0 = user rounding precision/mode; it is written to FPCR just before
#             the last FP operation of each path that computes a result here.
#   Out: fp0 = result. Paths leave through t_inx2, t_catch or t_minx2
#        (defined outside this section), or continue in setox at EXPC1 when
#        X is large and positive.
#   Scratch: d1, a1, fp1, L_SCR1, SC, ONEBYSC. fp2/fp3 are pushed before and
#        popped after each polynomial evaluation.
#
7263 global setoxm1
7264setoxm1:
7265#--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
7266
7267#--Step 1.
7268#--Step 1.1
7269 mov.l (%a0),%d1 # load part of input X
7270 and.l &0x7FFF0000,%d1 # biased expo. of X
7271 cmp.l %d1,&0x3FFD0000 # 1/4
7272 bge.b EM1CON1 # |X| >= 1/4
7273 bra EM1SM
7274
7275EM1CON1:
7276#--Step 1.3
7277#--The case |X| >= 1/4
7278 mov.w 4(%a0),%d1 # expo. and partial sig. of |X|
7279 cmp.l %d1,&0x4004C215 # 70log2 rounded up to 16 bits
7280 ble.b EM1MAIN # 1/4 <= |X| <= 70log2
7281 bra EM1BIG
7282
7283EM1MAIN:
7284#--Step 2.
7285#--This is the case: 1/4 <= |X| <= 70 log2.
7286 fmov.x (%a0),%fp0 # load input from (a0)
7287
7288 fmov.x %fp0,%fp1
7289 fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7290 fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3}
7291 fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7292 lea EEXPTBL(%pc),%a1
7293 fmov.l %d1,%fp0 # convert to floating-format
7294
7295 mov.l %d1,L_SCR1(%a6) # save N temporarily
7296 and.l &0x3F,%d1 # d1 is J = N mod 64
7297 lsl.l &4,%d1 # J * 16 = byte offset of table entry
7298 add.l %d1,%a1 # address of 2^(J/64)
7299 mov.l L_SCR1(%a6),%d1
7300 asr.l &6,%d1 # d1 is M
7301 mov.l %d1,L_SCR1(%a6) # save a copy of M
7302
7303#--Step 3.
7304#--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
7305#--a1 points to 2^(J/64), d1 and L_SCR1 both contain M
7306 fmov.x %fp0,%fp2
7307 fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64)
7308 fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64
7309 fadd.x %fp1,%fp0 # X + N*L1
7310 fadd.x %fp2,%fp0 # fp0 is R, reduced arg.
7311 add.w &0x3FFF,%d1 # d1 is biased expo. of 2^M
7312
7313#--Step 4.
7314#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7315#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
7316#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
7317#--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
7318
7319 fmov.x %fp0,%fp1
7320 fmul.x %fp1,%fp1 # fp1 IS S = R*R
7321
7322 fmov.s &0x3950097B,%fp2 # fp2 IS a6
7323
7324 fmul.x %fp1,%fp2 # fp2 IS S*A6
7325 fmov.x %fp1,%fp3
7326 fmul.s &0x3AB60B6A,%fp3 # fp3 IS S*A5
7327
7328 fadd.d EM1A4(%pc),%fp2 # fp2 IS A4+S*A6
7329 fadd.d EM1A3(%pc),%fp3 # fp3 IS A3+S*A5
7330 mov.w %d1,SC(%a6) # SC is 2^(M) in extended
7331 mov.l &0x80000000,SC+4(%a6)
7332 clr.l SC+8(%a6)
7333
7334 fmul.x %fp1,%fp2 # fp2 IS S*(A4+S*A6)
7335 mov.l L_SCR1(%a6),%d1 # d1 is M
7336 neg.w %d1 # d1 is -M
7337 fmul.x %fp1,%fp3 # fp3 IS S*(A3+S*A5)
7338 add.w &0x3FFF,%d1 # biased expo. of 2^(-M)
7339 fadd.d EM1A2(%pc),%fp2 # fp2 IS A2+S*(A4+S*A6)
7340 fadd.s &0x3F000000,%fp3 # fp3 IS A1+S*(A3+S*A5)
7341
7342 fmul.x %fp1,%fp2 # fp2 IS S*(A2+S*(A4+S*A6))
7343 or.w &0x8000,%d1 # signed/expo. of -2^(-M)
7344 mov.w %d1,ONEBYSC(%a6) # OnebySc is -2^(-M)
7345 mov.l &0x80000000,ONEBYSC+4(%a6)
7346 clr.l ONEBYSC+8(%a6)
7347 fmul.x %fp3,%fp1 # fp1 IS S*(A1+S*(A3+S*A5))
7348
7349 fmul.x %fp0,%fp2 # fp2 IS R*S*(A2+S*(A4+S*A6))
7350 fadd.x %fp1,%fp0 # fp0 IS R+S*(A1+S*(A3+S*A5))
7351
7352 fadd.x %fp2,%fp0 # fp0 IS EXP(R)-1
7353
7354 fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3}
7355
7356#--Step 5
7357#--Compute 2^(J/64)*p
7358
7359 fmul.x (%a1),%fp0 # 2^(J/64)*(Exp(R)-1)
7360
7361#--Step 6
7362#--Step 6.1
7363 mov.l L_SCR1(%a6),%d1 # retrieve M
7364 cmp.l %d1,&63
7365 ble.b MLE63
7366#--Step 6.2 M >= 64
7367 fmov.s 12(%a1),%fp1 # fp1 is t
7368 fadd.x ONEBYSC(%a6),%fp1 # fp1 is t+OnebySc
7369 fadd.x %fp1,%fp0 # p+(t+OnebySc), fp1 released
7370 fadd.x (%a1),%fp0 # T+(p+(t+OnebySc))
7371 bra EM1SCALE
7372MLE63:
7373#--Step 6.3 M <= 63
7374 cmp.l %d1,&-3
7375 bge.b MGEN3
7376MLTN3:
7377#--Step 6.4 M <= -4
7378 fadd.s 12(%a1),%fp0 # p+t
7379 fadd.x (%a1),%fp0 # T+(p+t)
7380 fadd.x ONEBYSC(%a6),%fp0 # OnebySc + (T+(p+t))
7381 bra EM1SCALE
7382MGEN3:
7383#--Step 6.5 -3 <= M <= 63
7384 fmov.x (%a1)+,%fp1 # fp1 is T
7385 fadd.s (%a1),%fp0 # fp0 is p+t
7386 fadd.x ONEBYSC(%a6),%fp1 # fp1 is T+OnebySc
7387 fadd.x %fp1,%fp0 # (T+OnebySc)+(p+t)
7388
7389EM1SCALE:
7390#--Step 6.6
7391 fmov.l %d0,%fpcr # restore user FPCR
7392 fmul.x SC(%a6),%fp0 # Step 6.7: ans := Sc * ans
7393 bra t_inx2
7394
7395EM1SM:
7396#--Step 7 |X| < 1/4.
7397 cmp.l %d1,&0x3FBE0000 # 2^(-65)
7398 bge.b EM1POLY
7399
7400EM1TINY:
7401#--Step 8 |X| < 2^(-65)
7402 cmp.l %d1,&0x00330000 # 2^(-16312)
7403 blt.b EM12TINY
7404#--Step 8.2
7405 mov.l &0x80010000,SC(%a6) # SC is -2^(-16382)
7406 mov.l &0x80000000,SC+4(%a6)
7407 clr.l SC+8(%a6)
7408 fmov.x (%a0),%fp0
7409 fmov.l %d0,%fpcr
7410 mov.b &FADD_OP,%d1 # last inst is ADD
7411 fadd.x SC(%a6),%fp0 # X - 2^(-16382) in user mode
7412 bra t_catch
7413
7414EM12TINY:
7415#--Step 8.3
7416 fmov.x (%a0),%fp0
7417 fmul.d TWO140(%pc),%fp0 # X * 2^(140)
7418 mov.l &0x80010000,SC(%a6) # SC is -2^(-16382)
7419 mov.l &0x80000000,SC+4(%a6)
7420 clr.l SC+8(%a6)
7421 fadd.x SC(%a6),%fp0 # X*2^(140) - 2^(-16382)
7422 fmov.l %d0,%fpcr
7423 mov.b &FMUL_OP,%d1 # last inst is MUL
7424 fmul.d TWON140(%pc),%fp0 # scale back by 2^(-140) in user mode
7425 bra t_catch
7426
7427EM1POLY:
7428#--Step 9 exp(X)-1 by a simple polynomial
7429 fmov.x (%a0),%fp0 # fp0 is X
7430 fmul.x %fp0,%fp0 # fp0 is S := X*X
7431 fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3}
7432 fmov.s &0x2F30CAA8,%fp1 # fp1 is B12
7433 fmul.x %fp0,%fp1 # fp1 is S*B12
7434 fmov.s &0x310F8290,%fp2 # fp2 is B11
7435 fadd.s &0x32D73220,%fp1 # fp1 is B10+S*B12
7436
7437 fmul.x %fp0,%fp2 # fp2 is S*B11
7438 fmul.x %fp0,%fp1 # fp1 is S*(B10 + ...
7439
7440 fadd.s &0x3493F281,%fp2 # fp2 is B9+S*...
7441 fadd.d EM1B8(%pc),%fp1 # fp1 is B8+S*...
7442
7443 fmul.x %fp0,%fp2 # fp2 is S*(B9+...
7444 fmul.x %fp0,%fp1 # fp1 is S*(B8+...
7445
7446 fadd.d EM1B7(%pc),%fp2 # fp2 is B7+S*...
7447 fadd.d EM1B6(%pc),%fp1 # fp1 is B6+S*...
7448
7449 fmul.x %fp0,%fp2 # fp2 is S*(B7+...
7450 fmul.x %fp0,%fp1 # fp1 is S*(B6+...
7451
7452 fadd.d EM1B5(%pc),%fp2 # fp2 is B5+S*...
7453 fadd.d EM1B4(%pc),%fp1 # fp1 is B4+S*...
7454
7455 fmul.x %fp0,%fp2 # fp2 is S*(B5+...
7456 fmul.x %fp0,%fp1 # fp1 is S*(B4+...
7457
7458 fadd.d EM1B3(%pc),%fp2 # fp2 is B3+S*...
7459 fadd.x EM1B2(%pc),%fp1 # fp1 is B2+S*...
7460
7461 fmul.x %fp0,%fp2 # fp2 is S*(B3+...
7462 fmul.x %fp0,%fp1 # fp1 is S*(B2+...
7463
7464 fmul.x %fp0,%fp2 # fp2 is S*S*(B3+...)
7465 fmul.x (%a0),%fp1 # fp1 is X*S*(B2...
7466
7467 fmul.s &0x3F000000,%fp0 # fp0 is S*B1
7468 fadd.x %fp2,%fp1 # fp1 is Q
7469
7470 fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3}
7471
7472 fadd.x %fp1,%fp0 # fp0 is S*B1+Q
7473
7474 fmov.l %d0,%fpcr
7475 fadd.x (%a0),%fp0 # X + (S*B1+Q) in user mode
7476 bra t_inx2
7477
7478EM1BIG:
7479#--Step 10 |X| > 70 log2
7480 mov.l (%a0),%d1
7481 cmp.l %d1,&0
7482 bgt.w EXPC1 # X > 0: exp(X)-1 = exp(X), continue in setox
7483#--Step 10.2
7484 fmov.s &0xBF800000,%fp0 # fp0 is -1
7485 fmov.l %d0,%fpcr
7486 fadd.s &0x00800000,%fp0 # -1 + 2^(-126)
7487 bra t_minx2
7488
#
# setoxm1d(): exp(X)-1 for a denormalized X. The whole case is delegated to
# t_extdnrm, which is defined outside this section.
#
7489 global setoxm1d
7490setoxm1d:
7491#--entry point for EXPM1(X), here X is denormalized
7492#--Step 0.
7493 bra t_extdnrm
7494
7495#########################################################################
7496# sgetexp(): returns the exponent portion of the input argument. #
7497# The exponent bias is removed and the exponent value is #
7498# returned as an extended precision number in fp0. #
7499# sgetexpd(): handles denormalized numbers. #
7500# #
7501# sgetman(): extracts the mantissa of the input argument. The #
7502# mantissa is converted to an extended precision number w/ #
7503# an exponent of $3fff and is returned in fp0. The range of #
7504# the result is [1.0 - 2.0). #
7505# sgetmand(): handles denormalized numbers. #
7506# #
7507# INPUT *************************************************************** #
7508# a0 = pointer to extended precision input #
7509# #
7510# OUTPUT ************************************************************** #
7511# fp0 = exponent(X) or mantissa(X) #
7512# #
7513#########################################################################
7514
#
# sgetexp(): return the unbiased exponent of the operand at (a0) in fp0.
#   Overwrites d0. When the unbiased exponent is negative, the 'N' condition
#   code byte in the frame (FPSR_CC) is set to neg_bmask.
#
7515 global sgetexp
7516sgetexp:
7517 mov.w SRC_EX(%a0),%d0 # get the exponent
7518 bclr &0xf,%d0 # clear the sign bit
7519 subi.w &0x3fff,%d0 # subtract off the bias
7520 fmov.w %d0,%fp0 # return exp in fp0
7521 blt.b sgetexpn # it's negative
7522 rts
7523
7524sgetexpn:
7525 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7526 rts
7527
#
# sgetexpd(): sgetexp for a denormalized operand.
#   Calls norm (defined outside this section); per the existing comments it
#   leaves the normalization shift amount in d0 -- confirm against norm.
#   The 'N' condition code byte is always set on this path.
#
7528 global sgetexpd
7529sgetexpd:
7530 bsr.l norm # normalize
7531 neg.w %d0 # new exp = -(shft amt)
7532 subi.w &0x3fff,%d0 # subtract off the bias
7533 fmov.w %d0,%fp0 # return exp in fp0
7534 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7535 rts
7536
#
# sgetman(): return the mantissa of the operand at (a0) in fp0, with the
#   original sign and a biased exponent of $3fff.
#   The result is assembled in FP_SCR0 so the input is left untouched.
#   Overwrites d0. For a negative operand the 'N' condition code byte in the
#   frame (FPSR_CC) is set to neg_bmask.
#
7537 global sgetman
7538sgetman:
7539 mov.w SRC_EX(%a0),%d0 # get the exp
7540 ori.w &0x7fff,%d0 # clear old exp
7541 bclr &0xe,%d0 # make it the new exp +-3fff
7542
7543# here, we build the result in a tmp location so as not to disturb the input
7544 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy to tmp loc
7545 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy to tmp loc
7546 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
7547 fmov.x FP_SCR0(%a6),%fp0 # put new value back in fp0
7548 bmi.b sgetmann # it's negative
7549 rts
7550
7551sgetmann:
7552 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7553 rts
7554
7555#
7556# For denormalized numbers, shift the mantissa until the j-bit = 1,
7557# then load the exponent with +/- $3fff.
7558#
# sgetmand(): calls norm (defined outside this section) and then continues
# in sgetman to build the result.
7559 global sgetmand
7560sgetmand:
7561 bsr.l norm # normalize exponent
7562 bra.b sgetman
7563
7564#########################################################################
7565# scosh(): computes the hyperbolic cosine of a normalized input #
7566# scoshd(): computes the hyperbolic cosine of a denormalized input #
7567# #
7568# INPUT *************************************************************** #
7569# a0 = pointer to extended precision input #
7570# d0 = round precision,mode #
7571# #
7572# OUTPUT ************************************************************** #
7573# fp0 = cosh(X) #
7574# #
7575# ACCURACY and MONOTONICITY ******************************************* #
7576# The returned result is within 3 ulps in 64 significant bit, #
7577# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7578# rounded to double precision. The result is provably monotonic #
7579# in double precision. #
7580# #
7581# ALGORITHM *********************************************************** #
7582# #
7583# COSH #
7584# 1. If |X| > 16380 log2, go to 3. #
7585# #
7586# 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae #
7587# Y = |X|, z = exp(Y), and #
7588# cosh(X) = (1/2)*( z + 1/z ). #
7589# Exit. #
7590# #
7591# 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5. #
7592# #
7593# 4. (16380 log2 < |X| <= 16480 log2) #
7594# cosh(X) = exp(|X|)/2. #
7595# However, invoking exp(|X|) may cause premature #
7596# overflow. Thus, we calculate cosh(X) as follows: #
7597# Y := |X| #
7598# Fact := 2**(16380) #
7599# Y' := Y - 16381 log2 #
7600# cosh(X) := Fact * exp(Y'). #
7601# Exit. #
7602# #
7603# 5. (|X| > 16480 log2) cosh(X) must overflow. Return #
7604# Huge*Huge to generate overflow and a positive infinity #
7605# (cosh is never negative). Huge is the largest finite number #
7606# in extended format. Exit. #
7607# #
7608#########################################################################
7609
# TWO16380: 2^(16380) in extended precision, the final scale factor on the
# COSHBIG path.
7610TWO16380:
7611 long 0x7FFB0000,0x80000000,0x00000000,0x00000000
7612
#
# scosh(): cosh(X) for a normalized X.
#   In:  a0 = pointer to X, d0 = user rounding precision/mode.
#   Out: fp0 = cosh(X); exits through t_catch or t_ovfl2 (defined outside
#        this section).
#   setox is called as a subroutine with d0 cleared and a0 pointing at a
#   temporary copy of the argument on the stack; the caller's d0 is saved on
#   the stack around that call. Overwrites d1, a0, fp1.
#
7613 global scosh
7614scosh:
7615 fmov.x (%a0),%fp0 # LOAD INPUT
7616
7617 mov.l (%a0),%d1
7618 mov.w 4(%a0),%d1 # d1 = sign/expo. : top 16 bits of mantissa
7619 and.l &0x7FFFFFFF,%d1 # drop the sign -> compact form of |X|
7620 cmp.l %d1,&0x400CB167 # same 16380 log2 bound as used in setox
7621 bgt.b COSHBIG
7622
7623#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7624#--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )
7625
7626 fabs.x %fp0 # |X|
7627
7628 mov.l %d0,-(%sp) # save user rounding info
7629 clr.l %d0 # d0 = 0 for the setox call
7630 fmovm.x &0x01,-(%sp) # save |X| to stack
7631 lea (%sp),%a0 # pass ptr to |X|
7632 bsr setox # FP0 IS EXP(|X|)
7633 add.l &0xc,%sp # erase |X| from stack
7634 fmul.s &0x3F000000,%fp0 # (1/2)EXP(|X|)
7635 mov.l (%sp)+,%d0 # recover user rounding info
7636
7637 fmov.s &0x3E800000,%fp1 # (1/4)
7638 fdiv.x %fp0,%fp1 # 1/(2 EXP(|X|))
7639
7640 fmov.l %d0,%fpcr # user FPCR for the final add
7641 mov.b &FADD_OP,%d1 # last inst is ADD
7642 fadd.x %fp1,%fp0 # (1/2)EXP(|X|) + 1/(2 EXP(|X|))
7643 bra t_catch
7644
7645COSHBIG:
# NOTE(review): the algorithm description gives 16480 log2 as this bound and
# setox uses 0x400CB27C for it; the constant below is slightly larger --
# confirm the intended cutoff.
7646 cmp.l %d1,&0x400CB2B3
7647 bgt.b COSHHUGE
7648
7649 fabs.x %fp0
7650 fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD)
7651 fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE
7652
7653 mov.l %d0,-(%sp) # save user rounding info
7654 clr.l %d0 # d0 = 0 for the setox call
7655 fmovm.x &0x01,-(%sp) # save fp0 to stack
7656 lea (%sp),%a0 # pass ptr to fp0
7657 bsr setox
7658 add.l &0xc,%sp # clear fp0 from stack
7659 mov.l (%sp)+,%d0 # recover user rounding info
7660
7661 fmov.l %d0,%fpcr # user FPCR for the final multiply
7662 mov.b &FMUL_OP,%d1 # last inst is MUL
7663 fmul.x TWO16380(%pc),%fp0 # 2^(16380) * exp(|X| - 16381 log2)
7664 bra t_catch
7665
7666COSHHUGE:
7667 bra t_ovfl2
7668
#
# scoshd(): cosh(X) for a denormalized X.
#   In:  d0 = user rounding precision/mode.
#   Out: fp0 = 1 + 2^(-126), added after the user FPCR is loaded; exits
#        through t_pinx2 (defined outside this section).
#
7669 global scoshd
7670#--COSH(X) = 1 FOR DENORMALIZED X
7671scoshd:
7672 fmov.s &0x3F800000,%fp0 # fp0 is 1.0
7673
7674 fmov.l %d0,%fpcr
7675 fadd.s &0x00800000,%fp0 # 1 + 2^(-126) in user mode
7676 bra t_pinx2
7677
7678#########################################################################
7679# ssinh(): computes the hyperbolic sine of a normalized input #
7680# ssinhd(): computes the hyperbolic sine of a denormalized input #
7681# #
7682# INPUT *************************************************************** #
7683# a0 = pointer to extended precision input #
7684# d0 = round precision,mode #
7685# #
7686# OUTPUT ************************************************************** #
7687# fp0 = sinh(X) #
7688# #
7689# ACCURACY and MONOTONICITY ******************************************* #
7690# The returned result is within 3 ulps in 64 significant bit, #
7691# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7692# rounded to double precision. The result is provably monotonic #
7693# in double precision. #
7694# #
7695# ALGORITHM *********************************************************** #
7696# #
7697# SINH #
7698# 1. If |X| > 16380 log2, go to 3. #
7699# #
7700# 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula #
7701# Y = |X|, sgn = sign(X), and z = expm1(Y), #
7702# sinh(X) = sgn*(1/2)*( z + z/(1+z) ). #
7703# Exit. #
7704# #
7705# 3. If |X| > 16480 log2, go to 5. #
7706# #
7707# 4. (16380 log2 < |X| <= 16480 log2) #
7708# sinh(X) = sign(X) * exp(|X|)/2. #
7709# However, invoking exp(|X|) may cause premature overflow. #
7710# Thus, we calculate sinh(X) as follows: #
7711# Y := |X| #
7712# sgn := sign(X) #
7713# sgnFact := sgn * 2**(16380) #
7714# Y' := Y - 16381 log2 #
7715# sinh(X) := sgnFact * exp(Y'). #
7716# Exit. #
7717# #
7718# 5. (|X| > 16480 log2) sinh(X) must overflow. Return #
7719# sign(X)*Huge*Huge to generate overflow and an infinity with #
7720# the appropriate sign. Huge is the largest finite number in #
7721# extended format. Exit. #
7722# #
7723#########################################################################
7724
#
# ssinh(): sinh(X) for a normalized X.
#   In:  a0 = pointer to X, d0 = user rounding precision/mode.
#   Out: fp0 = sinh(X); exits through t_catch or t_ovfl (defined outside
#        this section).
#   a1 holds the compact form of X (sign/exponent : top mantissa word) so the
#   sign can be recovered after the setoxm1/setox call. Those routines are
#   called with d0 cleared and a0 pointing at a temporary on the stack.
#   Overwrites d1, a0, a1, fp1.
#
7725 global ssinh
7726ssinh:
7727 fmov.x (%a0),%fp0 # LOAD INPUT
7728
7729 mov.l (%a0),%d1
7730 mov.w 4(%a0),%d1 # d1 = sign/expo. : top 16 bits of mantissa
7731 mov.l %d1,%a1 # save (compacted) operand
7732 and.l &0x7FFFFFFF,%d1 # drop the sign -> compact form of |X|
7733 cmp.l %d1,&0x400CB167 # same 16380 log2 bound as used in setox
7734 bgt.b SINHBIG
7735
7736#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7737#--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
7738
7739 fabs.x %fp0 # Y = |X|
7740
7741 movm.l &0x8040,-(%sp) # {a1/d0}
7742 fmovm.x &0x01,-(%sp) # save Y on stack
7743 lea (%sp),%a0 # pass ptr to Y
7744 clr.l %d0 # d0 = 0 for the setoxm1 call
7745 bsr setoxm1 # FP0 IS Z = EXPM1(Y)
7746 add.l &0xc,%sp # clear Y from stack
7747 fmov.l &0,%fpcr # FPCR = 0 for the intermediate steps
7748 movm.l (%sp)+,&0x0201 # {a1/d0}
7749
7750 fmov.x %fp0,%fp1
7751 fadd.s &0x3F800000,%fp1 # 1+Z
7752 fmov.x %fp0,-(%sp) # keep a copy of Z on the stack
7753 fdiv.x %fp1,%fp0 # Z/(1+Z)
7754 mov.l %a1,%d1
7755 and.l &0x80000000,%d1 # sign of X
7756 or.l &0x3F000000,%d1 # sign(X) * 1/2 in single format
7757 fadd.x (%sp)+,%fp0 # Z + Z/(1+Z)
7758 mov.l %d1,-(%sp) # push sign(X)/2 as operand of the final fmul.s
7759
7760 fmov.l %d0,%fpcr # user FPCR for the final multiply
7761 mov.b &FMUL_OP,%d1 # last inst is MUL
7762 fmul.s (%sp)+,%fp0 # last fp inst - possible exceptions set
7763 bra t_catch
7764
7765SINHBIG:
# NOTE(review): the algorithm description gives 16480 log2 as this bound and
# setox uses 0x400CB27C for it; the constant below is slightly larger --
# confirm the intended cutoff.
7766 cmp.l %d1,&0x400CB2B3
7767 bgt t_ovfl
7768 fabs.x %fp0
7769 fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD)
# Build sgnFact = sign(X) * 2^(16380) on the stack as a 12-byte extended
# value (low mantissa long pushed first, sign/exponent long last).
7770 mov.l &0,-(%sp)
7771 mov.l &0x80000000,-(%sp)
7772 mov.l %a1,%d1
7773 and.l &0x80000000,%d1 # sign of X
7774 or.l &0x7FFB0000,%d1 # biased expo. of 2^(16380)
7775 mov.l %d1,-(%sp) # EXTENDED FMT
7776 fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE
7777
7778 mov.l %d0,-(%sp) # save user rounding info
7779 clr.l %d0 # d0 = 0 for the setox call
7780 fmovm.x &0x01,-(%sp) # save fp0 on stack
7781 lea (%sp),%a0 # pass ptr to fp0
7782 bsr setox
7783 add.l &0xc,%sp # clear fp0 from stack
7784
7785 mov.l (%sp)+,%d0 # recover user rounding info
7786 fmov.l %d0,%fpcr
7787 mov.b &FMUL_OP,%d1 # last inst is MUL
7788 fmul.x (%sp)+,%fp0 # possible exception
7789 bra t_catch
7790
#
# ssinhd(): sinh(X) for a denormalized X. The whole case is delegated to
# t_extdnrm, which is defined outside this section.
#
7791 global ssinhd
7792#--SINH(X) = X FOR DENORMALIZED X
7793ssinhd:
7794 bra t_extdnrm
7795
7796#########################################################################
7797# stanh(): computes the hyperbolic tangent of a normalized input #
7798# stanhd(): computes the hyperbolic tangent of a denormalized input #
7799# #
7800# INPUT *************************************************************** #
7801# a0 = pointer to extended precision input #
7802# d0 = round precision,mode #
7803# #
7804# OUTPUT ************************************************************** #
7805# fp0 = tanh(X) #
7806# #
7807# ACCURACY and MONOTONICITY ******************************************* #
7808# The returned result is within 3 ulps in 64 significant bit, #
7809# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7810# rounded to double precision. The result is provably monotonic #
7811# in double precision. #
7812# #
7813# ALGORITHM *********************************************************** #
7814# #
7815# TANH #
7816# 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3. #
7817# #
7818# 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by #
7819# sgn := sign(X), Y := 2|X|, z := expm1(Y), and #
7820# tanh(X) = sgn*( z/(2+z) ). #
7821# Exit. #
7822# #
7823# 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1, #
7824# go to 7. #
7825# #
7826# 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6. #
7827# #
7828# 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by #
7829# sgn := sign(X), Y := 2|X|, z := exp(Y), #
7830# tanh(X) = sgn - [ sgn*2/(1+z) ]. #
7831# Exit. #
7832# #
7833# 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we #
7834# calculate Tanh(X) by #
7835# sgn := sign(X), Tiny := 2**(-126), #
7836# tanh(X) := sgn - sgn*Tiny. #
7837# Exit. #
7838# #
7839# 7. (|X| < 2**(-40)). Tanh(X) = X. Exit. #
7840# #
7841#########################################################################
7842
# Frame-relative scratch names used by stanh (all addressed off a6).
# X and V alias the same FP_SCR0 slot; XFRAC is the long at X+4.
7843 set X,FP_SCR0
7844 set XFRAC,X+4
7845
7846 set SGN,L_SCR3
7847
7848 set V,FP_SCR0
7849
7850 global stanh
7851stanh:
7852 fmov.x (%a0),%fp0 # LOAD INPUT
7853
7854 fmov.x %fp0,X(%a6)
7855 mov.l (%a0),%d1
7856 mov.w 4(%a0),%d1
7857 mov.l %d1,X(%a6)
7858 and.l &0x7FFFFFFF,%d1
7859 cmp.l %d1, &0x3fd78000 # is |X| < 2^(-40)?
7860 blt.w TANHBORS # yes
7861 cmp.l %d1, &0x3fffddce # is |X| > (5/2)LOG2?
7862 bgt.w TANHBORS # yes
7863
7864#--THIS IS THE USUAL CASE
7865#--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
7866
7867 mov.l X(%a6),%d1
7868 mov.l %d1,SGN(%a6)
7869 and.l &0x7FFF0000,%d1
7870 add.l &0x00010000,%d1 # EXPONENT OF 2|X|
7871 mov.l %d1,X(%a6)
7872 and.l &0x80000000,SGN(%a6)
7873 fmov.x X(%a6),%fp0 # FP0 IS Y = 2|X|
7874
7875 mov.l %d0,-(%sp)
7876 clr.l %d0
7877 fmovm.x &0x1,-(%sp) # save Y on stack
7878 lea (%sp),%a0 # pass ptr to Y
7879 bsr setoxm1 # FP0 IS Z = EXPM1(Y)
7880 add.l &0xc,%sp # clear Y from stack
7881 mov.l (%sp)+,%d0
7882
7883 fmov.x %fp0,%fp1
7884 fadd.s &0x40000000,%fp1 # Z+2
7885 mov.l SGN(%a6),%d1
7886 fmov.x %fp1,V(%a6)
7887 eor.l %d1,V(%a6)
7888
7889 fmov.l %d0,%fpcr # restore users round prec,mode
7890 fdiv.x V(%a6),%fp0
7891 bra t_inx2
7892
7893TANHBORS:
7894 cmp.l %d1,&0x3FFF8000
7895 blt.w TANHSM
7896
7897 cmp.l %d1,&0x40048AA1
7898 bgt.w TANHHUGE
7899
7900#-- (5/2) LOG2 < |X| < 50 LOG2,
7901#--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
7902#--TANH(X) = SGN - SGN*2/[EXP(Y)+1].
7903
7904 mov.l X(%a6),%d1
7905 mov.l %d1,SGN(%a6)
7906 and.l &0x7FFF0000,%d1
7907 add.l &0x00010000,%d1 # EXPO OF 2|X|
7908 mov.l %d1,X(%a6) # Y = 2|X|
7909 and.l &0x80000000,SGN(%a6)
7910 mov.l SGN(%a6),%d1
7911 fmov.x X(%a6),%fp0 # Y = 2|X|
7912
7913 mov.l %d0,-(%sp)
7914 clr.l %d0
7915 fmovm.x &0x01,-(%sp) # save Y on stack
7916 lea (%sp),%a0 # pass ptr to Y
7917 bsr setox # FP0 IS EXP(Y)
7918 add.l &0xc,%sp # clear Y from stack
7919 mov.l (%sp)+,%d0
7920 mov.l SGN(%a6),%d1
7921 fadd.s &0x3F800000,%fp0 # EXP(Y)+1
7922
7923 eor.l &0xC0000000,%d1 # -SIGN(X)*2
7924 fmov.s %d1,%fp1 # -SIGN(X)*2 IN SGL FMT
7925 fdiv.x %fp0,%fp1 # -SIGN(X)2 / [EXP(Y)+1 ]
7926
7927 mov.l SGN(%a6),%d1
7928 or.l &0x3F800000,%d1 # SGN
7929 fmov.s %d1,%fp0 # SGN IN SGL FMT
7930
7931 fmov.l %d0,%fpcr # restore users round prec,mode
7932 mov.b &FADD_OP,%d1 # last inst is ADD
7933 fadd.x %fp1,%fp0
7934 bra t_inx2
7935
7936TANHSM:
7937 fmov.l %d0,%fpcr # restore users round prec,mode
7938 mov.b &FMOV_OP,%d1 # last inst is MOVE
7939 fmov.x X(%a6),%fp0 # last inst - possible exception set
7940 bra t_catch
7941
7942#---RETURN SGN(X) - SGN(X)EPS
7943TANHHUGE:
7944 mov.l X(%a6),%d1
7945 and.l &0x80000000,%d1
7946 or.l &0x3F800000,%d1
7947 fmov.s %d1,%fp0
7948 and.l &0x80000000,%d1
7949 eor.l &0x80800000,%d1 # -SIGN(X)*EPS
7950
7951 fmov.l %d0,%fpcr # restore users round prec,mode
7952 fadd.s %d1,%fp0
7953 bra t_inx2
7954
7955 global stanhd
7956#--TANH(X) = X FOR DENORMALIZED X
# No computation is done here: control goes straight to the shared exit
# t_extdnrm, which is defined outside this section.
7957stanhd:
7958 bra t_extdnrm
7959
7960#########################################################################
7961# slogn(): computes the natural logarithm of a normalized input #
7962# slognd(): computes the natural logarithm of a denormalized input #
7963# slognp1(): computes the log(1+X) of a normalized input #
7964# slognp1d(): computes the log(1+X) of a denormalized input #
7965# #
7966# INPUT *************************************************************** #
7967# a0 = pointer to extended precision input #
7968# d0 = round precision,mode #
7969# #
7970# OUTPUT ************************************************************** #
7971# fp0 = log(X) or log(1+X) #
7972# #
7973# ACCURACY and MONOTONICITY ******************************************* #
7974# The returned result is within 2 ulps in 64 significant bits, #
7975# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7976# rounded to double precision. The result is provably monotonic #
7977# in double precision. #
7978# #
7979# ALGORITHM *********************************************************** #
7980# LOGN: #
7981# Step 1. If |X-1| < 1/16, approximate log(X) by an odd #
7982# polynomial in u, where u = 2(X-1)/(X+1). Otherwise, #
7983# move on to Step 2. #
7984# #
7985# Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first #
7986# seven significant bits of Y plus 2**(-7), i.e. #
7987# F = 1.xxxxxx1 in base 2 where the six "x" match those #
7988# of Y. Note that |Y-F| <= 2**(-7). #
7989# #
7990# Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a #
7991# polynomial in u, log(1+u) = poly. #
7992# #
7993# Step 4. Reconstruct #
7994# log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) #
7995# by k*log(2) + (log(F) + poly). The values of log(F) are #
7996# calculated beforehand and stored in the program. #
7997# #
7998# lognp1: #
7999# Step 1: If |X| < 1/16, approximate log(1+X) by an odd #
8000# polynomial in u where u = 2X/(2+X). Otherwise, move on #
8001# to Step 2. #
8002# #
8003# Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done #
8004# in Step 2 of the algorithm for LOGN and compute #
8005# log(1+X) as k*log(2) + log(F) + poly where poly #
8006# approximates log(1+u), u = (Y-F)/F. #
8007# #
8008# Implementation Notes: #
8009# Note 1. There are 64 different possible values for F, thus 64 #
8010# log(F)'s need to be tabulated. Moreover, the values of #
8011# 1/F are also tabulated so that the division in (Y-F)/F #
8012# can be performed by a multiplication. #
8013# #
8014# Note 2. In Step 2 of lognp1, in order to preserve accuracy, #
8015# the value Y-F has to be calculated carefully when #
8016# 1/2 <= X < 3/2. #
8017# #
8018# Note 3. To fully exploit the pipeline, polynomials are usually #
8019# separated into two parts evaluated independently before #
8020# being added up. #
8021# #
8022#########################################################################
# Constants for the log routines below.
# LOGOF2: log(2) as an extended-precision value (three longwords plus one
# longword of padding); it is multiplied by K to form K*LOG2.
8023LOGOF2:
8024 long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
8025
# Single-precision bit patterns, read with .s operations: 1.0, 0.0,
# +infinity and -1.0. (infty is not referenced by the log code in this section.)
8026one:
8027 long 0x3F800000
8028zero:
8029 long 0x00000000
8030infty:
8031 long 0x7F800000
8032negone:
8033 long 0xBF800000
8034
# LOGA1..LOGA6: double-precision coefficients A1..A6 of the polynomial for
# log(1+U) that is evaluated after LP1CONT1 (read with fmul.d / fadd.d).
8035LOGA6:
8036 long 0x3FC2499A,0xB5E4040B
8037LOGA5:
8038 long 0xBFC555B5,0x848CB7DB
8039
8040LOGA4:
8041 long 0x3FC99999,0x987D8730
8042LOGA3:
8043 long 0xBFCFFFFF,0xFF6F7E97
8044
8045LOGA2:
8046 long 0x3FD55555,0x555555A4
8047LOGA1:
8048 long 0xBFE00000,0x00000008
8049
# LOGB1..LOGB5: double-precision coefficients B1..B5 of the odd polynomial
# evaluated after LP1CONT2 (read with fmov.d / fadd.d).
8050LOGB5:
8051 long 0x3F175496,0xADD7DAD6
8052LOGB4:
8053 long 0x3F3C71C2,0xFE80C7E0
8054
8055LOGB3:
8056 long 0x3F624924,0x928BCCFF
8057LOGB2:
8058 long 0x3F899999,0x999995EC
8059
8060LOGB1:
8061 long 0x3FB55555,0x55555555
# TWO: the value 2.0. It is stored as two longwords, but the only reader in
# this section (KISNEG1) uses fmov.s and therefore reads just the first
# longword, 0x40000000, which is 2.0 in single precision.
8062TWO:
8063 long 0x40000000,0x00000000
8064
# LTHOLD: extended-precision 2**(-102). slognp1 returns its argument
# unchanged unless |X| is greater than this threshold.
8065LTHOLD:
8066 long 0x3f990000,0x80000000,0x00000000,0x00000000
8067
# LOGTBL: 64 pairs of extended-precision values, 16 bytes per value. For each
# F = 1.xxxxxx1 (binary) the first value of the pair is 1/F and the second is
# log(F). The code reaches a pair at offset 32 * (the six x bits) and adds 16
# to step from 1/F to log(F).
8068LOGTBL:
8069 long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
8070 long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000
8071 long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
8072 long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
8073 long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
8074 long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
8075 long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
8076 long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
8077 long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
8078 long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
8079 long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
8080 long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
8081 long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
8082 long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
8083 long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
8084 long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
8085 long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
8086 long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
8087 long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
8088 long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
8089 long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
8090 long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
8091 long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
8092 long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
8093 long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
8094 long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
8095 long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
8096 long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
8097 long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
8098 long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
8099 long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
8100 long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
8101 long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
8102 long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
8103 long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
8104 long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
8105 long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
8106 long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
8107 long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
8108 long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
8109 long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
8110 long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
8111 long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
8112 long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
8113 long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
8114 long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
8115 long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
8116 long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
8117 long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
8118 long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
8119 long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
8120 long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
8121 long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
8122 long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
8123 long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
8124 long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
8125 long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
8126 long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
8127 long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
8128 long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
8129 long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
8130 long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
8131 long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
8132 long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
8133 long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
8134 long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
8135 long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
8136 long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
8137 long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
8138 long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
8139 long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
8140 long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
8141 long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
8142 long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
8143 long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
8144 long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
8145 long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
8146 long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
8147 long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
8148 long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
8149 long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
8150 long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
8151 long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
8152 long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
8153 long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
8154 long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000
8155 long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000
8156 long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
8157 long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
8158 long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
8159 long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000
8160 long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
8161 long 0x3FFE0000,0x94458094,0x45809446,0x00000000
8162 long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
8163 long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000
8164 long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
8165 long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
8166 long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
8167 long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
8168 long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
8169 long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
8170 long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
8171 long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
8172 long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
8173 long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
8174 long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
8175 long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
8176 long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
8177 long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
8178 long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
8179 long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
8180 long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
8181 long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
8182 long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
8183 long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
8184 long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
8185 long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
8186 long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
8187 long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
8188 long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
8189 long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
8190 long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
8191 long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
8192 long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
8193 long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
8194 long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
8195 long 0x3FFE0000,0x80808080,0x80808081,0x00000000
8196 long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
8197
# Scratch layout shared by slogn, slognd and slognp1. X, KLOG2 and SAVEU are
# three names for the same scratch area (FP_SCR0), so storing K*LOG2 or U
# overwrites the saved copy of the input. XDCARE is the word at X+2 and
# XFRAC/FFRAC are the high mantissa longwords of X and F.
8198 set ADJK,L_SCR1
8199
8200 set X,FP_SCR0
8201 set XDCARE,X+2
8202 set XFRAC,X+4
8203
8204 set F,FP_SCR1
8205 set FFRAC,F+4
8206
8207 set KLOG2,FP_SCR0
8208
8209 set SAVEU,FP_SCR0
8210
8211 global slogn
8212#--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
# slogn: natural logarithm of a normalized X.
#   in:  a0 = pointer to the extended-precision input
#        d0 = user round precision,mode (written to FPCR before the final add)
#   out: fp0 = log(X); exits through t_inx2, ld_pzero (X = 1) or t_operr (X < 0)
#   uses d1, a0, fp1, the X/KLOG2/SAVEU and F scratch areas and ADJK;
#        fp2-fp3 are saved on the stack and restored.
# LOGBGN is entered by slognd, and LOGMAIN, LP1CONT1 and LP1CONT2 are entered
# by slognp1, so the register contracts stated at those labels must hold.
8213slogn:
8214 fmov.x (%a0),%fp0 # LOAD INPUT
8215 mov.l &0x00000000,ADJK(%a6) # normalized input: no exponent adjustment
8216
8217LOGBGN:
8218#--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
8219#--A FINITE, NON-ZERO, NORMALIZED NUMBER.
8220
8221 mov.l (%a0),%d1 # d1 = first longword of X (sign/exponent in the upper word)
8222 mov.w 4(%a0),%d1 # lower word = top 16 bits of the mantissa (compact form)
8223
8224 mov.l (%a0),X(%a6) # copy the 12-byte operand into the X scratch area
8225 mov.l 4(%a0),X+4(%a6)
8226 mov.l 8(%a0),X+8(%a6)
8227
8228 cmp.l %d1,&0 # CHECK IF X IS NEGATIVE
8229 blt.w LOGNEG # LOG OF NEGATIVE ARGUMENT IS INVALID
8230# X IS POSITIVE, CHECK IF X IS NEAR 1
8231 cmp.l %d1,&0x3ffef07d # IS X < 15/16?
8232 blt.b LOGMAIN # YES
8233 cmp.l %d1,&0x3fff8841 # IS X > 17/16?
8234 ble.w LOGNEAR1 # NO
8235
# On entry to LOGMAIN: d1 = compact form of X, X(%a6) holds X, ADJK is set.
8236LOGMAIN:
8237#--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
8238
8239#--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
8240#--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
8241#--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
8242#-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
8243#--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
8244#--LOG(1+U) CAN BE VERY EFFICIENT.
8245#--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
8246#--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
8247
8248#--GET K, Y, F, AND ADDRESS OF 1/F.
8249 asr.l &8,%d1
8250 asr.l &8,%d1 # SHIFTED 16 BITS, BIASED EXPO. OF X
8251 sub.l &0x3FFF,%d1 # THIS IS K
8252 add.l ADJK(%a6),%d1 # ADJUST K, ORIGINAL INPUT MAY BE DENORM.
8253 lea LOGTBL(%pc),%a0 # BASE ADDRESS OF 1/F AND LOG(F)
8254 fmov.l %d1,%fp1 # CONVERT K TO FLOATING-POINT FORMAT
8255
8256#--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
8257 mov.l &0x3FFF0000,X(%a6) # X IS NOW Y, I.E. 2^(-K)*X
8258 mov.l XFRAC(%a6),FFRAC(%a6)
8259 and.l &0xFE000000,FFRAC(%a6) # FIRST 7 BITS OF Y
8260 or.l &0x01000000,FFRAC(%a6) # GET F: ATTACH A 1 AT THE EIGHTH BIT
8261 mov.l FFRAC(%a6),%d1 # READY TO GET ADDRESS OF 1/F
8262 and.l &0x7E000000,%d1 # the six variable bits of F
8263 asr.l &8,%d1
8264 asr.l &8,%d1
8265 asr.l &4,%d1 # SHIFTED 20, D1 IS THE DISPLACEMENT (32 BYTES PER TABLE PAIR)
8266 add.l %d1,%a0 # A0 IS THE ADDRESS FOR 1/F
8267
8268 fmov.x X(%a6),%fp0 # fp0 = Y
8269 mov.l &0x3fff0000,F(%a6) # F gets exponent 0 ...
8270 clr.l F+8(%a6) # ... and a zero low mantissa longword
8271 fsub.x F(%a6),%fp0 # Y-F
8272 fmovm.x &0xc,-(%sp) # SAVE FP2-3 WHILE FP0 IS NOT READY
8273#--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
8274#--FP2 AND FP3 ARE NOW SAVED ON THE STACK (RESTORED BEFORE THE FINAL ADDS)
8275
8276LP1CONT1:
8277#--AN RE-ENTRY POINT FOR LOGNP1
8278 fmul.x (%a0),%fp0 # FP0 IS U = (Y-F)/F
8279 fmul.x LOGOF2(%pc),%fp1 # GET K*LOG2 WHILE FP0 IS NOT READY
8280 fmov.x %fp0,%fp2
8281 fmul.x %fp2,%fp2 # FP2 IS V=U*U
8282 fmov.x %fp1,KLOG2(%a6) # PUT K*LOG2 IN MEMORY, FREE FP1
8283
8284#--LOG(1+U) IS APPROXIMATED BY
8285#--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
8286#--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))]
8287
8288 fmov.x %fp2,%fp3 # fp3 keeps V for the later multiplies
8289 fmov.x %fp2,%fp1
8290
8291 fmul.d LOGA6(%pc),%fp1 # V*A6
8292 fmul.d LOGA5(%pc),%fp2 # V*A5
8293
8294 fadd.d LOGA4(%pc),%fp1 # A4+V*A6
8295 fadd.d LOGA3(%pc),%fp2 # A3+V*A5
8296
8297 fmul.x %fp3,%fp1 # V*(A4+V*A6)
8298 fmul.x %fp3,%fp2 # V*(A3+V*A5)
8299
8300 fadd.d LOGA2(%pc),%fp1 # A2+V*(A4+V*A6)
8301 fadd.d LOGA1(%pc),%fp2 # A1+V*(A3+V*A5)
8302
8303 fmul.x %fp3,%fp1 # V*(A2+V*(A4+V*A6))
8304 add.l &16,%a0 # ADDRESS OF LOG(F)
8305 fmul.x %fp3,%fp2 # V*(A1+V*(A3+V*A5))
8306
8307 fmul.x %fp0,%fp1 # U*V*(A2+V*(A4+V*A6))
8308 fadd.x %fp2,%fp0 # U+V*(A1+V*(A3+V*A5))
8309
8310 fadd.x (%a0),%fp1 # LOG(F)+U*V*(A2+V*(A4+V*A6))
8311 fmovm.x (%sp)+,&0x30 # RESTORE FP2-3
8312 fadd.x %fp1,%fp0 # FP0 IS LOG(F) + LOG(1+U)
8313
8314 fmov.l %d0,%fpcr # user round precision,mode applies to the final add only
8315 fadd.x KLOG2(%a6),%fp0 # FINAL ADD
8316 bra t_inx2
8317
8318
8319LOGNEAR1:
8320
8321# if the input is exactly equal to one, then exit through ld_pzero.
8322# if these 2 lines weren't here, the correct answer would be returned
8323# but the INEX2 bit would be set.
8324 fcmp.b %fp0,&0x1 # is it equal to one?
8325 fbeq.l ld_pzero # yes
8326
8327#--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
8328 fmov.x %fp0,%fp1
8329 fsub.s one(%pc),%fp1 # FP1 IS X-1
8330 fadd.s one(%pc),%fp0 # FP0 IS X+1
8331 fadd.x %fp1,%fp1 # FP1 IS 2(X-1)
8332#--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
8333#--IN U, U = 2(X-1)/(X+1) = FP1/FP0
8334
# On entry to LP1CONT2: fp1 = numerator, fp0 = denominator of U.
8335LP1CONT2:
8336#--THIS IS AN RE-ENTRY POINT FOR LOGNP1
8337 fdiv.x %fp0,%fp1 # FP1 IS U
8338 fmovm.x &0xc,-(%sp) # SAVE FP2-3
8339#--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
8340#--LET V=U*U, W=V*V, CALCULATE
8341#--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
8342#--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] )
8343 fmov.x %fp1,%fp0
8344 fmul.x %fp0,%fp0 # FP0 IS V
8345 fmov.x %fp1,SAVEU(%a6) # STORE U IN MEMORY, FREE FP1
8346 fmov.x %fp0,%fp1
8347 fmul.x %fp1,%fp1 # FP1 IS W
8348
8349 fmov.d LOGB5(%pc),%fp3
8350 fmov.d LOGB4(%pc),%fp2
8351
8352 fmul.x %fp1,%fp3 # W*B5
8353 fmul.x %fp1,%fp2 # W*B4
8354
8355 fadd.d LOGB3(%pc),%fp3 # B3+W*B5
8356 fadd.d LOGB2(%pc),%fp2 # B2+W*B4
8357
8358 fmul.x %fp3,%fp1 # W*(B3+W*B5), FP3 RELEASED
8359
8360 fmul.x %fp0,%fp2 # V*(B2+W*B4)
8361
8362 fadd.d LOGB1(%pc),%fp1 # B1+W*(B3+W*B5)
8363 fmul.x SAVEU(%a6),%fp0 # FP0 IS U*V
8364
8365 fadd.x %fp2,%fp1 # B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
8366 fmovm.x (%sp)+,&0x30 # FP2-3 RESTORED
8367
8368 fmul.x %fp1,%fp0 # U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
8369
8370 fmov.l %d0,%fpcr # user round precision,mode applies to the final add only
8371 fadd.x SAVEU(%a6),%fp0 # add the leading U term
8372 bra t_inx2
8373
8374#--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
8375LOGNEG:
8376 bra t_operr
8377
8378 global slognd
# slognd: natural logarithm of a denormalized, non-zero X.
#   in:  a0 = pointer to the extended-precision input, d0 = round precision,mode
#   The mantissa is shifted left by k bits until its top bit is set, the
#   shifted value is written to the X scratch area, ADJK is set to -k, and
#   control joins slogn at LOGBGN with a0 pointing at that scratch copy.
#   d2-d7 are saved on the stack and restored before the jump.
8379slognd:
8380#--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
8381
# This initial ADJK value is overwritten with -k on both paths below.
8382 mov.l &-100,ADJK(%a6) # INPUT = 2^(ADJK) * FP0
8383
8384#----normalize the input value by left shifting k bits (k to be determined
8385#----below), adjusting exponent and storing -k to ADJK
8386#----the value TWOTO100 is no longer needed.
8387#----Note that this code assumes the denormalized input is NON-ZERO.
8388
8389 movm.l &0x3f00,-(%sp) # save some registers {d2-d7}
8390 mov.l (%a0),%d3 # D3 is exponent of smallest norm. #
8391 mov.l 4(%a0),%d4
8392 mov.l 8(%a0),%d5 # (D4,D5) is (Hi_X,Lo_X)
8393 clr.l %d2 # D2 used for holding K
8394
8395 tst.l %d4
8396 bne.b Hi_not0
8397
# High mantissa longword is zero: the low longword becomes the high one
# (a shift by 32), then it is shifted further until its top bit is set.
8398Hi_0:
8399 mov.l %d5,%d4
8400 clr.l %d5
8401 mov.l &32,%d2
8402 clr.l %d6
8403 bfffo %d4{&0:&32},%d6 # d6 = bit offset of the first 1 in d4
8404 lsl.l %d6,%d4
8405 add.l %d6,%d2 # (D3,D4,D5) is normalized
8406
8407 mov.l %d3,X(%a6)
8408 mov.l %d4,XFRAC(%a6)
8409 mov.l %d5,XFRAC+4(%a6)
8410 neg.l %d2
8411 mov.l %d2,ADJK(%a6) # ADJK = -k
8412 fmov.x X(%a6),%fp0
8413 movm.l (%sp)+,&0xfc # restore registers {d2-d7}
8414 lea X(%a6),%a0 # LOGBGN reads its operand through a0
8415 bra.w LOGBGN # begin regular log(X)
8416
# High mantissa longword is non-zero: shift the 64-bit pair (d4,d5) left by
# k bits, carrying the top k bits of d5 into the bottom of d4.
8417Hi_not0:
8418 clr.l %d6
8419 bfffo %d4{&0:&32},%d6 # find first 1
8420 mov.l %d6,%d2 # get k
8421 lsl.l %d6,%d4
8422 mov.l %d5,%d7 # a copy of D5
8423 lsl.l %d6,%d5
8424 neg.l %d6
8425 add.l &32,%d6 # d6 = 32 - k
8426 lsr.l %d6,%d7 # d7 = bits of the old d5 that move into d4
8427 or.l %d7,%d4 # (D3,D4,D5) normalized
8428
8429 mov.l %d3,X(%a6)
8430 mov.l %d4,XFRAC(%a6)
8431 mov.l %d5,XFRAC+4(%a6)
8432 neg.l %d2
8433 mov.l %d2,ADJK(%a6) # ADJK = -k
8434 fmov.x X(%a6),%fp0
8435 movm.l (%sp)+,&0xfc # restore registers {d2-d7}
8436 lea X(%a6),%a0 # LOGBGN reads its operand through a0
8437 bra.w LOGBGN # begin regular log(X)
8438
8439 global slognp1
8440#--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
# slognp1: log(1+Z) for a normalized input Z.
#   in:  a0 = pointer to the extended-precision input
#        d0 = user round precision,mode
#   out: fp0 = log(1+Z)
#   If |Z| is not greater than LTHOLD the argument itself is returned through
#   t_catch. Otherwise the routine finishes in slogn's code (LOGMAIN,
#   LP1CONT1 or LP1CONT2), or exits through t_dz (1+Z = 0) or t_operr (1+Z < 0).
8441slognp1:
8442 fmov.x (%a0),%fp0 # LOAD INPUT
8443 fabs.x %fp0 # test magnitude
8444 fcmp.x %fp0,LTHOLD(%pc) # compare with min threshold
8445 fbgt.w LP1REAL # if greater, continue
8446 fmov.l %d0,%fpcr
8447 mov.b &FMOV_OP,%d1 # last inst is MOVE
8448 fmov.x (%a0),%fp0 # return signed argument
8449 bra t_catch
8450
8451LP1REAL:
8452 fmov.x (%a0),%fp0 # LOAD INPUT
8453 mov.l &0x00000000,ADJK(%a6) # no exponent adjustment on this path
8454 fmov.x %fp0,%fp1 # FP1 IS INPUT Z
8455 fadd.s one(%pc),%fp0 # X := ROUND(1+Z)
8456 fmov.x %fp0,X(%a6)
8457 mov.w XFRAC(%a6),XDCARE(%a6) # top mantissa word copied next to the exponent word
8458 mov.l X(%a6),%d1 # d1 = compact form of X
8459 cmp.l %d1,&0
8460 ble.w LP1NEG0 # LOG OF ZERO OR -VE
8461 cmp.l %d1,&0x3ffe8000 # IS BOUNDS [1/2,3/2]?
8462 blt.w LOGMAIN
8463 cmp.l %d1,&0x3fffc000
8464 bgt.w LOGMAIN
8465#--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
8466#--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
8467#--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
8468
8469LP1NEAR1:
8470#--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
8471 cmp.l %d1,&0x3ffef07d
8472 blt.w LP1CARE
8473 cmp.l %d1,&0x3fff8841
8474 bgt.w LP1CARE
8475
8476LP1ONE16:
8477#--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
8478#--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
8479 fadd.x %fp1,%fp1 # FP1 IS 2Z
8480 fadd.s one(%pc),%fp0 # FP0 IS 1+X
8481#--U = FP1/FP0
8482 bra.w LP1CONT2
8483
8484LP1CARE:
8485#--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
8486#--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
8487#--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
8488#--THERE ARE ONLY TWO CASES.
8489#--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
8490#--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z
8491#--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
8492#--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
8493
8494 mov.l XFRAC(%a6),FFRAC(%a6)
8495 and.l &0xFE000000,FFRAC(%a6)
8496 or.l &0x01000000,FFRAC(%a6) # F OBTAINED
8497 cmp.l %d1,&0x3FFF8000 # SEE IF 1+Z > 1
8498 bge.b KISZERO
8499
8500KISNEG1:
8501 fmov.s TWO(%pc),%fp0 # fp0 = 2.0 (first longword of TWO read as single)
8502 mov.l &0x3fff0000,F(%a6)
8503 clr.l F+8(%a6)
8504 fsub.x F(%a6),%fp0 # 2-F
8505 mov.l FFRAC(%a6),%d1
8506 and.l &0x7E000000,%d1
8507 asr.l &8,%d1
8508 asr.l &8,%d1
8509 asr.l &4,%d1 # D1 CONTAINS DISPLACEMENT FOR 1/F
8510 fadd.x %fp1,%fp1 # GET 2Z
8511 fmovm.x &0xc,-(%sp) # SAVE FP2 {%fp2/%fp3}
8512 fadd.x %fp1,%fp0 # FP0 IS Y-F = (2-F)+2Z
8513 lea LOGTBL(%pc),%a0 # A0 IS ADDRESS OF 1/F
8514 add.l %d1,%a0
8515 fmov.s negone(%pc),%fp1 # FP1 IS K = -1
8516 bra.w LP1CONT1
8517
8518KISZERO:
8519 fmov.s one(%pc),%fp0
8520 mov.l &0x3fff0000,F(%a6)
8521 clr.l F+8(%a6)
8522 fsub.x F(%a6),%fp0 # 1-F
8523 mov.l FFRAC(%a6),%d1
8524 and.l &0x7E000000,%d1
8525 asr.l &8,%d1
8526 asr.l &8,%d1
8527 asr.l &4,%d1 # d1 = displacement of the 1/F entry in LOGTBL
8528 fadd.x %fp1,%fp0 # FP0 IS Y-F
8529 fmovm.x &0xc,-(%sp) # FP2 SAVED {%fp2/%fp3}
8530 lea LOGTBL(%pc),%a0
8531 add.l %d1,%a0 # A0 IS ADDRESS OF 1/F
8532 fmov.s zero(%pc),%fp1 # FP1 IS K = 0
8533 bra.w LP1CONT1
8534
8535LP1NEG0:
8536#--FPCR SAVED. D1 IS X IN COMPACT FORM.
8537 cmp.l %d1,&0
8538 blt.b LP1NEG
# 1+Z is zero: fp0 is loaded with -1.0 before leaving through t_dz.
8539LP1ZERO:
8540 fmov.s negone(%pc),%fp0
8541
8542 fmov.l %d0,%fpcr
8543 bra t_dz
8544
# 1+Z is negative: fp0 is loaded with 0.0 before leaving through t_operr.
8545LP1NEG:
8546 fmov.s zero(%pc),%fp0
8547
8548 fmov.l %d0,%fpcr
8549 bra t_operr
8550
8551 global slognp1d
8552#--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
8553# Simply return the denorm
# (done by branching to the shared exit t_extdnrm, defined outside this section)
8554slognp1d:
8555 bra t_extdnrm
8556
8557#########################################################################
8558# satanh(): computes the inverse hyperbolic tangent of a norm input #
8559# satanhd(): computes the inverse hyperbolic tangent of a denorm input #
8560# #
8561# INPUT *************************************************************** #
8562# a0 = pointer to extended precision input #
8563# d0 = round precision,mode #
8564# #
8565# OUTPUT ************************************************************** #
8566# fp0 = arctanh(X) #
8567# #
8568# ACCURACY and MONOTONICITY ******************************************* #
8569# The returned result is within 3 ulps in 64 significant bits, #
8570# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8571# rounded to double precision. The result is provably monotonic #
8572# in double precision. #
8573# #
8574# ALGORITHM *********************************************************** #
8575# #
8576# ATANH #
8577# 1. If |X| >= 1, go to 3. #
8578# #
8579# 2. (|X| < 1) Calculate atanh(X) by #
8580# sgn := sign(X) #
8581# y := |X| #
8582# z := 2y/(1-y) #
8583# atanh(X) := sgn * (1/2) * logp1(z) #
8584# Exit. #
8585# #
8586# 3. If |X| > 1, go to 5. #
8587# #
8588# 4. (|X| = 1) Generate infinity with an appropriate sign and #
8589# divide-by-zero by #
8590# sgn := sign(X) #
8591# atanh(X) := sgn / (+0). #
8592# Exit. #
8593# #
8594# 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
8595# Exit. #
8596# #
8597#########################################################################
8598
8599 global satanh
# satanh: inverse hyperbolic tangent of a normalized X.
#   in:  a0 = pointer to the extended-precision input
#        d0 = user round precision,mode
#   out: fp0 = atanh(X) for |X| < 1 (exit through t_catch);
#        |X| = 1 exits through t_dz and |X| > 1 through t_operr.
#   uses d1, fp1 and a0 (repointed at a stack copy of Z for the nested call).
8600satanh:
8601 mov.l (%a0),%d1 # d1 = first longword of X (sign/exponent in the upper word)
8602 mov.w 4(%a0),%d1 # lower word = top 16 bits of the mantissa
8603 and.l &0x7FFFFFFF,%d1 # drop the sign: d1 = compact |X|
8604 cmp.l %d1,&0x3FFF8000 # is |X| >= 1?
8605 bge.b ATANHBIG
8606
8607#--THIS IS THE USUAL CASE, |X| < 1
8608#--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
8609
8610 fabs.x (%a0),%fp0 # Y = |X|
8611 fmov.x %fp0,%fp1
8612 fneg.x %fp1 # -Y
8613 fadd.x %fp0,%fp0 # 2Y
8614 fadd.s &0x3F800000,%fp1 # 1-Y
8615 fdiv.x %fp1,%fp0 # 2Y/(1-Y)
8616 mov.l (%a0),%d1
8617 and.l &0x80000000,%d1
8618 or.l &0x3F000000,%d1 # SIGN(X)*HALF
8619 mov.l %d1,-(%sp) # pushed first, so it is popped last (by the fmul.s below)
8620
8621 mov.l %d0,-(%sp) # save rnd prec,mode
8622 clr.l %d0 # pass ext prec,RN
8623 fmovm.x &0x01,-(%sp) # save Z on stack
8624 lea (%sp),%a0 # pass ptr to Z
8625 bsr slognp1 # LOG1P(Z)
8626 add.l &0xc,%sp # clear Z from stack
8627
8628 mov.l (%sp)+,%d0 # fetch old prec,mode
8629 fmov.l %d0,%fpcr # load it
8630 mov.b &FMUL_OP,%d1 # last inst is MUL
8631 fmul.s (%sp)+,%fp0 # multiply by SIGN(X)*HALF and pop it off the stack
8632 bra t_catch
8633
# |X| >= 1: greater than 1 is an invalid operand, exactly 1 is divide-by-zero.
8634ATANHBIG:
8635 fabs.x (%a0),%fp0 # |X|
8636 fcmp.s %fp0,&0x3F800000 # compare |X| with 1.0
8637 fbgt t_operr
8638 bra t_dz
8639
8640 global satanhd
8641#--ATANH(X) = X FOR DENORMALIZED X
# No computation is done here: control goes straight to the shared exit
# t_extdnrm, which is defined outside this section.
8642satanhd:
8643 bra t_extdnrm
8644
8645#########################################################################
8646# slog10(): computes the base-10 logarithm of a normalized input #
8647# slog10d(): computes the base-10 logarithm of a denormalized input #
8648# slog2(): computes the base-2 logarithm of a normalized input #
8649# slog2d(): computes the base-2 logarithm of a denormalized input #
8650# #
8651# INPUT *************************************************************** #
8652# a0 = pointer to extended precision input #
8653# d0 = round precision,mode #
8654# #
8655# OUTPUT ************************************************************** #
8656# fp0 = log_10(X) or log_2(X) #
8657# #
8658# ACCURACY and MONOTONICITY ******************************************* #
8659# The returned result is within 1.7 ulps in 64 significant bits, #
8660# i.e. within 0.5003 ulp to 53 bits if the result is subsequently #
8661# rounded to double precision. The result is provably monotonic #
8662# in double precision. #
8663# #
8664# ALGORITHM *********************************************************** #
8665# #
8666# slog10d: #
8667# #
8668# Step 0. If X < 0, create a NaN and raise the invalid operation #
8669# flag. Otherwise, save FPCR in D1; set FpCR to default. #
8670# Notes: Default means round-to-nearest mode, no floating-point #
8671# traps, and precision control = double extended. #
8672# #
8673# Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8674# Notes: Even if X is denormalized, log(X) is always normalized. #
8675# #
8676# Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
8677# 2.1 Restore the user FPCR #
8678# 2.2 Return ans := Y * INV_L10. #
8679# #
8680# slog10: #
8681# #
8682# Step 0. If X < 0, create a NaN and raise the invalid operation #
8683# flag. Otherwise, save FPCR in D1; set FpCR to default. #
8684# Notes: Default means round-to-nearest mode, no floating-point #
8685# traps, and precision control = double extended. #
8686# #
8687# Step 1. Call sLogN to obtain Y = log(X), the natural log of X. #
8688# #
8689# Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
8690# 2.1 Restore the user FPCR #
8691# 2.2 Return ans := Y * INV_L10. #
8692# #
8693# sLog2d: #
8694# #
8695# Step 0. If X < 0, create a NaN and raise the invalid operation #
8696# flag. Otherwise, save FPCR in D1; set FpCR to default. #
8697# Notes: Default means round-to-nearest mode, no floating-point #
8698# traps, and precision control = double extended. #
8699# #
8700# Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8701# Notes: Even if X is denormalized, log(X) is always normalized. #
8702# #
8703# Step 2. Compute log_2(X) = log(X) * (1/log(2)). #
8704# 2.1 Restore the user FPCR #
8705# 2.2 Return ans := Y * INV_L2. #
8706# #
8707# sLog2: #
8708# #
8709# Step 0. If X < 0, create a NaN and raise the invalid operation #
8710# flag. Otherwise, save FPCR in D1; set FpCR to default. #
8711# Notes: Default means round-to-nearest mode, no floating-point #
8712# traps, and precision control = double extended. #
8713# #
8714# Step 1. If X is not an integer power of two, i.e., X != 2^k, #
8715# go to Step 3. #
8716# #
8717# Step 2. Return k. #
8718# 2.1 Get integer k, X = 2^k. #
8719# 2.2 Restore the user FPCR. #
8720# 2.3 Return ans := convert-to-double-extended(k). #
8721# #
8722# Step 3. Call sLogN to obtain Y = log(X), the natural log of X. #
8723# #
8724# Step 4. Compute log_2(X) = log(X) * (1/log(2)). #
8725# 4.1 Restore the user FPCR #
8726# 4.2 Return ans := Y * INV_L2. #
8727# #
8728#########################################################################
8729
# INV_L10: 1/log(10) as an extended-precision value; log(X) is multiplied by
# it to give log_10(X).
8730INV_L10:
8731 long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000
8732
# INV_L2: 1/log(2) as an extended-precision value; log(X) is multiplied by
# it to give log_2(X).
8733INV_L2:
8734 long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000
8735
8736 global slog10
8737#--entry point for Log10(X), X is normalized
#   in:  a0 = pointer to the extended-precision input
#        d0 = user round precision,mode
#   out: fp0 = log_10(X); X = 1 exits through ld_pzero and a negative X
#        through the shared label invalid (a branch to t_operr).
8738slog10:
8739 fmov.b &0x1,%fp0
8740 fcmp.x %fp0,(%a0) # if operand == 1,
8741 fbeq.l ld_pzero # return an EXACT zero
8742
8743 mov.l (%a0),%d1 # sign bit of X sets the N flag tested below
8744 blt.w invalid
8745 mov.l %d0,-(%sp) # park user round precision,mode on the stack
8746 clr.l %d0 # nested call gets d0 = 0
8747 bsr slogn # log(X), X normal.
8748 fmov.l (%sp)+,%fpcr # user round precision,mode goes straight into FPCR
8749 fmul.x INV_L10(%pc),%fp0 # log(X) * (1/log(10))
8750 bra t_inx2
8751
8752 global slog10d
8753#--entry point for Log10(X), X is denormalized
#   in:  a0 = pointer to the extended-precision input
#        d0 = user round precision,mode
#   out: fp0 = log_10(X); a negative X leaves through the shared label invalid.
8754slog10d:
8755 mov.l (%a0),%d1 # sign bit of X sets the N flag tested below
8756 blt.w invalid
8757 mov.l %d0,-(%sp) # park user round precision,mode on the stack
8758 clr.l %d0 # nested call gets d0 = 0
8759 bsr slognd # log(X), X denorm.
8760 fmov.l (%sp)+,%fpcr # user round precision,mode goes straight into FPCR
8761 fmul.x INV_L10(%pc),%fp0 # log(X) * (1/log(10))
8762 bra t_minx2
8763
8764 global slog2
8765#--entry point for Log2(X), X is normalized
#   in:  a0 = pointer to the extended-precision input
#        d0 = user round precision,mode
#   out: fp0 = log_2(X). When X is an exact power of two, 2^k, the integer k
#        is returned directly (k = 0 leaves through ld_pzero); otherwise
#        log(X) from slogn is multiplied by 1/log(2).
#   The labels continue and invalid sit at the end of this routine; invalid
#   is also the branch target used by slog10, slog10d and slog2d.
8766slog2:
8767 mov.l (%a0),%d1 # sign bit of X sets the N flag tested below
8768 blt.w invalid
8769
8770 mov.l 8(%a0),%d1 # low mantissa longword
8771 bne.b continue # X is not 2^k
8772
8773 mov.l 4(%a0),%d1 # high mantissa longword
8774 and.l &0x7FFFFFFF,%d1 # ignore its top bit
8775 bne.b continue
8776
8777#--X = 2^k.
8778 mov.w (%a0),%d1
8779 and.l &0x00007FFF,%d1 # biased exponent only
8780 sub.l &0x3FFF,%d1 # remove the bias: d1 = k
8781 beq.l ld_pzero # k = 0, i.e. X = 1
8782 fmov.l %d0,%fpcr
8783 fmov.l %d1,%fp0 # result is the integer k
8784 bra t_inx2
8785
8786continue:
8787 mov.l %d0,-(%sp) # park user round precision,mode on the stack
8788 clr.l %d0 # nested call gets d0 = 0
8789 bsr slogn # log(X), X normal.
8790 fmov.l (%sp)+,%fpcr # user round precision,mode goes straight into FPCR
8791 fmul.x INV_L2(%pc),%fp0 # log(X) * (1/log(2))
8792 bra t_inx2
8793
# Negative operand for any of the log10/log2 entry points.
8794invalid:
8795 bra t_operr
8796
8797 global slog2d
8798#--entry point for Log2(X), X is denormalized
#   in:  a0 = pointer to the extended-precision input
#        d0 = user round precision,mode
#   out: fp0 = log_2(X); a negative X leaves through the shared label invalid.
8799slog2d:
8800 mov.l (%a0),%d1 # sign bit of X sets the N flag tested below
8801 blt.w invalid
8802 mov.l %d0,-(%sp) # park user round precision,mode on the stack
8803 clr.l %d0 # nested call gets d0 = 0
8804 bsr slognd # log(X), X denorm.
8805 fmov.l (%sp)+,%fpcr # user round precision,mode goes straight into FPCR
8806 fmul.x INV_L2(%pc),%fp0 # log(X) * (1/log(2))
8807 bra t_minx2
8808
8809#########################################################################
8810# stwotox(): computes 2**X for a normalized input #
8811# stwotoxd(): computes 2**X for a denormalized input #
8812# stentox(): computes 10**X for a normalized input #
8813# stentoxd(): computes 10**X for a denormalized input #
8814# #
8815# INPUT *************************************************************** #
8816# a0 = pointer to extended precision input #
8817# d0 = round precision,mode #
8818# #
8819# OUTPUT ************************************************************** #
8820# fp0 = 2**X or 10**X #
8821# #
8822# ACCURACY and MONOTONICITY ******************************************* #
8823# The returned result is within 2 ulps in 64 significant bits, #
8824# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8825# rounded to double precision. The result is provably monotonic #
8826# in double precision. #
8827# #
8828# ALGORITHM *********************************************************** #
8829# #
8830# twotox #
8831# 1. If |X| > 16480, go to ExpBig. #
8832# #
8833# 2. If |X| < 2**(-70), go to ExpSm. #
8834# #
8835# 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore #
8836# decompose N as #
8837# N = 64(M + M') + j, j = 0,1,2,...,63. #
8838# #
8839# 4. Overwrite r := r * log2. Then #
8840# 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
8841# Go to expr to compute that expression. #
8842# #
8843# tentox #
8844# 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. #
8845# #
8846# 2. If |X| < 2**(-70), go to ExpSm. #
8847# #
8848# 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set #
8849# N := round-to-int(y). Decompose N as #
8850# N = 64(M + M') + j, j = 0,1,2,...,63. #
8851# #
8852# 4. Define r as #
8853# r := ((X - N*L1)-N*L2) * L10 #
8854# where L1, L2 are the leading and trailing parts of #
8855# log_10(2)/64 and L10 is the natural log of 10. Then #
8856# 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
8857# Go to expr to compute that expression. #
8858# #
8859# expr #
8860# 1. Fetch 2**(j/64) from table as Fact1 and Fact2. #
8861# #
8862# 2. Overwrite Fact1 and Fact2 by #
8863# Fact1 := 2**(M) * Fact1 #
8864# Fact2 := 2**(M) * Fact2 #
8865# Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). #
8866# #
8867# 3. Calculate P where 1 + P approximates exp(r): #
8868# P = r + r*r*(A1+r*(A2+...+r*A5)). #
8869# #
8870# 4. Let AdjFact := 2**(M'). Return #
8871# AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). #
8872# Exit. #
8873# #
8874# ExpBig #
8875# 1. Generate overflow by Huge * Huge if X > 0; otherwise, #
8876# generate underflow by Tiny * Tiny. #
8877# #
8878# ExpSm #
8879# 1. Return 1 + X. #
8880# #
8881#########################################################################
8882
8883L2TEN64: # double-precision constant, read with fmul.d in stentox
8884 long 0x406A934F,0x0979A371 # 64LOG10/LOG2
8885L10TWO1: # double-precision constant, read with fmul.d in stentox
8886 long 0x3F734413,0x509F8000 # LOG2/64LOG10 (leading part)
8887
8888L10TWO2: # extended-precision trailing part of LOG2/64LOG10
8889 long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
8890
8891LOG10: long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000 # natural log of 10, extended precision
8892
8893LOG2: long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 # natural log of 2, extended precision
8894
8895EXPA5: long 0x3F56C16D,0x6F7BD0B2 # A5 of the exp(r) polynomial (double)
8896EXPA4: long 0x3F811112,0x302C712C # A4 (double)
8897EXPA3: long 0x3FA55555,0x55554CC1 # A3 (double)
8898EXPA2: long 0x3FC55555,0x55554A54 # A2 (double)
8899EXPA1: long 0x3FE00000,0x00000000,0x00000000,0x00000000 # A1; expr reads it with fadd.d, i.e. only the first two longwords
8900
8901TEXPTBL: # table of 2^(J/64), J = 0..63, 16 bytes per entry: three longwords copied to FACT1, then two words copied to FACT2 and FACT2HI
8902 long 0x3FFF0000,0x80000000,0x00000000,0x3F738000
8903 long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
8904 long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
8905 long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
8906 long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
8907 long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
8908 long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
8909 long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
8910 long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
8911 long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
8912 long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
8913 long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
8914 long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
8915 long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
8916 long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
8917 long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
8918 long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
8919 long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
8920 long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
8921 long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
8922 long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
8923 long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
8924 long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
8925 long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
8926 long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
8927 long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
8928 long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
8929 long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
8930 long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
8931 long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
8932 long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
8933 long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
8934 long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
8935 long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
8936 long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
8937 long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
8938 long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
8939 long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
8940 long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
8941 long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
8942 long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
8943 long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
8944 long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
8945 long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
8946 long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
8947 long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
8948 long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
8949 long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
8950 long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
8951 long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
8952 long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
8953 long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
8954 long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
8955 long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
8956 long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
8957 long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
8958 long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
8959 long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
8960 long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
8961 long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
8962 long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
8963 long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
8964 long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
8965 long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
8966
8967 set INT,L_SCR1 # N stored as a 32-bit integer
8968
8969 set X,FP_SCR0 # saved copy of the input operand
8970 set XDCARE,X+2 # NOTE(review): not referenced by the routines visible here
8971 set XFRAC,X+4 # NOTE(review): not referenced by the routines visible here
8972
8973 set ADJFACT,FP_SCR0 # 2^(M'), built in expr; shares FP_SCR0 with X and FACT1
8974
8975 set FACT1,FP_SCR0 # leading part of 2^(M) * 2^(J/64); shares FP_SCR0 with X
8976 set FACT1HI,FACT1+4
8977 set FACT1LOW,FACT1+8
8978
8979 set FACT2,FP_SCR1 # trailing part of 2^(M) * 2^(J/64)
8980 set FACT2HI,FACT2+4
8981 set FACT2LOW,FACT2+8
8982
8983 global stwotox
8984#--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
8985stwotox: # a0 -> X, d0 = user round precision,mode (loaded into fpcr in expr/EXPBORS)
8986 fmovm.x (%a0),&0x80 # LOAD INPUT
8987
8988 mov.l (%a0),%d1 # upper word of d1 = sign/exponent of X
8989 mov.w 4(%a0),%d1 # lower word of d1 = top 16 bits of the mantissa
8990 fmov.x %fp0,X(%a6) # keep a copy of X; TEXPBIG re-reads its sign
8991 and.l &0x7FFFFFFF,%d1 # drop the sign: d1 is now a compact form of |X|
8992
8993 cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)?
8994 bge.b TWOOK1
8995 bra.w EXPBORS # no; |X| is tiny
8996
8997TWOOK1:
8998 cmp.l %d1,&0x400D80C0 # |X| > 16480?
8999 ble.b TWOMAIN
9000 bra.w EXPBORS # yes; |X| is too big
9001
9002TWOMAIN:
9003#--USUAL CASE, 2^(-70) <= |X| <= 16480
9004
9005 fmov.x %fp0,%fp1
9006 fmul.s &0x42800000,%fp1 # 64 * X
9007 fmov.l %fp1,INT(%a6) # N = ROUND-TO-INT(64 X)
9008 mov.l %d2,-(%sp) # save d2; expr pops it
9009 lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64)
9010 fmov.l INT(%a6),%fp1 # N --> FLOATING FMT
9011 mov.l INT(%a6),%d1
9012 mov.l %d1,%d2
9013 and.l &0x3F,%d1 # d1 IS J
9014 asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64)
9015 add.l %d1,%a1 # ADDRESS FOR 2^(J/64)
9016 asr.l &6,%d2 # d2 IS L, N = 64L + J
9017 mov.l %d2,%d1
9018 asr.l &1,%d1 # d1 IS M
9019 sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J
9020 add.l &0x3FFF,%d2 # d2 = biased exponent of 2^(M')
9021
9022#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9023#--d1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9024#--ADJFACT = 2^(M') IS BUILT LATER, IN expr, FROM d2.
9025#--SAVED ON THE STACK SO FAR: d2. fp2/fp3 ARE PUSHED NEXT.
9026
9027 fmovm.x &0x0c,-(%sp) # save fp2/fp3
9028
9029 fmul.s &0x3C800000,%fp1 # (1/64)*N
9030 mov.l (%a1)+,FACT1(%a6) # copy the table entry's leading part to FACT1
9031 mov.l (%a1)+,FACT1HI(%a6)
9032 mov.l (%a1)+,FACT1LOW(%a6)
9033 mov.w (%a1)+,FACT2(%a6) # first word of the 4th longword -> FACT2 exponent word
9034
9035 fsub.x %fp1,%fp0 # X - (1/64)*INT(64 X)
9036
9037 mov.w (%a1)+,FACT2HI(%a6) # second word -> top 16 bits of FACT2 mantissa
9038 clr.w FACT2HI+2(%a6) # rest of the FACT2 mantissa is zero
9039 clr.l FACT2LOW(%a6)
9040 add.w %d1,FACT1(%a6) # FACT1 := 2^(M) * FACT1 (add M to the exponent word)
9041 fmul.x LOG2(%pc),%fp0 # FP0 IS R
9042 add.w %d1,FACT2(%a6) # FACT2 := 2^(M) * FACT2
9043
9044 bra.w expr
9045
9046EXPBORS: # shared by stwotox and stentox: |X| is either tiny or too big
9047#--NOTHING HAS BEEN PUSHED ON THIS PATH. d0 = USER CTRL BITS, d1 = COMPACT |X|
9048 cmp.l %d1,&0x3FFF8000 # |X| > 1?
9049 bgt.b TEXPBIG # yes; this is the "too big" case
9050
9051#--|X| IS SMALL, RETURN 1 + X
9052
9053 fmov.l %d0,%fpcr # restore users round prec,mode
9054 fadd.s &0x3F800000,%fp0 # RETURN 1 + X
9055 bra t_pinx2
9056
9057TEXPBIG:
9058#--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
9059#--NOTHING HAS BEEN PUSHED ON THIS PATH; d0 STILL HOLDS THE USER CTRL BITS
9060 mov.l X(%a6),%d1 # first longword of the saved X (sign in bit 31)
9061 cmp.l %d1,&0
9062 blt.b EXPNEG # X is negative
9063
9064 bra t_ovfl2 # t_ovfl expects positive value
9065
9066EXPNEG:
9067 bra t_unfl2 # t_unfl expects positive value
9068
9069 global stwotoxd
9070stwotoxd:
9071#--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
9072#--a0 -> X, d0 = user round precision,mode. The result is 1 plus a tiny
9073 fmov.l %d0,%fpcr # set user's rounding mode/precision
9074 fmov.s &0x3F800000,%fp0 # RETURN 1 + X
9075 mov.l (%a0),%d1 # first longword of X (sign in bit 31)
9076 or.l &0x00800001,%d1 # make a tiny non-zero single-precision value carrying X's sign
9077 fadd.s %d1,%fp0 # 1 + (tiny value standing in for X)
9078 bra t_pinx2
9079
9080 global stentox
9081#--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
9082stentox: # a0 -> X, d0 = user round precision,mode; falls through into expr
9083 fmovm.x (%a0),&0x80 # LOAD INPUT
9084
9085 mov.l (%a0),%d1 # upper word of d1 = sign/exponent of X
9086 mov.w 4(%a0),%d1 # lower word of d1 = top 16 bits of the mantissa
9087 fmov.x %fp0,X(%a6) # keep a copy of X; TEXPBIG re-reads its sign
9088 and.l &0x7FFFFFFF,%d1 # drop the sign: d1 is now a compact form of |X|
9089
9090 cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)?
9091 bge.b TENOK1
9092 bra.w EXPBORS # no; |X| is tiny
9093
9094TENOK1:
9095 cmp.l %d1,&0x400B9B07 # |X| <= 16480*log2/log10 ?
9096 ble.b TENMAIN
9097 bra.w EXPBORS # no; |X| is too big
9098
9099TENMAIN:
9100#--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
9101
9102 fmov.x %fp0,%fp1
9103 fmul.d L2TEN64(%pc),%fp1 # X*64*LOG10/LOG2
9104 fmov.l %fp1,INT(%a6) # N=INT(X*64*LOG10/LOG2)
9105 mov.l %d2,-(%sp) # save d2; expr pops it
9106 lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64)
9107 fmov.l INT(%a6),%fp1 # N --> FLOATING FMT
9108 mov.l INT(%a6),%d1
9109 mov.l %d1,%d2
9110 and.l &0x3F,%d1 # d1 IS J
9111 asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64)
9112 add.l %d1,%a1 # ADDRESS FOR 2^(J/64)
9113 asr.l &6,%d2 # d2 IS L, N = 64L + J
9114 mov.l %d2,%d1
9115 asr.l &1,%d1 # d1 IS M
9116 sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J
9117 add.l &0x3FFF,%d2 # d2 = biased exponent of 2^(M')
9118
9119#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9120#--d1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9121#--ADJFACT = 2^(M') IS BUILT LATER, IN expr, FROM d2.
9122#--SAVED ON THE STACK SO FAR: d2. fp2/fp3 ARE PUSHED NEXT.
9123 fmovm.x &0x0c,-(%sp) # save fp2/fp3
9124
9125 fmov.x %fp1,%fp2 # second copy of N for the trailing product
9126
9127 fmul.d L10TWO1(%pc),%fp1 # N*(LOG2/64LOG10)_LEAD
9128 mov.l (%a1)+,FACT1(%a6) # copy the table entry's leading part to FACT1
9129
9130 fmul.x L10TWO2(%pc),%fp2 # N*(LOG2/64LOG10)_TRAIL
9131
9132 mov.l (%a1)+,FACT1HI(%a6)
9133 mov.l (%a1)+,FACT1LOW(%a6)
9134 fsub.x %fp1,%fp0 # X - N L_LEAD
9135 mov.w (%a1)+,FACT2(%a6) # first word of the 4th longword -> FACT2 exponent word
9136
9137 fsub.x %fp2,%fp0 # X - N L_TRAIL
9138
9139 mov.w (%a1)+,FACT2HI(%a6) # second word -> top 16 bits of FACT2 mantissa
9140 clr.w FACT2HI+2(%a6) # rest of the FACT2 mantissa is zero
9141 clr.l FACT2LOW(%a6)
9142
9143 fmul.x LOG10(%pc),%fp0 # FP0 IS R
9144 add.w %d1,FACT1(%a6) # FACT1 := 2^(M) * FACT1 (add M to the exponent word)
9145 add.w %d1,FACT2(%a6) # FACT2 := 2^(M) * FACT2
9146
9147expr: # common tail of stwotox (via bra) and stentox (via fall-through)
9148#--ON THE STACK: d2 (PUSHED FIRST), THEN fp2/fp3. d0 = USER CTRL BITS, d2 = BIASED EXPONENT OF 2**(M').
9149#--ADJFACT WILL CONTAIN 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
9150#--FP0 IS R. THE FOLLOWING CODE COMPUTES
9151#-- 2**(M'+M) * 2**(J/64) * EXP(R)
9152
9153 fmov.x %fp0,%fp1
9154 fmul.x %fp1,%fp1 # FP1 IS S = R*R
9155
9156 fmov.d EXPA5(%pc),%fp2 # FP2 IS A5
9157 fmov.d EXPA4(%pc),%fp3 # FP3 IS A4
9158
9159 fmul.x %fp1,%fp2 # FP2 IS S*A5
9160 fmul.x %fp1,%fp3 # FP3 IS S*A4
9161
9162 fadd.d EXPA3(%pc),%fp2 # FP2 IS A3+S*A5
9163 fadd.d EXPA2(%pc),%fp3 # FP3 IS A2+S*A4
9164
9165 fmul.x %fp1,%fp2 # FP2 IS S*(A3+S*A5)
9166 fmul.x %fp1,%fp3 # FP3 IS S*(A2+S*A4)
9167
9168 fadd.d EXPA1(%pc),%fp2 # FP2 IS A1+S*(A3+S*A5)
9169 fmul.x %fp0,%fp3 # FP3 IS R*S*(A2+S*A4)
9170
9171 fmul.x %fp1,%fp2 # FP2 IS S*(A1+S*(A3+S*A5))
9172 fadd.x %fp3,%fp0 # FP0 IS R+R*S*(A2+S*A4)
9173 fadd.x %fp2,%fp0 # FP0 IS EXP(R) - 1
9174
9175 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
9176
9177#--FINAL RECONSTRUCTION PROCESS
9178#--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0)
9179
9180 fmul.x FACT1(%a6),%fp0 # FACT1 * (EXP(R)-1)
9181 fadd.x FACT2(%a6),%fp0 # ... + FACT2
9182 fadd.x FACT1(%a6),%fp0 # ... + FACT1
9183
9184 fmov.l %d0,%fpcr # restore users round prec,mode
9185 mov.w %d2,ADJFACT(%a6) # INSERT EXPONENT
9186 mov.l (%sp)+,%d2 # restore the caller's d2
9187 mov.l &0x80000000,ADJFACT+4(%a6) # mantissa = 1.0, so ADJFACT = 2^(M')
9188 clr.l ADJFACT+8(%a6)
9189 mov.b &FMUL_OP,%d1 # last inst is MUL
9190 fmul.x ADJFACT(%a6),%fp0 # FINAL ADJUSTMENT
9191 bra t_catch
9192
9193 global stentoxd
9194stentoxd:
9195#--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
9196#--a0 -> X, d0 = user round precision,mode. Same instruction sequence as stwotoxd.
9197 fmov.l %d0,%fpcr # set user's rounding mode/precision
9198 fmov.s &0x3F800000,%fp0 # RETURN 1 + X
9199 mov.l (%a0),%d1 # first longword of X (sign in bit 31)
9200 or.l &0x00800001,%d1 # make a tiny non-zero single-precision value carrying X's sign
9201 fadd.s %d1,%fp0 # 1 + (tiny value standing in for X)
9202 bra t_pinx2
9203
9204#########################################################################
9205# sscale(): computes the destination operand scaled by the source #
9206# operand. If the absolute value of the source operand is #
9207# >= 2^14, an overflow or underflow is returned. #
9208# #
9209# INPUT *************************************************************** #
9210# a0 = pointer to double-extended source operand X #
9211# a1 = pointer to double-extended destination operand Y #
9212# #
9213# OUTPUT ************************************************************** #
9214# fp0 = scale(X,Y) #
9215# #
9216#########################################################################
9217
9218set SIGN, L_SCR1
9219
9220 global sscale
9221sscale: # a0 -> src (scale amount), a1 -> dst (value to scale), d0 = ctrl bits
9222 mov.l %d0,-(%sp) # store off ctrl bits for now
9223
9224 mov.w DST_EX(%a1),%d1 # get dst exponent
9225 smi.b SIGN(%a6) # use SIGN to hold dst sign
9226 andi.l &0x00007fff,%d1 # strip sign from dst exp
9227
9228 mov.w SRC_EX(%a0),%d0 # check src bounds
9229 andi.w &0x7fff,%d0 # clr src sign bit
9230 cmpi.w %d0,&0x3fff # is src ~ ZERO?
9231 blt.w src_small # yes
9232 cmpi.w %d0,&0x400c # no; is src too big?
9233 bgt.w src_out # yes
9234
9235#
9236# Source is within 2^14 range.
9237#
9238src_ok:
9239 fintrz.x SRC(%a0),%fp0 # calc int of src
9240 fmov.l %fp0,%d0 # int src to d0
9241# don't want any accrued bits from the fintrz showing up later since
9242# we may need to read the fpsr for the last fp op in t_catch2().
9243 fmov.l &0x0,%fpsr
9244
9245 tst.b DST_HI(%a1) # is dst denormalized?
9246 bmi.b sok_norm # no; top mantissa bit is set
9247
9248# the dst is a DENORM. normalize the DENORM and add the adjustment to
9249# the src value. then, jump to the norm part of the routine.
9250sok_dnrm:
9251 mov.l %d0,-(%sp) # save src for now
9252
9253 mov.w DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy
9254 mov.l DST_HI(%a1),FP_SCR0_HI(%a6)
9255 mov.l DST_LO(%a1),FP_SCR0_LO(%a6)
9256
9257 lea FP_SCR0(%a6),%a0 # pass ptr to DENORM
9258 bsr.l norm # normalize the DENORM
9259 neg.l %d0 # negate the value norm returned in d0
9260 add.l (%sp)+,%d0 # add adjustment to src
9261
9262 fmovm.x FP_SCR0(%a6),&0x80 # load normalized DENORM
9263
9264 cmpi.w %d0,&-0x3fff # is the shft amt really low?
9265 bge.b sok_norm2 # thank goodness no
9266
9267# the multiply factor that we're trying to create should be a denorm
9268# for the multiply to work. therefore, we're going to actually do a
9269# multiply with a denorm which will cause an unimplemented data type
9270# exception to be put into the machine which will be caught and corrected
9271# later. we don't do this with the DENORMs above because this method
9272# is slower. but, don't fret, I don't see it being used much either.
9273 fmov.l (%sp)+,%fpcr # restore user fpcr
9274 mov.l &0x80000000,%d1 # load normalized mantissa
9275 subi.l &-0x3fff,%d0 # how many should we shift?
9276 neg.l %d0 # make it positive
9277 cmpi.b %d0,&0x20 # is it >= 32?
9278 bge.b sok_dnrm_32 # yes
9279 lsr.l %d0,%d1 # no; bit stays in upper lw
9280 clr.l -(%sp) # insert zero low mantissa
9281 mov.l %d1,-(%sp) # insert new high mantissa
9282 clr.l -(%sp) # make zero exponent
9283 bra.b sok_norm_cont
9284sok_dnrm_32:
9285 subi.b &0x20,%d0 # get shift count
9286 lsr.l %d0,%d1 # make low mantissa longword
9287 mov.l %d1,-(%sp) # insert new low mantissa
9288 clr.l -(%sp) # insert zero high mantissa
9289 clr.l -(%sp) # make zero exponent
9290 bra.b sok_norm_cont
9291
9292# the dst is normalized (or has just been normalized above). build a
9293# power-of-two factor on the stack and multiply the dst by it.
9294sok_norm:
9295 fmovm.x DST(%a1),&0x80 # load fp0 with normalized dst
9296sok_norm2:
9297 fmov.l (%sp)+,%fpcr # restore user fpcr
9298
9299 addi.w &0x3fff,%d0 # turn src amt into exp value
9300 swap %d0 # put exponent in high word
9301 clr.l -(%sp) # insert zero lo mantissa (pushed first, highest address)
9302 mov.l &0x80000000,-(%sp) # insert new high mantissa
9303 mov.l %d0,-(%sp) # insert new exponent longword (exponent in its high word)
9304
9305sok_norm_cont:
9306 fmov.l %fpcr,%d0 # d0 needs fpcr for t_catch2
9307 mov.b &FMUL_OP,%d1 # last inst is MUL
9308 fmul.x (%sp)+,%fp0 # do the multiply
9309 bra t_catch2 # catch any exceptions
9310
9311#
9312# Source is outside of 2^14 range. Test the sign and branch
9313# to the appropriate exception handler.
9314#
9315src_out:
9316 mov.l (%sp)+,%d0 # restore ctrl bits
9317 exg %a0,%a1 # swap src,dst ptrs
9318 tst.b SRC_EX(%a1) # is src negative?
9319 bmi t_unfl # yes; underflow
9320 bra t_ovfl_sc # no; overflow
9321
9322#
9323# The source input is below 1, so we check for denormalized numbers
9324# and set unfl.
9325#
9326src_small:
9327 tst.b DST_HI(%a1) # is dst denormalized?
9328 bpl.b ssmall_done # yes
9329
9330 mov.l (%sp)+,%d0 # restore ctrl bits
9331 fmov.l %d0,%fpcr # no; load control bits
9332 mov.b &FMOV_OP,%d1 # last inst is MOVE
9333 fmov.x DST(%a1),%fp0 # simply return dest
9334 bra t_catch2
9335ssmall_done:
9336 mov.l (%sp)+,%d0 # load control bits into d0
9337 mov.l %a1,%a0 # pass ptr to dst
9338 bra t_resdnrm
9339
9340#########################################################################
9341# smod(): computes the fp MOD of the input values X,Y. #
9342# srem(): computes the fp (IEEE) REM of the input values X,Y. #
9343# #
9344# INPUT *************************************************************** #
9345# a0 = pointer to extended precision input Y (source) #
9346# a1 = pointer to extended precision input X (destination) #
9347# d0 = round precision,mode #
9348# #
9349# The input operands X and Y can be either normalized or #
9350# denormalized. #
9351# #
9352# OUTPUT ************************************************************** #
9353# fp0 = FREM(X,Y) or FMOD(X,Y) #
9354# #
9355# ALGORITHM *********************************************************** #
9356# #
9357# Step 1. Save and strip signs of X and Y: signX := sign(X), #
9358# signY := sign(Y), X := |X|, Y := |Y|, #
9359# signQ := signX EOR signY. Record whether MOD or REM #
9360# is requested. #
9361# #
9362# Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0. #
9363# If (L < 0) then #
9364# R := X, go to Step 4. #
9365# else #
9366# R := 2^(-L)X, j := L. #
9367# endif #
9368# #
9369# Step 3. Perform MOD(X,Y) #
9370# 3.1 If R = Y, go to Step 9. #
9371# 3.2 If R > Y, then { R := R - Y, Q := Q + 1} #
9372# 3.3 If j = 0, go to Step 4. #
9373# 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to #
9374# Step 3.1. #
9375# #
9376# Step 4. At this point, R = X - QY = MOD(X,Y). Set #
9377# Last_Subtract := false (used in Step 7 below). If #
9378# MOD is requested, go to Step 6. #
9379# #
9380# Step 5. R = MOD(X,Y), but REM(X,Y) is requested. #
9381# 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to #
9382# Step 6. #
9383# 5.2 If R > Y/2, then { set Last_Subtract := true, #
9384# Q := Q + 1, Y := signY*Y }. Go to Step 6. #
9385# 5.3 This is the tricky case of R = Y/2. If Q is odd, #
9386# then { Q := Q + 1, signX := -signX }. #
9387# #
9388# Step 6. R := signX*R. #
9389# #
9390# Step 7. If Last_Subtract = true, R := R - Y. #
9391# #
9392# Step 8. Return signQ, last 7 bits of Q, and R as required. #
9393# #
9394# Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus, #
9395# X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1), #
9396# R := 0. Return signQ, last 7 bits of Q, and R. #
9397# #
9398#########################################################################
9399
9400 set Mod_Flag,L_SCR3 # byte: 0 for smod, 1 for srem
9401 set Sc_Flag,L_SCR3+1 # byte: non-zero when the result must be multiplied by Scale
9402
9403 set SignY,L_SCR2 # word: sign/exponent word of Y as read from SRC_EX
9404 set SignX,L_SCR2+2 # word: sign/exponent word of X as read from DST_EX
9405 set SignQ,L_SCR3+2 # word: 0x8000 if the signs of X and Y differ, else 0
9406
9407 set Y,FP_SCR0 # extended-precision image of |Y|
9408 set Y_Hi,Y+4
9409 set Y_Lo,Y+8
9410
9411 set R,FP_SCR1 # extended-precision image of the remainder R
9412 set R_Hi,R+4
9413 set R_Lo,R+8
9414
9415Scale: # extended value with biased exponent 1 and mantissa 0x80000000; multiplied in at Restore when Sc_Flag is set
9416 long 0x00010000,0x80000000,0x00000000,0x00000000
9417
9418 global smod
9419smod: # FMOD entry: a0 -> Y (SRC), a1 -> X (DST), d0 = ctrl bits
9420 clr.b FPSR_QBYTE(%a6) # start with a zero quotient byte
9421 mov.l %d0,-(%sp) # save ctrl bits
9422 clr.b Mod_Flag(%a6) # 0 = MOD requested
9423 bra.b Mod_Rem
9424
9425 global srem
9426srem: # FREM entry: same inputs as smod
9427 clr.b FPSR_QBYTE(%a6) # start with a zero quotient byte
9428 mov.l %d0,-(%sp) # save ctrl bits
9429 mov.b &0x1,Mod_Flag(%a6) # 1 = REM requested
9430
9431Mod_Rem:
9432#..Save sign of X and Y
9433 movm.l &0x3f00,-(%sp) # save data registers d2-d7 (popped at Restore)
9434 mov.w SRC_EX(%a0),%d3
9435 mov.w %d3,SignY(%a6)
9436 and.l &0x00007FFF,%d3 # Y := |Y|
9437
9438#
9439 mov.l SRC_HI(%a0),%d4
9440 mov.l SRC_LO(%a0),%d5 # (D3,D4,D5) is |Y|
9441
9442 tst.l %d3 # zero exponent means Y is denormalized
9443 bne.b Y_Normal
9444
9445 mov.l &0x00003FFE,%d3 # $3FFD + 1
9446 tst.l %d4
9447 bne.b HiY_not0
9448
9449HiY_0: # high mantissa longword is zero: move the low longword up by 32 bits
9450 mov.l %d5,%d4
9451 clr.l %d5
9452 sub.l &32,%d3
9453 clr.l %d6
9454 bfffo %d4{&0:&32},%d6 # d6 = number of leading zero bits
9455 lsl.l %d6,%d4
9456 sub.l %d6,%d3 # (D3,D4,D5) is normalized
9457# ...with bias $7FFD
9458 bra.b Chk_X
9459
9460HiY_not0: # shift the 64-bit mantissa left until its top bit is set
9461 clr.l %d6
9462 bfffo %d4{&0:&32},%d6 # d6 = number of leading zero bits
9463 sub.l %d6,%d3
9464 lsl.l %d6,%d4
9465 mov.l %d5,%d7 # a copy of D5
9466 lsl.l %d6,%d5
9467 neg.l %d6
9468 add.l &32,%d6 # d6 = 32 - shift count
9469 lsr.l %d6,%d7 # bits of D5 that cross into D4
9470 or.l %d7,%d4 # (D3,D4,D5) normalized
9471# ...with bias $7FFD
9472 bra.b Chk_X
9473
9474Y_Normal:
9475 add.l &0x00003FFE,%d3 # (D3,D4,D5) normalized
9476# ...with bias $7FFD
9477
9478Chk_X: # same unpacking as for Y, now for X into (D0,D1,D2); also derives sign(Q)
9479 mov.w DST_EX(%a1),%d0
9480 mov.w %d0,SignX(%a6)
9481 mov.w SignY(%a6),%d1
9482 eor.l %d0,%d1 # bit 15 = signX EOR signY
9483 and.l &0x00008000,%d1
9484 mov.w %d1,SignQ(%a6) # sign(Q) obtained
9485 and.l &0x00007FFF,%d0 # X := |X|
9486 mov.l DST_HI(%a1),%d1
9487 mov.l DST_LO(%a1),%d2 # (D0,D1,D2) is |X|
9488 tst.l %d0 # zero exponent means X is denormalized
9489 bne.b X_Normal
9490 mov.l &0x00003FFE,%d0
9491 tst.l %d1
9492 bne.b HiX_not0
9493
9494HiX_0: # high mantissa longword is zero: move the low longword up by 32 bits
9495 mov.l %d2,%d1
9496 clr.l %d2
9497 sub.l &32,%d0
9498 clr.l %d6
9499 bfffo %d1{&0:&32},%d6 # d6 = number of leading zero bits
9500 lsl.l %d6,%d1
9501 sub.l %d6,%d0 # (D0,D1,D2) is normalized
9502# ...with bias $7FFD
9503 bra.b Init
9504
9505HiX_not0: # shift the 64-bit mantissa left until its top bit is set
9506 clr.l %d6
9507 bfffo %d1{&0:&32},%d6 # d6 = number of leading zero bits
9508 sub.l %d6,%d0
9509 lsl.l %d6,%d1
9510 mov.l %d2,%d7 # a copy of D2
9511 lsl.l %d6,%d2
9512 neg.l %d6
9513 add.l &32,%d6 # d6 = 32 - shift count
9514 lsr.l %d6,%d7 # bits of D2 that cross into D1
9515 or.l %d7,%d1 # (D0,D1,D2) normalized
9516# ...with bias $7FFD
9517 bra.b Init
9518
9519X_Normal:
9520 add.l &0x00003FFE,%d0 # (D0,D1,D2) normalized
9521# ...with bias $7FFD
9522
9523Init: # set up L, Q and k, then run the shift-and-subtract loop of Step 3
9524#
9525 mov.l %d3,L_SCR1(%a6) # save biased exp(Y)
9526 mov.l %d0,-(%sp) # save biased exp(X)
9527 sub.l %d3,%d0 # L := expo(X)-expo(Y)
9528
9529 clr.l %d6 # D6 := carry <- 0
9530 clr.l %d3 # D3 is Q
9531 mov.l &0,%a1 # A1 is k; j+k=L, Q=0
9532
9533#..(Carry,D1,D2) is R
9534 tst.l %d0
9535 bge.b Mod_Loop_pre
9536
9537#..expo(X) < expo(Y). Thus X = mod(X,Y)
9538#
9539 mov.l (%sp)+,%d0 # restore d0
9540 bra.w Get_Mod
9541
9542Mod_Loop_pre:
9543 addq.l &0x4,%sp # erase exp(X)
9544#..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L
9545Mod_Loop:
9546 tst.l %d6 # test carry bit
9547 bgt.b R_GT_Y
9548
9549#..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
9550 cmp.l %d1,%d4 # compare hi(R) and hi(Y)
9551 bne.b R_NE_Y
9552 cmp.l %d2,%d5 # compare lo(R) and lo(Y)
9553 bne.b R_NE_Y
9554
9555#..At this point, R = Y
9556 bra.w Rem_is_0
9557
9558R_NE_Y:
9559#..use the borrow of the previous compare
9560 bcs.b R_LT_Y # borrow is set iff R < Y
9561
9562R_GT_Y:
9563#..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
9564#..and Y < (D1,D2) < 2Y. Either way, perform R - Y
9565 sub.l %d5,%d2 # lo(R) - lo(Y)
9566 subx.l %d4,%d1 # hi(R) - hi(Y)
9567 clr.l %d6 # clear carry
9568 addq.l &1,%d3 # Q := Q + 1
9569
9570R_LT_Y:
9571#..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
9572 tst.l %d0 # see if j = 0.
9573 beq.b PostLoop
9574
9575 add.l %d3,%d3 # Q := 2Q
9576 add.l %d2,%d2 # lo(R) = 2lo(R)
9577 roxl.l &1,%d1 # hi(R) = 2hi(R) + carry
9578 scs %d6 # set Carry if 2(R) overflows
9579 addq.l &1,%a1 # k := k+1
9580 subq.l &1,%d0 # j := j - 1
9581#..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
9582
9583 bra.b Mod_Loop
9584
9585PostLoop:
9586#..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
9587
9588#..normalize R.
9589 mov.l L_SCR1(%a6),%d0 # new biased expo of R
9590 tst.l %d1
9591 bne.b HiR_not0
9592
9593HiR_0: # high longword of R is zero: move the low longword up by 32 bits
9594 mov.l %d2,%d1
9595 clr.l %d2
9596 sub.l &32,%d0
9597 clr.l %d6
9598 bfffo %d1{&0:&32},%d6 # d6 = number of leading zero bits
9599 lsl.l %d6,%d1
9600 sub.l %d6,%d0 # (D0,D1,D2) is normalized
9601# ...with bias $7FFD
9602 bra.b Get_Mod
9603
9604HiR_not0:
9605 clr.l %d6
9606 bfffo %d1{&0:&32},%d6 # d6 = number of leading zero bits
9607 bmi.b Get_Mod # already normalized
9608 sub.l %d6,%d0
9609 lsl.l %d6,%d1
9610 mov.l %d2,%d7 # a copy of D2
9611 lsl.l %d6,%d2
9612 neg.l %d6
9613 add.l &32,%d6 # d6 = 32 - shift count
9614 lsr.l %d6,%d7 # bits of D2 that cross into D1
9615 or.l %d7,%d1 # (D0,D1,D2) normalized
9616
9617#
9618Get_Mod: # pack R and Y into memory images, apply the REM adjustment, fix the sign, store the quotient byte
9619 cmp.l %d0,&0x000041FE # is expo(R), still carrying the extra $3FFE, at least $41FE?
9620 bge.b No_Scale # yes; the extra $3FFE can be removed right away
9621Do_Scale: # keep the extra $3FFE in both exponents; Restore multiplies by Scale later
9622 mov.w %d0,R(%a6)
9623 mov.l %d1,R_Hi(%a6)
9624 mov.l %d2,R_Lo(%a6)
9625 mov.l L_SCR1(%a6),%d6 # biased expo(Y) saved at Init
9626 mov.w %d6,Y(%a6)
9627 mov.l %d4,Y_Hi(%a6)
9628 mov.l %d5,Y_Lo(%a6)
9629 fmov.x R(%a6),%fp0 # no exception
9630 mov.b &1,Sc_Flag(%a6)
9631 bra.b ModOrRem
9632No_Scale: # subtract $3FFE from both exponents before packing
9633 mov.l %d1,R_Hi(%a6)
9634 mov.l %d2,R_Lo(%a6)
9635 sub.l &0x3FFE,%d0
9636 mov.w %d0,R(%a6)
9637 mov.l L_SCR1(%a6),%d6
9638 sub.l &0x3FFE,%d6
9639 mov.l %d6,L_SCR1(%a6) # keep the adjusted expo(Y) for ModOrRem
9640 fmov.x R(%a6),%fp0
9641 mov.w %d6,Y(%a6)
9642 mov.l %d4,Y_Hi(%a6)
9643 mov.l %d5,Y_Lo(%a6)
9644 clr.b Sc_Flag(%a6)
9645
9646#
9647ModOrRem:
9648 tst.b Mod_Flag(%a6)
9649 beq.b Fix_Sign # MOD requested: R is already the answer
9650
9651 mov.l L_SCR1(%a6),%d6 # new biased expo(Y)
9652 subq.l &1,%d6 # biased expo(Y/2)
9653 cmp.l %d0,%d6
9654 blt.b Fix_Sign # R < Y/2
9655 bgt.b Last_Sub # R > Y/2
9656
9657 cmp.l %d1,%d4 # exponents equal: compare mantissas, high longword first
9658 bne.b Not_EQ
9659 cmp.l %d2,%d5
9660 bne.b Not_EQ
9661 bra.w Tie_Case # R = Y/2 exactly
9662
9663Not_EQ:
9664 bcs.b Fix_Sign # borrow from the last compare: R < Y/2
9665
9666Last_Sub:
9667#
9668 fsub.x Y(%a6),%fp0 # no exceptions
9669 addq.l &1,%d3 # Q := Q + 1
9670
9671#
9672Fix_Sign:
9673#..Get sign of X
9674 mov.w SignX(%a6),%d6
9675 bge.b Get_Q # X was non-negative: leave fp0 as is
9676 fneg.x %fp0
9677
9678#..Get Q
9679#
9680Get_Q:
9681 clr.l %d6
9682 mov.w SignQ(%a6),%d6 # D6 is sign(Q)
9683 mov.l &8,%d7
9684 lsr.l %d7,%d6 # move the sign from bit 15 down to bit 7
9685 and.l &0x0000007F,%d3 # 7 bits of Q
9686 or.l %d6,%d3 # sign and bits of Q
9687# swap %d3
9688# fmov.l %fpsr,%d6
9689# and.l &0xFF00FFFF,%d6
9690# or.l %d3,%d6
9691# fmov.l %d6,%fpsr # put Q in fpsr
9692 mov.b %d3,FPSR_QBYTE(%a6) # put Q in fpsr
9693
9694#
9695Restore: # pop saved registers, load fpcr, and finish with one rounding instruction
9696 movm.l (%sp)+,&0xfc # {%d2-%d7}
9697 mov.l (%sp)+,%d0 # ctrl bits pushed at smod/srem entry
9698 fmov.l %d0,%fpcr
9699 tst.b Sc_Flag(%a6)
9700 beq.b Finish # no scaling pending
9701 mov.b &FMUL_OP,%d1 # last inst is MUL
9702 fmul.x Scale(%pc),%fp0 # may cause underflow
9703 bra t_catch2
9704# the '040 package did this apparently to see if the dst operand for the
9705# preceding fmul was a denorm. but, it better not have been since the
9706# algorithm just got done playing with fp0 and expected no exceptions
9707# as a result. trust me...
9708# bra t_avoid_unsupp # check for denorm as a
9709# ;result of the scaling
9710
9711Finish:
9712 mov.b &FMOV_OP,%d1 # last inst is MOVE
9713 fmov.x %fp0,%fp0 # capture exceptions & round
9714 bra t_catch2
9715
9716Rem_is_0: # reached from Mod_Loop when R equals Y exactly
9717#..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
9718 addq.l &1,%d3
9719 cmp.l %d0,&8 # D0 is j
9720 bge.b Q_Big # j >= 8: the low 7 bits of 2^j (Q+1) are all zero
9721
9722 lsl.l %d0,%d3 # Q := 2^j (Q+1)
9723 bra.b Set_R_0
9724
9725Q_Big:
9726 clr.l %d3
9727
9728Set_R_0:
9729 fmov.s &0x00000000,%fp0 # R := 0
9730 clr.b Sc_Flag(%a6) # nothing to scale
9731 bra.w Fix_Sign
9732
9733Tie_Case: # REM only: R is exactly Y/2
9734#..Check parity of Q
9735 mov.l %d3,%d6
9736 and.l &0x00000001,%d6
9737 tst.l %d6
9738 beq.w Fix_Sign # Q is even
9739
9740#..Q is odd, Q := Q + 1, signX := -signX
9741 addq.l &1,%d3
9742 mov.w SignX(%a6),%d6
9743 eor.l &0x00008000,%d6 # flip the stored sign bit
9744 mov.w %d6,SignX(%a6)
9745 bra.w Fix_Sign
9746
9747#########################################################################
9748# XDEF **************************************************************** #
9749# tag(): return the optype of the input ext fp number #
9750# #
9751# This routine is used by the 060FPLSP. #
9752# #
9753# XREF **************************************************************** #
9754# None #
9755# #
9756# INPUT *************************************************************** #
9757# a0 = pointer to extended precision operand #
9758# #
9759# OUTPUT ************************************************************** #
9760# d0 = value of type tag #
9761# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
9762# #
9763# ALGORITHM *********************************************************** #
9764# Simply test the exponent, j-bit, and mantissa values to #
9765# determine the type of operand. #
9766# If it's an unnormalized zero, alter the operand and force it #
9767# to be a normal zero. #
9768# #
9769#########################################################################
9770
9771 global tag
9772tag: # a0 -> extended operand; returns the type tag in the low byte of d0
9773 mov.w FTEMP_EX(%a0), %d0 # extract exponent
9774 andi.w &0x7fff, %d0 # strip off sign
9775 cmpi.w %d0, &0x7fff # is (EXP == MAX)?
9776 beq.b inf_or_nan_x
9777not_inf_or_nan_x:
9778 btst &0x7,FTEMP_HI(%a0) # is the top mantissa bit (j-bit) set?
9779 beq.b not_norm_x
9780is_norm_x:
9781 mov.b &NORM, %d0
9782 rts
9783not_norm_x:
9784 tst.w %d0 # is exponent = 0?
9785 bne.b is_unnorm_x
9786not_unnorm_x:
9787 tst.l FTEMP_HI(%a0) # exponent is zero: any mantissa bit set means DENORM
9788 bne.b is_denorm_x
9789 tst.l FTEMP_LO(%a0)
9790 bne.b is_denorm_x
9791is_zero_x:
9792 mov.b &ZERO, %d0
9793 rts
9794is_denorm_x:
9795 mov.b &DENORM, %d0
9796 rts
9797is_unnorm_x:
9798 bsr.l unnorm_fix # convert to norm,denorm,or zero
9799 rts
9800is_unnorm_reg_x: # NOTE(review): no branch to this label is visible in this section
9801 mov.b &UNNORM, %d0
9802 rts
9803inf_or_nan_x:
9804 tst.l FTEMP_LO(%a0)
9805 bne.b is_nan_x
9806 mov.l FTEMP_HI(%a0), %d0
9807 and.l &0x7fffffff, %d0 # msb is a don't care!
9808 bne.b is_nan_x
9809is_inf_x:
9810 mov.b &INF, %d0
9811 rts
9812is_nan_x: # every NaN is tagged QNAN here; this routine never returns SNAN
9813 mov.b &QNAN, %d0
9814 rts
9815
9816#############################################################
9817
9818qnan: long 0x7fff0000, 0xffffffff, 0xffffffff # extended-precision NaN image: max exponent, all mantissa bits set
9819
9820#########################################################################
9821# XDEF **************************************************************** #
9822# t_dz(): Handle 060FPLSP dz exception for "flogn" emulation. #
9823# t_dz2(): Handle 060FPLSP dz exception for "fatanh" emulation. #
9824# #
9825# These routines are used by the 060FPLSP package. #
9826# #
9827# XREF **************************************************************** #
9828# None #
9829# #
9830# INPUT *************************************************************** #
9831# a0 = pointer to extended precision source operand. #
9832# #
9833# OUTPUT ************************************************************** #
9834# fp0 = default DZ result. #
9835# #
9836# ALGORITHM *********************************************************** #
9837# Transcendental emulation for the 060FPLSP has detected that #
9838# a DZ exception should occur for the instruction. If DZ is disabled, #
9839# return the default result. #
9840# If DZ is enabled, the dst operand should be returned unscathed #
9841# in fp0 while fp1 is used to create a DZ exception so that the #
9842# operating system can log that such an event occurred. #
9843# #
9844#########################################################################
9845
# Entry t_dz: choose the sign of the default result from the sign of the
# source operand at (a0); a negative source falls through into t_dz2.
9846 global t_dz
9847t_dz:
9848 tst.b SRC_EX(%a0) # check sign for neg or pos
9849 bpl.b dz_pinf # branch if pos sign
9850
# Entry t_dz2: always takes the negative path; the source sign is not tested.
9851 global t_dz2
9852t_dz2:
9853 ori.l &dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ
9854
9855 btst &dz_bit,FPCR_ENABLE(%a6) # is the DZ exception enabled?
9856 bne.b dz_minf_ena # yes; go raise it
9857
9858# dz is disabled. return a -INF.
9859 fmov.s &0xff800000,%fp0 # return -INF
9860 rts
9861
9862# dz is enabled. create a dz exception so the user can record it
9863# but use fp1 instead. return the dst operand unscathed in fp0.
9864dz_minf_ena:
9865 fmovm.x EXC_FP0(%a6),&0x80 # return fp0 unscathed
9866 fmov.l USER_FPCR(%a6),%fpcr # run the divide under the user's FPCR
9867 fmov.s &0xbf800000,%fp1 # load -1
9868 fdiv.s &0x00000000,%fp1 # -1 / 0
9869 rts
9870
# Positive path: same sequence as above, without the N bit and with +1 / 0.
9871dz_pinf:
9872 ori.l &dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ
9873
9874 btst &dz_bit,FPCR_ENABLE(%a6) # is the DZ exception enabled?
9875 bne.b dz_pinf_ena # yes; go raise it
9876
9877# dz is disabled. return a +INF.
9878 fmov.s &0x7f800000,%fp0 # return +INF
9879 rts
9880
9881# dz is enabled. create a dz exception so the user can record it
9882# but use fp1 instead. return the dst operand unscathed in fp0.
9883dz_pinf_ena:
9884 fmovm.x EXC_FP0(%a6),&0x80 # return fp0 unscathed
9885 fmov.l USER_FPCR(%a6),%fpcr # run the divide under the user's FPCR
9886 fmov.s &0x3f800000,%fp1 # load +1
9887 fdiv.s &0x00000000,%fp1 # +1 / 0
9888 rts
9889
9890#########################################################################
9891# XDEF **************************************************************** #
9892# t_operr(): Handle 060FPLSP OPERR exception during emulation. #
9893# #
9894# This routine is used by the 060FPLSP package. #
9895# #
9896# XREF **************************************************************** #
9897# None. #
9898# #
9899# INPUT *************************************************************** #
9900# fp1 = source operand #
9901# #
9902# OUTPUT ************************************************************** #
9903# fp0 = default result #
9904# fp1 = unchanged #
9905# #
9906# ALGORITHM *********************************************************** #
9907# An operand error should occur as the result of transcendental #
9908# emulation in the 060FPLSP. If OPERR is disabled, just return a NAN #
9909# in fp0. If OPERR is enabled, return the dst operand unscathed in fp0 #
9910# and the source operand in fp1. Use fp2 to create an OPERR exception #
9911# so that the operating system can log the event. #
9912# #
9913#########################################################################
9914
# t_operr: record an operand error in the saved FPSR, then either return the
# qnan constant in fp0 (OPERR disabled) or provoke a real OPERR using fp2.
9915 global t_operr
9916t_operr:
9917 ori.l &opnan_mask,USER_FPSR(%a6) # set NAN/OPERR/AIOP
9918
9919 btst &operr_bit,FPCR_ENABLE(%a6) # is the OPERR exception enabled?
9920 bne.b operr_ena # yes; go raise it
9921
9922# operr is disabled. return a QNAN in fp0
9923 fmovm.x qnan(%pc),&0x80 # return QNAN
9924 rts
9925
9926# operr is enabled. create an operr exception so the user can record it
9927# but use fp2 instead. return the dst operand unscathed in fp0.
9928operr_ena:
9929 fmovm.x EXC_FP0(%a6),&0x80 # return fp0 unscathed
9930 fmov.l USER_FPCR(%a6),%fpcr # run the multiply under the user's FPCR
9931 fmovm.x &0x04,-(%sp) # save fp2
9932 fmov.s &0x7f800000,%fp2 # load +INF
9933 fmul.s &0x00000000,%fp2 # +INF x 0
9934 fmovm.x (%sp)+,&0x20 # restore fp2
9935 rts
9936
# Extended-precision operands (three longwords each) used by the overflow,
# underflow and inexact handlers below to provoke the matching FPU event.
# pls_huge/mns_huge: exponent field 0x7ffe, mantissa all ones, sign +/-.
9937pls_huge:
9938 long 0x7ffe0000,0xffffffff,0xffffffff
9939mns_huge:
9940 long 0xfffe0000,0xffffffff,0xffffffff
# pls_tiny/mns_tiny: exponent field 0, mantissa 0x80000000_00000000, sign +/-.
9941pls_tiny:
9942 long 0x00000000,0x80000000,0x00000000
9943mns_tiny:
9944 long 0x80000000,0x80000000,0x00000000
9945
9946#########################################################################
9947# XDEF **************************************************************** #
9948# t_unfl(): Handle 060FPLSP underflow exception during emulation. #
9949# t_unfl2(): Handle 060FPLSP underflow exception during #
9950# emulation. result always positive. #
9951# #
9952# This routine is used by the 060FPLSP package. #
9953# #
9954# XREF **************************************************************** #
9955# None. #
9956# #
9957# INPUT *************************************************************** #
9958# a0 = pointer to extended precision source operand #
9959# #
9960# OUTPUT ************************************************************** #
9961# fp0 = default underflow result #
9962# #
9963# ALGORITHM *********************************************************** #
9964# An underflow should occur as the result of transcendental #
9965# emulation in the 060FPLSP. Create an underflow by using "fmul" #
9966# and two very small numbers of appropriate sign so the operating #
9967# system can log the event. #
9968# #
9969#########################################################################
9970
# Entry t_unfl: pick the sign of the underflow result from the source
# operand at (a0); a negative source falls through into t_unfl2.
9971 global t_unfl
9972t_unfl:
9973 tst.b SRC_EX(%a0) # N flag <- sign of source operand
9974 bpl.b unf_pos # positive source: build a positive result
9975
# Entry t_unfl2: negative path, entered without testing the source sign.
# NOTE(review): the banner above describes t_unfl2 as "result always
# positive", but this path sets neg_mask and multiplies mns_tiny by
# pls_tiny - confirm which is intended.
9976 global t_unfl2
9977t_unfl2:
9978 ori.l &unfinx_mask+neg_mask,USER_FPSR(%a6) # set N/UNFL/INEX2/AUNFL/AINEX
9979
9980 fmov.l USER_FPCR(%a6),%fpcr # multiply under the user's FPCR
9981 fmovm.x mns_tiny(%pc),&0x80 # fp0 = mns_tiny
9982 fmul.x pls_tiny(%pc),%fp0 # fp0 = mns_tiny * pls_tiny
9983
9984 fmov.l %fpsr,%d0 # fetch the FPSR left by the multiply
9985 rol.l &0x8,%d0 # rotate its top byte down into d0[7:0]
9986 mov.b %d0,FPSR_CC(%a6) # store that byte as the saved ccodes
9987 rts
# Positive path: pls_tiny squared, no N bit.
9988unf_pos:
9989 ori.w &unfinx_mask,FPSR_EXCEPT(%a6) # set UNFL/INEX2/AUNFL/AINEX
9990
9991 fmov.l USER_FPCR(%a6),%fpcr # multiply under the user's FPCR
9992 fmovm.x pls_tiny(%pc),&0x80 # fp0 = pls_tiny
9993 fmul.x %fp0,%fp0 # fp0 = pls_tiny * pls_tiny
9994
9995 fmov.l %fpsr,%d0 # fetch the FPSR left by the multiply
9996 rol.l &0x8,%d0 # rotate its top byte down into d0[7:0]
9997 mov.b %d0,FPSR_CC(%a6) # store that byte as the saved ccodes
9998 rts
9999
10000#########################################################################
10001# XDEF **************************************************************** #
10002# t_ovfl(): Handle 060FPLSP overflow exception during emulation. #
10003# (monadic) #
10004# t_ovfl2(): Handle 060FPLSP overflow exception during #
10005# emulation. result always positive. (dyadic) #
10006# t_ovfl_sc(): Handle 060FPLSP overflow exception during #
10007# emulation for "fscale". #
10008# #
10009# This routine is used by the 060FPLSP package. #
10010# #
10011# XREF **************************************************************** #
10012# None. #
10013# #
10014# INPUT *************************************************************** #
10015# a0 = pointer to extended precision source operand #
10016# #
10017# OUTPUT ************************************************************** #
10018# fp0 = default overflow result #
10019# #
10020# ALGORITHM *********************************************************** #
10021# An overflow should occur as the result of transcendental #
10022# emulation in the 060FPLSP. Create an overflow by using "fmul" #
10023# and two very large numbers of appropriate sign so the operating #
10024# system can log the event. #
10025# For t_ovfl_sc() we take special care not to lose the INEX2 bit. #
10026# #
10027#########################################################################
10028
# Entry t_ovfl_sc ("fscale" overflow): sets OVFL/AOVFL/AINEX only, then
# decides whether INEX2 must also be set before joining ovfl_work.
# In: d0 = rnd prec,mode byte; a0 = pointer to the operand.
10029 global t_ovfl_sc
10030t_ovfl_sc:
10031 ori.l &ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX
10032
10033 mov.b %d0,%d1 # fetch rnd prec,mode
10034 andi.b &0xc0,%d1 # extract prec
10035 beq.w ovfl_work # prec bits are zero: no INEX2 test needed
10036
10037# dst op is a DENORM. we have to normalize the mantissa to see if the
10038# result would be inexact for the given precision. make a copy of the
10039# dst so we don't screw up the version passed to us.
# NOTE(review): no test that the operand really is a DENORM is visible on
# this path; norm() documents that its input must not already be normalized.
10040 mov.w LOCAL_EX(%a0),FP_SCR0_EX(%a6)
10041 mov.l LOCAL_HI(%a0),FP_SCR0_HI(%a6)
10042 mov.l LOCAL_LO(%a0),FP_SCR0_LO(%a6)
10043 lea FP_SCR0(%a6),%a0 # pass ptr to FP_SCR0
10044 movm.l &0xc080,-(%sp) # save d0-d1/a0
10045 bsr.l norm # normalize mantissa
10046 movm.l (%sp)+,&0x0103 # restore d0-d1/a0
10047
10048 cmpi.b %d1,&0x40 # is precision sgl?
10049 bne.b ovfl_sc_dbl # no; dbl
10050ovfl_sc_sgl:
10051 tst.l LOCAL_LO(%a0) # is lo lw of sgl set?
10052 bne.b ovfl_sc_inx # yes
10053 tst.b 3+LOCAL_HI(%a0) # is lo byte of hi lw set?
10054 bne.b ovfl_sc_inx # yes
10055 bra.w ovfl_work # don't set INEX2
10056ovfl_sc_dbl:
10057 mov.l LOCAL_LO(%a0),%d1 # are any of lo 11 bits of
10058 andi.l &0x7ff,%d1 # dbl mantissa set?
10059 beq.w ovfl_work # no; don't set INEX2
10060ovfl_sc_inx:
10061 ori.l &inex2_mask,USER_FPSR(%a6) # set INEX2
10062 bra.b ovfl_work # continue
10063
# Entry t_ovfl (monadic overflow): always sets INEX2 along with OVFL, then
# builds a signed overflow by multiplying two huge constants.
10064 global t_ovfl
10065t_ovfl:
10066 ori.w &ovfinx_mask,FPSR_EXCEPT(%a6) # set OVFL/INEX2/AOVFL/AINEX
10067ovfl_work:
10068 tst.b SRC_EX(%a0) # sign of the operand at (a0)
10069 bpl.b ovfl_p # positive: build a positive overflow
10070ovfl_m:
10071 fmov.l USER_FPCR(%a6),%fpcr # multiply under the user's FPCR
10072 fmovm.x mns_huge(%pc),&0x80 # fp0 = mns_huge
10073 fmul.x pls_huge(%pc),%fp0 # fp0 = mns_huge * pls_huge
10074
10075 fmov.l %fpsr,%d0 # fetch the FPSR left by the multiply
10076 rol.l &0x8,%d0 # rotate its top byte down into d0[7:0]
# Byte-sized OR: use the byte mask neg_bmask, as every other byte write to
# FPSR_CC in this file does (the original used the longword mask neg_mask).
10077 ori.b &neg_bmask,%d0
10078 mov.b %d0,FPSR_CC(%a6) # store that byte as the saved ccodes
10079 rts
10080ovfl_p:
10081 fmov.l USER_FPCR(%a6),%fpcr # multiply under the user's FPCR
10082 fmovm.x pls_huge(%pc),&0x80 # fp0 = pls_huge
10083 fmul.x pls_huge(%pc),%fp0 # fp0 = pls_huge * pls_huge
10084
10085 fmov.l %fpsr,%d0 # fetch the FPSR left by the multiply
10086 rol.l &0x8,%d0 # rotate its top byte down into d0[7:0]
10087 mov.b %d0,FPSR_CC(%a6) # store that byte as the saved ccodes
10088 rts
10089
# t_ovfl2: overflow with no sign test - always multiplies pls_huge by
# pls_huge, so the operand at (a0) is not read here.
10090 global t_ovfl2
10091t_ovfl2:
10092 ori.w &ovfinx_mask,FPSR_EXCEPT(%a6) # set OVFL/INEX2/AOVFL/AINEX
10093 fmov.l USER_FPCR(%a6),%fpcr # multiply under the user's FPCR
10094 fmovm.x pls_huge(%pc),&0x80 # fp0 = pls_huge
10095 fmul.x pls_huge(%pc),%fp0 # fp0 = pls_huge * pls_huge
10096
10097 fmov.l %fpsr,%d0 # fetch the FPSR left by the multiply
10098 rol.l &0x8,%d0 # rotate its top byte down into d0[7:0]
10099 mov.b %d0,FPSR_CC(%a6) # store that byte as the saved ccodes
10100 rts
10101
10102#########################################################################
10103# XDEF **************************************************************** #
10104# t_catch(): Handle 060FPLSP OVFL,UNFL,or INEX2 exception during #
10105# emulation. #
10106# t_catch2(): Handle 060FPLSP OVFL,UNFL,or INEX2 exception during #
10107# emulation. #
10108# #
10109# These routines are used by the 060FPLSP package. #
10110# #
10111# XREF **************************************************************** #
10112# None. #
10113# #
10114# INPUT *************************************************************** #
10115# fp0 = default underflow or overflow result #
10116# #
10117# OUTPUT ************************************************************** #
10118# fp0 = default result #
10119# #
10120# ALGORITHM *********************************************************** #
10121# If an overflow or underflow occurred during the last #
10122# instruction of transcendental 060FPLSP emulation, then it has already #
10123# occurred and has been logged. Now we need to see if an inexact #
10124# exception should occur. #
10125# #
10126#########################################################################
10127
# t_catch2: merge the live FPSR into the saved USER_FPSR, then go straight
# to the inexact-enabled test at inx2_work (no sign/zero classification).
10128 global t_catch2
10129t_catch2:
10130 fmov.l %fpsr,%d0 # d0 = current FPSR
10131 or.l %d0,USER_FPSR(%a6) # accumulate it into the saved FPSR
10132 bra.b inx2_work
10133
# t_catch: same merge, but there is no branch or rts after it - execution
# continues into the code that follows the next banner (t_inx2).
10134 global t_catch
10135t_catch:
10136 fmov.l %fpsr,%d0 # d0 = current FPSR
10137 or.l %d0,USER_FPSR(%a6) # accumulate it into the saved FPSR
10138
10139#########################################################################
10140# XDEF **************************************************************** #
10141# t_inx2(): Handle inexact 060FPLSP exception during emulation. #
10142# t_pinx2(): Handle inexact 060FPLSP exception for "+" results. #
10143# t_minx2(): Handle inexact 060FPLSP exception for "-" results. #
10144# #
10145# XREF **************************************************************** #
10146# None. #
10147# #
10148# INPUT *************************************************************** #
10149# fp0 = default result #
10150# #
10151# OUTPUT ************************************************************** #
10152# fp0 = default result #
10153# #
10154# ALGORITHM *********************************************************** #
10155# The last instruction of transcendental emulation for the #
10156# 060FPLSP should be inexact. So, if inexact is enabled, then we create #
10157# the event here by adding a large and very small number together #
10158# so that the operating system can log the event. #
10159# Must check, too, if the result was zero, in which case we just #
10160# set the FPSR bits and return. #
10161# #
10162#########################################################################
10163
# t_inx2: classify the result by the FPU condition codes and dispatch to the
# negative, zero, or (by fall-through) positive inexact handler.
10164 global t_inx2
10165t_inx2:
10166 fblt.w t_minx2 # FPU ccodes say "less than": negative path
10167 fbeq.w inx2_zero # FPU ccodes say "equal": zero path
10168
10169 global t_pinx2
10170t_pinx2:
10171 ori.w &inx2a_mask,FPSR_EXCEPT(%a6) # set INEX2/AINEX
10172 bra.b inx2_work
10173
10174 global t_minx2
10175t_minx2:
10176 ori.l &inx2a_mask+neg_mask,USER_FPSR(%a6) # set N plus INEX2/AINEX
10177
# Common tail: only touch the FPU again if the inexact exception is enabled.
10178inx2_work:
10179 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
10180 bne.b inx2_work_ena # yes
10181 rts
10182inx2_work_ena:
10183 fmov.l USER_FPCR(%a6),%fpcr # insert user's exceptions
10184 fmov.s &0x3f800000,%fp1 # load +1
10185 fadd.x pls_tiny(%pc),%fp1 # cause exception
10186 rts
10187
# Zero result: record Z and INEX/AINEX in the saved FPSR and return without
# creating an exception.
10188inx2_zero:
10189 mov.b &z_bmask,FPSR_CC(%a6)
10190 ori.w &inx2a_mask,2+USER_FPSR(%a6) # set INEX/AINEX
10191 rts
10192
10193#########################################################################
10194# XDEF **************************************************************** #
10195# t_extdnrm(): Handle DENORM inputs in 060FPLSP. #
10196# t_resdnrm(): Handle DENORM inputs in 060FPLSP for "fscale". #
10197# #
10198# This routine is used by the 060FPLSP package. #
10199# #
10200# XREF **************************************************************** #
10201# None. #
10202# #
10203# INPUT *************************************************************** #
10204# a0 = pointer to extended precision input operand #
10205# #
10206# OUTPUT ************************************************************** #
10207# fp0 = default result #
10208# #
10209# ALGORITHM *********************************************************** #
10210# For all functions that have a denormalized input and that #
10211# f(x)=x, this is the entry point. #
10212# DENORM value is moved using "fmove" which triggers an exception #
10213# if enabled so the operating system can log the event. #
10214# #
10215#########################################################################
10216
# t_extdnrm: load the operand at (a0) into fp0 under the user's FPCR and
# record the resulting FPSR plus unfinx_mask in the saved USER_FPSR.
10217 global t_extdnrm
10218t_extdnrm:
10219 fmov.l USER_FPCR(%a6),%fpcr # perform the move under the user's FPCR
10220 fmov.x SRC_EX(%a0),%fp0 # fp0 = operand at (a0)
10221 fmov.l %fpsr,%d0 # d0 = FPSR after the move
10222 ori.l &unfinx_mask,%d0 # add the bits in unfinx_mask
10223 or.l %d0,USER_FPSR(%a6) # accumulate into the saved FPSR
10224 rts
10225
# t_resdnrm: like t_extdnrm, but only the FPSR produced by the move is
# merged into USER_FPSR (unfinx_mask is not forced).
10226 global t_resdnrm
10227t_resdnrm:
10228 fmov.l USER_FPCR(%a6),%fpcr # perform the move under the user's FPCR
10229 fmov.x SRC_EX(%a0),%fp0 # fp0 = operand at (a0)
10230 fmov.l %fpsr,%d0 # d0 = FPSR after the move
10231 or.l %d0,USER_FPSR(%a6) # accumulate into the saved FPSR
10232 rts
10233
10234##########################################
10235
10236#
10237# sto_cos:
10238# This is used by fsincos library emulation. The correct
10239# values are already in fp0 and fp1 so we do nothing here.
10240#
10241 global sto_cos
10242sto_cos:
10243 rts # intentionally empty: returns immediately
10244
10245##########################################
10246
10247#
10248# dst_qnan --- force result when destination is a NaN
10249#
# dst_qnan: return the destination operand at (a1) in fp0 and set the saved
# condition-code byte to NAN, plus N when the operand's sign bit is set.
10250 global dst_qnan
10251dst_qnan:
10252 fmov.x DST(%a1),%fp0 # fp0 = dst operand
10253 tst.b DST_EX(%a1) # N flag <- sign of dst
10254 bmi.b dst_qnan_m # negative: also set N
10255dst_qnan_p:
10256 mov.b &nan_bmask,FPSR_CC(%a6)
10257 rts
10258dst_qnan_m:
10259 mov.b &nan_bmask+neg_bmask,FPSR_CC(%a6)
10260 rts
10261
10262#
10263# src_qnan --- force result when source is a NaN
10264#
# src_qnan: return the source operand at (a0) in fp0 and set the saved
# condition-code byte to NAN, plus N when the operand's sign bit is set.
10265 global src_qnan
10266src_qnan:
10267 fmov.x SRC(%a0),%fp0 # fp0 = src operand
10268 tst.b SRC_EX(%a0) # N flag <- sign of src
10269 bmi.b src_qnan_m # negative: also set N
10270src_qnan_p:
10271 mov.b &nan_bmask,FPSR_CC(%a6)
10272 rts
10273src_qnan_m:
10274 mov.b &nan_bmask+neg_bmask,FPSR_CC(%a6)
10275 rts
10276
10277##########################################
10278
10279#
10280# Native instruction support
10281#
10282# Some systems may need entry points even for 68060 native
10283# instructions. These routines are provided for
10284# convenience.
10285#
# _fadds_(): fp0 = dst + src for two single-precision stack arguments.
# On entry dst is at 4(%sp) and src at 8(%sp); while the saved FPCR is on
# the stack every offset is 4 larger, which is why both accesses use 0x8.
10286 global _fadds_
10287_fadds_:
10288 fmov.l %fpcr,-(%sp) # save fpcr
10289 fmov.l &0x00000000,%fpcr # clear fpcr for load
10290 fmov.s 0x8(%sp),%fp0 # load sgl dst
10291 fmov.l (%sp)+,%fpcr # restore fpcr
10292 fadd.s 0x8(%sp),%fp0 # fadd w/ sgl src
10293 rts
10294
# _faddd_(): fp0 = dst + src for two double-precision stack arguments.
# On entry dst is at 4(%sp) and src at 0xc(%sp); dst is read at 0x8 because
# the saved FPCR is still on the stack at that point.
10295 global _faddd_
10296_faddd_:
10297 fmov.l %fpcr,-(%sp) # save fpcr
10298 fmov.l &0x00000000,%fpcr # clear fpcr for load
10299 fmov.d 0x8(%sp),%fp0 # load dbl dst
10300 fmov.l (%sp)+,%fpcr # restore fpcr
10301 fadd.d 0xc(%sp),%fp0 # fadd w/ dbl src
10302 rts
10303
# _faddx_(): fp0 = dst + src for two extended-precision stack arguments
# (dst at 4(%sp), src at 0x10(%sp)). FPCR is left untouched here.
10304 global _faddx_
10305_faddx_:
10306 fmovm.x 0x4(%sp),&0x80 # load ext dst
10307 fadd.x 0x10(%sp),%fp0 # fadd w/ ext src
10308 rts
10309
# _fsubs_(): fp0 = dst - src for two single-precision stack arguments.
# On entry dst is at 4(%sp) and src at 8(%sp); while the saved FPCR is on
# the stack every offset is 4 larger, which is why both accesses use 0x8.
10310 global _fsubs_
10311_fsubs_:
10312 fmov.l %fpcr,-(%sp) # save fpcr
10313 fmov.l &0x00000000,%fpcr # clear fpcr for load
10314 fmov.s 0x8(%sp),%fp0 # load sgl dst
10315 fmov.l (%sp)+,%fpcr # restore fpcr
10316 fsub.s 0x8(%sp),%fp0 # fsub w/ sgl src
10317 rts
10318
# _fsubd_(): fp0 = dst - src for two double-precision stack arguments.
# On entry dst is at 4(%sp) and src at 0xc(%sp); dst is read at 0x8 because
# the saved FPCR is still on the stack at that point.
10319 global _fsubd_
10320_fsubd_:
10321 fmov.l %fpcr,-(%sp) # save fpcr
10322 fmov.l &0x00000000,%fpcr # clear fpcr for load
10323 fmov.d 0x8(%sp),%fp0 # load dbl dst
10324 fmov.l (%sp)+,%fpcr # restore fpcr
10325 fsub.d 0xc(%sp),%fp0 # fsub w/ dbl src
10326 rts
10327
# _fsubx_(): fp0 = dst - src for two extended-precision stack arguments
# (dst at 4(%sp), src at 0x10(%sp)). FPCR is left untouched here.
10328 global _fsubx_
10329_fsubx_:
10330 fmovm.x 0x4(%sp),&0x80 # load ext dst
10331 fsub.x 0x10(%sp),%fp0 # fsub w/ ext src
10332 rts
10333
# _fmuls_(): fp0 = dst * src for two single-precision stack arguments.
# On entry dst is at 4(%sp) and src at 8(%sp); while the saved FPCR is on
# the stack every offset is 4 larger, which is why both accesses use 0x8.
10334 global _fmuls_
10335_fmuls_:
10336 fmov.l %fpcr,-(%sp) # save fpcr
10337 fmov.l &0x00000000,%fpcr # clear fpcr for load
10338 fmov.s 0x8(%sp),%fp0 # load sgl dst
10339 fmov.l (%sp)+,%fpcr # restore fpcr
10340 fmul.s 0x8(%sp),%fp0 # fmul w/ sgl src
10341 rts
10342
# _fmuld_(): fp0 = dst * src for two double-precision stack arguments.
# On entry dst is at 4(%sp) and src at 0xc(%sp); dst is read at 0x8 because
# the saved FPCR is still on the stack at that point.
10343 global _fmuld_
10344_fmuld_:
10345 fmov.l %fpcr,-(%sp) # save fpcr
10346 fmov.l &0x00000000,%fpcr # clear fpcr for load
10347 fmov.d 0x8(%sp),%fp0 # load dbl dst
10348 fmov.l (%sp)+,%fpcr # restore fpcr
10349 fmul.d 0xc(%sp),%fp0 # fmul w/ dbl src
10350 rts
10351
# _fmulx_(): fp0 = dst * src for two extended-precision stack arguments
# (dst at 4(%sp), src at 0x10(%sp)). FPCR is left untouched here.
10352 global _fmulx_
10353_fmulx_:
10354 fmovm.x 0x4(%sp),&0x80 # load ext dst
10355 fmul.x 0x10(%sp),%fp0 # fmul w/ ext src
10356 rts
10357
# _fdivs_(): fp0 = dst / src for two single-precision stack arguments.
# On entry dst is at 4(%sp) and src at 8(%sp); while the saved FPCR is on
# the stack every offset is 4 larger, which is why both accesses use 0x8.
10358 global _fdivs_
10359_fdivs_:
10360 fmov.l %fpcr,-(%sp) # save fpcr
10361 fmov.l &0x00000000,%fpcr # clear fpcr for load
10362 fmov.s 0x8(%sp),%fp0 # load sgl dst
10363 fmov.l (%sp)+,%fpcr # restore fpcr
10364 fdiv.s 0x8(%sp),%fp0 # fdiv w/ sgl src
10365 rts
10366
# _fdivd_(): fp0 = dst / src for two double-precision stack arguments.
# On entry dst is at 4(%sp) and src at 0xc(%sp); dst is read at 0x8 because
# the saved FPCR is still on the stack at that point.
10367 global _fdivd_
10368_fdivd_:
10369 fmov.l %fpcr,-(%sp) # save fpcr
10370 fmov.l &0x00000000,%fpcr # clear fpcr for load
10371 fmov.d 0x8(%sp),%fp0 # load dbl dst
10372 fmov.l (%sp)+,%fpcr # restore fpcr
10373 fdiv.d 0xc(%sp),%fp0 # fdiv w/ dbl src
10374 rts
10375
# _fdivx_(): fp0 = dst / src for two extended-precision stack arguments
# (dst at 4(%sp), src at 0x10(%sp)). FPCR is left untouched here.
10376 global _fdivx_
10377_fdivx_:
10378 fmovm.x 0x4(%sp),&0x80 # load ext dst
10379 fdiv.x 0x10(%sp),%fp0 # fdiv w/ ext src
10380 rts
10381
# _fabss_(): fabs of the single-precision argument at 4(%sp); result in fp0.
10382 global _fabss_
10383_fabss_:
10384 fabs.s 0x4(%sp),%fp0 # fabs w/ sgl src
10385 rts
10386
# _fabsd_(): fabs of the double-precision argument at 4(%sp); result in fp0.
10387 global _fabsd_
10388_fabsd_:
10389 fabs.d 0x4(%sp),%fp0 # fabs w/ dbl src
10390 rts
10391
# _fabsx_(): fabs of the extended-precision argument at 4(%sp); result in fp0.
10392 global _fabsx_
10393_fabsx_:
10394 fabs.x 0x4(%sp),%fp0 # fabs w/ ext src
10395 rts
10396
# _fnegs_(): fneg of the single-precision argument at 4(%sp); result in fp0.
10397 global _fnegs_
10398_fnegs_:
10399 fneg.s 0x4(%sp),%fp0 # fneg w/ sgl src
10400 rts
10401
# _fnegd_(): fneg of the double-precision argument at 4(%sp); result in fp0.
10402 global _fnegd_
10403_fnegd_:
10404 fneg.d 0x4(%sp),%fp0 # fneg w/ dbl src
10405 rts
10406
# _fnegx_(): fneg of the extended-precision argument at 4(%sp); result in fp0.
10407 global _fnegx_
10408_fnegx_:
10409 fneg.x 0x4(%sp),%fp0 # fneg w/ ext src
10410 rts
10411
# _fsqrts_(): fsqrt of the single-precision argument at 4(%sp); result in fp0.
10412 global _fsqrts_
10413_fsqrts_:
10414 fsqrt.s 0x4(%sp),%fp0 # fsqrt w/ sgl src
10415 rts
10416
# _fsqrtd_(): fsqrt of the double-precision argument at 4(%sp); result in fp0.
10417 global _fsqrtd_
10418_fsqrtd_:
10419 fsqrt.d 0x4(%sp),%fp0 # fsqrt w/ dbl src
10420 rts
10421
# _fsqrtx_(): fsqrt of the extended-precision argument at 4(%sp); result in fp0.
10422 global _fsqrtx_
10423_fsqrtx_:
10424 fsqrt.x 0x4(%sp),%fp0 # fsqrt w/ ext src
10425 rts
10426
# _fints_(): fint of the single-precision argument at 4(%sp); result in fp0.
10427 global _fints_
10428_fints_:
10429 fint.s 0x4(%sp),%fp0 # fint w/ sgl src
10430 rts
10431
# _fintd_(): fint of the double-precision argument at 4(%sp); result in fp0.
10432 global _fintd_
10433_fintd_:
10434 fint.d 0x4(%sp),%fp0 # fint w/ dbl src
10435 rts
10436
# _fintx_(): fint of the extended-precision argument at 4(%sp); result in fp0.
10437 global _fintx_
10438_fintx_:
10439 fint.x 0x4(%sp),%fp0 # fint w/ ext src
10440 rts
10441
# _fintrzs_(): fintrz of the single-precision argument at 4(%sp); result in fp0.
10442 global _fintrzs_
10443_fintrzs_:
10444 fintrz.s 0x4(%sp),%fp0 # fintrz w/ sgl src
10445 rts
10446
# _fintrzd_(): fintrz of the double-precision argument at 4(%sp); result in fp0.
10447 global _fintrzd_
10448_fintrzd_:
10449 fintrz.d 0x4(%sp),%fp0 # fintrz w/ dbl src
10450 rts
10451
# _fintrzx_(): fintrz of the extended-precision argument at 4(%sp); result in fp0.
10452 global _fintrzx_
10453_fintrzx_:
10454 fintrz.x 0x4(%sp),%fp0 # fintrz w/ ext src
10455 rts
10456
10457########################################################################
10458
10459#########################################################################
10460# src_zero(): Return signed zero according to sign of src operand. #
10461#########################################################################
# src_zero tests the sign of the operand at (a0); a non-negative operand
# falls through into ld_pzero, a negative one branches to ld_mzero.
10462 global src_zero
10463src_zero:
10464 tst.b SRC_EX(%a0) # get sign of src operand
10465 bmi.b ld_mzero # if neg, load neg zero
10466
10467#
10468# ld_pzero(): return a positive zero.
10469#
10470 global ld_pzero
10471ld_pzero:
10472 fmov.s &0x00000000,%fp0 # load +0
10473 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
10474 rts
10475
10476# ld_mzero(): return a negative zero.
10477 global ld_mzero
10478ld_mzero:
10479 fmov.s &0x80000000,%fp0 # load -0
10480 mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits
10481 rts
10482
10483#########################################################################
10484# dst_zero(): Return signed zero according to sign of dst operand. #
10485#########################################################################
10486 global dst_zero
10487dst_zero:
10488 tst.b DST_EX(%a1) # N flag <- sign bit of the dst operand
10489 bpl.b ld_pzero # sign clear: hand back +0
10490 bra.b ld_mzero # sign set: hand back -0
10491
10492#########################################################################
10493# src_inf(): Return signed inf according to sign of src operand. #
10494#########################################################################
# src_inf tests the sign of the operand at (a0); a non-negative operand
# falls through into ld_pinf, a negative one branches to ld_minf.
10495 global src_inf
10496src_inf:
10497 tst.b SRC_EX(%a0) # get sign of src operand
10498 bmi.b ld_minf # if negative branch
10499
10500#
10501# ld_pinf(): return a positive infinity.
10502#
10503 global ld_pinf
10504ld_pinf:
10505 fmov.s &0x7f800000,%fp0 # load +INF
10506 mov.b &inf_bmask,FPSR_CC(%a6) # set 'INF' ccode bit
10507 rts
10508
10509#
10510# ld_minf():return a negative infinity.
10511#
10512 global ld_minf
10513ld_minf:
10514 fmov.s &0xff800000,%fp0 # load -INF
10515 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
10516 rts
10517
10518#########################################################################
10519# dst_inf(): Return signed inf according to sign of dst operand. #
10520#########################################################################
10521 global dst_inf
10522dst_inf:
10523 tst.b DST_EX(%a1) # N flag <- sign bit of the dst operand
10524 bpl.b ld_pinf # sign clear: hand back +INF
10525 bra.b ld_minf # sign set: hand back -INF
10526
10527 global szr_inf
10528#################################################################
10529# szr_inf(): Return +ZERO for a negative src operand or #
10530# +INF for a positive src operand. #
10531# Routine used for fetox, ftwotox, and ftentox. #
10532#################################################################
10533szr_inf:
10534 tst.b SRC_EX(%a0) # N flag <- sign bit of the src operand
10535 bpl.b ld_pinf # sign clear: result is +INF
10536 bra.b ld_pzero # sign set: result is +0
10537
10538#########################################################################
10539# sopr_inf(): Return +INF for a positive src operand or #
10540# jump to operand error routine for a negative src operand. #
10541# Routine used for flogn, flognp1, flog10, and flog2. #
10542#########################################################################
10543 global sopr_inf
10544sopr_inf:
10545 tst.b SRC_EX(%a0) # N flag <- sign bit of the src operand
10546 bpl.b ld_pinf # sign clear: result is +INF
10547 bra.w t_operr # sign set: operand error
10548
10549#################################################################
10550# setoxm1i(): Return minus one for a negative src operand or #
10551# positive infinity for a positive src operand. #
10552# Routine used for fetoxm1. #
10553#################################################################
10554 global setoxm1i
10555setoxm1i:
10556 tst.b SRC_EX(%a0) # N flag <- sign bit of the src operand
10557 bpl.b ld_pinf # sign clear: result is +INF
10558 bra.b ld_mone # sign set: result is -1
10559
10560#########################################################################
10561# src_one(): Return signed one according to sign of src operand. #
10562#########################################################################
# src_one tests the sign of the operand at (a0); a non-negative operand
# falls through into ld_pone, a negative one branches to ld_mone.
10563 global src_one
10564src_one:
10565 tst.b SRC_EX(%a0) # check sign of source
10566 bmi.b ld_mone
10567
10568#
10569# ld_pone(): return positive one.
10570#
10571 global ld_pone
10572ld_pone:
10573 fmov.s &0x3f800000,%fp0 # load +1
10574 clr.b FPSR_CC(%a6) # no ccode bits set for +1
10575 rts
10576
10577#
10578# ld_mone(): return negative one.
10579#
10580 global ld_mone
10581ld_mone:
10582 fmov.s &0xbf800000,%fp0 # load -1
10583 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
10584 rts
10585
# Extended-precision +pi/2 and -pi/2 (identical mantissas, opposite sign
# bit); loaded by ld_ppi2 and ld_mpi2.
10586ppiby2: long 0x3fff0000, 0xc90fdaa2, 0x2168c235
10587mpiby2: long 0xbfff0000, 0xc90fdaa2, 0x2168c235
10588
10589#################################################################
10590# spi_2(): Return signed PI/2 according to sign of src operand. #
10591#################################################################
# spi_2 tests the sign of the operand at (a0); a non-negative operand falls
# through into ld_ppi2, a negative one branches to ld_mpi2.
# Both loaders write d0 to FPCR first, so d0 must hold the FPCR value the
# caller wants in effect for the load.
10592 global spi_2
10593spi_2:
10594 tst.b SRC_EX(%a0) # check sign of source
10595 bmi.b ld_mpi2
10596
10597#
10598# ld_ppi2(): return positive PI/2.
10599#
10600 global ld_ppi2
10601ld_ppi2:
10602 fmov.l %d0,%fpcr # FPCR <- caller-supplied d0
10603 fmov.x ppiby2(%pc),%fp0 # load +pi/2
10604 bra.w t_pinx2 # set INEX2
10605
10606#
10607# ld_mpi2(): return negative PI/2.
10608#
10609 global ld_mpi2
10610ld_mpi2:
10611 fmov.l %d0,%fpcr # FPCR <- caller-supplied d0
10612 fmov.x mpiby2(%pc),%fp0 # load -pi/2
10613 bra.w t_minx2 # set INEX2
10614
10615####################################################
10616# The following routines give support for fsincos. #
10617####################################################
10618
10619#
10620# ssincosz(): When the src operand is ZERO, store a one in the
10621# cosine register and return a ZERO in fp0 w/ the same sign
10622# as the src operand.
10623#
10624 global ssincosz
10625ssincosz:
10626 fmov.s &0x3f800000,%fp1 # fp1 = +1 (the cosine result)
10627 tst.b SRC_EX(%a0) # test sign
10628 bpl.b sincoszp # positive src: return +0
10629 fmov.s &0x80000000,%fp0 # return sin result in fp0
10630 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # ccodes for -0: Z and N
10631 rts
10632sincoszp:
10633 fmov.s &0x00000000,%fp0 # return sin result in fp0
10634 mov.b &z_bmask,FPSR_CC(%a6) # ccodes for +0: Z only
10635 rts
10636
10637#
10638# ssincosi(): When the src operand is INF, store a QNAN in the cosine
10639# register and jump to the operand error routine (the branch is
10640# unconditional; the sign of the src operand is not tested here).
10641#
10642 global ssincosi
10643ssincosi:
10644 fmov.x qnan(%pc),%fp1 # load NAN
10645 bra.w t_operr # unconditional: t_operr produces the fp0 result
10646
10647#
10648# ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
10649# register and branch to the src QNAN routine.
10650#
10651 global ssincosqnan
10652ssincosqnan:
10653 fmov.x LOCAL_EX(%a0),%fp1 # fp1 = operand at (a0)
10654 bra.w src_qnan # src_qnan loads fp0 and sets the ccodes
10655
10656########################################################################
10657
# fmod dispatch on the dst tag (shared by the smod_sdnrm and smod_snorm
# entry points). The tag values are distinct, so the comparison order
# does not affect which target is reached.
10658 global smod_sdnrm
10659 global smod_snorm
10660smod_sdnrm:
10661smod_snorm:
10662 mov.b DTAG(%a6),%d1 # d1 = dst tag; Z set when tag is zero
10663 beq.l smod # zero tag: compute the real fmod
10664 cmpi.b %d1,&DENORM
10665 beq.l smod # DENORM dst: compute the real fmod
10666 cmpi.b %d1,&INF
10667 beq.l t_operr # INF dst: operand error
10668 cmpi.b %d1,&ZERO
10669 beq.w smod_zro # ZERO dst: signed-zero result
10670 bra.l dst_qnan # any other tag: return the dst operand as NAN
10671
# fmod dispatch on the dst tag for the smod_szero entry point: every tag
# tested here ends in an operand error; anything else goes to dst_qnan.
10672 global smod_szero
10673smod_szero:
10674 mov.b DTAG(%a6),%d1 # d1 = dst tag; Z set when tag is zero
10675 beq.l t_operr # zero tag
10676 cmpi.b %d1,&DENORM
10677 beq.l t_operr # DENORM dst
10678 cmpi.b %d1,&INF
10679 beq.l t_operr # INF dst
10680 cmpi.b %d1,&ZERO
10681 beq.l t_operr # ZERO dst
10682 bra.l dst_qnan # any other tag: return the dst operand as NAN
10683
# fmod dispatch on the dst tag for the smod_sinf entry point.
10684 global smod_sinf
10685smod_sinf:
10686 mov.b DTAG(%a6),%d1 # d1 = dst tag; Z set when tag is zero
10687 beq.l smod_fpn # zero tag: result is the dst itself
10688 cmpi.b %d1,&DENORM
10689 beq.l smod_fpn # DENORM dst: result is the dst itself
10690 cmpi.b %d1,&INF
10691 beq.l t_operr # INF dst: operand error
10692 cmpi.b %d1,&ZERO
10693 beq.l smod_zro # ZERO dst: signed-zero result
10694 bra.l dst_qnan # any other tag: return the dst operand as NAN
10695
# Shared fmod/frem tail for a zero result: store the quotient-byte sign
# (src sign XOR dst sign) and return a zero carrying the dst's sign.
10696smod_zro:
10697srem_zro:
10698 mov.b SRC_EX(%a0),%d1 # get src sign
10699 mov.b DST_EX(%a1),%d0 # get dst sign
10700 eor.b %d0,%d1 # get qbyte sign
10701 andi.b &0x80,%d1 # keep only the sign bit
10702 mov.b %d1,FPSR_QBYTE(%a6) # quotient byte = sign only
10703 tst.b %d0 # N flag <- dst sign
10704 bpl.w ld_pzero # positive dst: +0
10705 bra.w ld_mzero # negative dst: -0
10706
# Shared fmod/frem tail where the result is the dst operand itself: store
# the quotient-byte sign, then return dst in fp0. d0 is saved on entry
# because it is reused as scratch; the saved value is later either restored
# to d0 (DENORM path) or popped straight into FPCR (smod_nrm path).
10707smod_fpn:
10708srem_fpn:
# NOTE(review): this clear is overwritten by the mov.b to FPSR_QBYTE below
# with no visible read in between.
10709 clr.b FPSR_QBYTE(%a6)
10710 mov.l %d0,-(%sp) # save caller's d0
10711 mov.b SRC_EX(%a0),%d1 # get src sign
10712 mov.b DST_EX(%a1),%d0 # get dst sign
10713 eor.b %d0,%d1 # get qbyte sign
10714 andi.b &0x80,%d1 # keep only the sign bit
10715 mov.b %d1,FPSR_QBYTE(%a6) # quotient byte = sign only
10716 cmpi.b DTAG(%a6),&DENORM # is the dst a DENORM?
10717 bne.b smod_nrm # no
10718 lea DST(%a1),%a0 # a0 -> dst operand for t_resdnrm
10719 mov.l (%sp)+,%d0 # restore caller's d0
10720 bra t_resdnrm
10721smod_nrm:
10722 fmov.l (%sp)+,%fpcr # FPCR <- the d0 value saved on entry
10723 fmov.x DST(%a1),%fp0 # fp0 = dst operand
10724 tst.b DST_EX(%a1) # N flag <- dst sign
10725 bmi.b smod_nrm_neg # negative: record N
10726 rts # positive: FPSR_CC is not written on this path
10727
10728smod_nrm_neg:
10729 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' code
10730 rts
10731
10732#########################################################################
# frem dispatch on the dst tag (shared by the srem_sdnrm and srem_snorm
# entry points). The tag values are distinct, so the comparison order
# does not affect which target is reached.
10733 global srem_snorm
10734 global srem_sdnrm
10735srem_sdnrm:
10736srem_snorm:
10737 mov.b DTAG(%a6),%d1 # d1 = dst tag; Z set when tag is zero
10738 beq.l srem # zero tag: compute the real frem
10739 cmpi.b %d1,&DENORM
10740 beq.l srem # DENORM dst: compute the real frem
10741 cmpi.b %d1,&INF
10742 beq.l t_operr # INF dst: operand error
10743 cmpi.b %d1,&ZERO
10744 beq.w srem_zro # ZERO dst: signed-zero result
10745 bra.l dst_qnan # any other tag: return the dst operand as NAN
10746
# frem dispatch on the dst tag for the srem_szero entry point: every tag
# tested here ends in an operand error; anything else goes to dst_qnan.
10747 global srem_szero
10748srem_szero:
10749 mov.b DTAG(%a6),%d1 # d1 = dst tag; Z set when tag is zero
10750 beq.l t_operr # zero tag
10751 cmpi.b %d1,&DENORM
10752 beq.l t_operr # DENORM dst
10753 cmpi.b %d1,&INF
10754 beq.l t_operr # INF dst
10755 cmpi.b %d1,&ZERO
10756 beq.l t_operr # ZERO dst
10757 bra.l dst_qnan # any other tag: return the dst operand as NAN
10758
# frem dispatch on the dst tag for the srem_sinf entry point.
10759 global srem_sinf
10760srem_sinf:
10761 mov.b DTAG(%a6),%d1 # d1 = dst tag; Z set when tag is zero
10762 beq.w srem_fpn # zero tag: result is the dst itself
10763 cmpi.b %d1,&DENORM
10764 beq.l srem_fpn # DENORM dst: result is the dst itself
10765 cmpi.b %d1,&INF
10766 beq.l t_operr # INF dst: operand error
10767 cmpi.b %d1,&ZERO
10768 beq.w srem_zro # ZERO dst: signed-zero result
10769 bra.l dst_qnan # any other tag: return the dst operand as NAN
10770
10771#########################################################################
10772
# fscale dispatch on the dst tag (shared by the sscale_snorm and
# sscale_sdnrm entry points).
10773 global sscale_snorm
10774 global sscale_sdnrm
10775sscale_snorm:
10776sscale_sdnrm:
10777 mov.b DTAG(%a6),%d1 # d1 = dst tag; Z set when tag is zero
10778 beq.l sscale # zero tag: do the real scale
10779 cmpi.b %d1,&DENORM
10780 beq.l sscale # DENORM dst: do the real scale
10781 cmpi.b %d1,&INF
10782 beq.l dst_inf # INF dst: signed INF result
10783 cmpi.b %d1,&ZERO
10784 beq.l dst_zero # ZERO dst: signed zero result
10785 bra.l dst_qnan # any other tag: return the dst operand as NAN
10786
# fscale dispatch on the dst tag for the sscale_szero entry point.
10787 global sscale_szero
10788sscale_szero:
10789 mov.b DTAG(%a6),%d1 # d1 = dst tag; Z set when tag is zero
10790 beq.l sscale # zero tag: do the real scale
10791 cmpi.b %d1,&DENORM
10792 beq.l sscale # DENORM dst: do the real scale
10793 cmpi.b %d1,&INF
10794 beq.l dst_inf # INF dst: signed INF result
10795 cmpi.b %d1,&ZERO
10796 beq.l dst_zero # ZERO dst: signed zero result
10797 bra.l dst_qnan # any other tag: return the dst operand as NAN
10798
# fscale dispatch for the sscale_sinf entry point: a QNAN-tagged dst goes to
# dst_qnan, every other dst tag is an operand error.
10799 global sscale_sinf
10800sscale_sinf:
10801 mov.b DTAG(%a6),%d1 # d1 = dst tag; Z set when tag is zero
10802 beq.l t_operr # zero tag (the final bra would also reach t_operr)
10803 cmpi.b %d1,&QNAN
10804 beq.l dst_qnan # QNAN dst: return it
10805 bra.l t_operr # everything else: operand error
10806
10807########################################################################
10808
# Pick which operand to return as the NAN result: the dst operand if its
# tag is QNAN, otherwise the src operand.
10809 global sop_sqnan
10810sop_sqnan:
10811 mov.b DTAG(%a6),%d1 # d1 = dst tag
10812 cmpi.b %d1,&QNAN
10813 bne.l src_qnan # dst is not a QNAN: return the src operand
10814 bra.l dst_qnan # dst is a QNAN: return the dst operand
10815
10816#########################################################################
10817# norm(): normalize the mantissa of an extended precision input. the #
10818# input operand should not be normalized already. #
10819# #
10820# XDEF **************************************************************** #
10821# norm() #
10822# #
10823# XREF **************************************************************** #
10824# none #
10825# #
10826# INPUT *************************************************************** #
10827# a0 = pointer fp extended precision operand to normalize #
10828# #
10829# OUTPUT ************************************************************** #
10830# d0 = number of bit positions the mantissa was shifted #
10831# a0 = the input operand's mantissa is normalized; the exponent #
10832# is unchanged. #
10833# #
10834#########################################################################
# norm: shift the 64-bit mantissa at FTEMP_HI/FTEMP_LO(a0) left until its
# top bit is set and return the shift count in d0. d2/d3 are saved and
# restored; d1 is used as scratch and is not restored.
10835 global norm
10836norm:
10837 mov.l %d2, -(%sp) # create some temp regs
10838 mov.l %d3, -(%sp)
10839
10840 mov.l FTEMP_HI(%a0), %d0 # load hi(mantissa)
10841 mov.l FTEMP_LO(%a0), %d1 # load lo(mantissa)
10842
10843 bfffo %d0{&0:&32}, %d2 # how many places to shift?
10844 beq.b norm_lo # hi(man) is all zeroes!
10845
# hi(man) has a set bit: shift < 32, so bits move from lo(man) into hi(man).
# NOTE(review): if d2 were 0 (input already normalized) the bfextu below
# would be given a width of 0 - presumably why the banner requires an
# un-normalized input; confirm against the bit-field width encoding.
10846norm_hi:
10847 lsl.l %d2, %d0 # left shift hi(man)
10848 bfextu %d1{&0:%d2}, %d3 # extract lo bits
10849
10850 or.l %d3, %d0 # create hi(man)
10851 lsl.l %d2, %d1 # create lo(man)
10852
10853 mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
10854 mov.l %d1, FTEMP_LO(%a0) # store new lo(man)
10855
10856 mov.l %d2, %d0 # return shift amount
10857
10858 mov.l (%sp)+, %d3 # restore temp regs
10859 mov.l (%sp)+, %d2
10860
10861 rts
10862
# hi(man) is zero: the shifted lo(man) becomes the new hi(man), the new
# lo(man) is zero, and the shift count is 32 plus the shift within lo(man).
10863norm_lo:
10864 bfffo %d1{&0:&32}, %d2 # how many places to shift?
10865 lsl.l %d2, %d1 # shift lo(man)
10866 add.l &32, %d2 # add 32 to shft amount
10867
10868 mov.l %d1, FTEMP_HI(%a0) # store hi(man)
10869 clr.l FTEMP_LO(%a0) # lo(man) is now zero
10870
10871 mov.l %d2, %d0 # return shift amount
10872
10873 mov.l (%sp)+, %d3 # restore temp regs
10874 mov.l (%sp)+, %d2
10875
10876 rts
10877
#########################################################################
# unnorm_fix(): convert an UNNORM operand into a NORM, DENORM, or ZERO  #
#               and report the matching optype tag                      #
#                                                                       #
# XDEF **************************************************************** #
#       unnorm_fix()                                                    #
#                                                                       #
# XREF **************************************************************** #
#       norm() - normalize the mantissa                                 #
#                                                                       #
# INPUT *************************************************************** #
#       a0 = pointer to unnormalized extended precision number          #
#                                                                       #
# OUTPUT ************************************************************** #
#       d0 = optype tag (low byte only): NORM, DENORM, or ZERO          #
#       a0 = operand rewritten in place as a norm, denorm, or zero;     #
#            both the exponent and the mantissa may change.             #
#                                                                       #
# NOTE: d1 is used as scratch and is not preserved.                     #
#########################################################################

	global		unnorm_fix
unnorm_fix:
	bfffo		FTEMP_HI(%a0){&0:&32}, %d0 # d0 = # of leading zeroes in hi(man)
	bne.b		unnorm_shift		# some bit of hi(man) is set

#
# nothing is set in hi(man); look for the first set bit in lo(man) instead
#
unnorm_chk_lo:
	bfffo		FTEMP_LO(%a0){&0:&32}, %d0 # d0 = # of leading zeroes in lo(man)
	beq.w		unnorm_zero		# lo(man) is empty too: operand is a zero

	add.w		&32, %d0		# count the 32 zero bits of hi(man) as well

#
# d0 = shift count that would fully normalize the mantissa
#
unnorm_shift:
	mov.l		&0x7fff, %d1		# exponent mask; upper word stays clear
	and.w		FTEMP_EX(%a0), %d1	# d1 = exponent with the sign stripped

	cmp.w		%d0, %d1		# is the shift count larger than exp?
	bgt.b		unnorm_nrm_zero		# yes; may only shift until exp = 0

#
# the exponent can absorb the whole shift, so the result is a NORM
#
	sub.w		%d0, %d1		# d1 = exp - shift count
	mov.w		&0x8000, %d0		# sign mask
	and.w		FTEMP_EX(%a0), %d0	# d0 = sign bit of the operand
	or.w		%d1, %d0		# d0 = {sgn,new exp}
	mov.w		%d0, FTEMP_EX(%a0)	# write back the adjusted exponent

	bsr.l		norm			# shift the mantissa into place

	mov.b		&NORM, %d0		# tag: operand is now a NORM
	rts

#
# the exponent is too small for a full normalization: shift the mantissa
# left by exp (d1) bits only and leave a DENORM with exp = 0
#
unnorm_nrm_zero:
	cmp.b		%d1, &32		# is exp > 32?
	bgt.b		unnorm_nrm_zero_lrg	# yes; shift crosses a whole longword

	and.w		&0x8000, FTEMP_EX(%a0)	# exp = 0; sign is kept

	bfextu		FTEMP_HI(%a0){%d1:&32}, %d0 # new hi(man): 32 bits at bit offset exp
	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)

	mov.l		FTEMP_LO(%a0), %d0	# d0 = old lo(man)
	lsl.l		%d1, %d0		# drop the bits that moved into hi(man)
	mov.l		%d0, FTEMP_LO(%a0)	# store new lo(man)

	mov.b		&DENORM, %d0		# tag: operand is now a DENORM
	rts

#
# exp > 32: every set mantissa bit is in lo(man), so the new hi(man) is
# lo(man) shifted left by (exp - 32) and the new lo(man) is zero
#
unnorm_nrm_zero_lrg:
	and.w		&0x8000, FTEMP_EX(%a0)	# exp = 0; sign is kept

	sub.w		&32, %d1		# d1 = shift within lo(man)

	mov.l		FTEMP_LO(%a0), %d0	# d0 = old lo(man)
	clr.l		FTEMP_LO(%a0)		# new lo(man) = 0
	lsl.l		%d1, %d0		# shift old lo(man) up
	mov.l		%d0, FTEMP_HI(%a0)	# it becomes the new hi(man)

	mov.b		&DENORM, %d0		# tag: operand is now a DENORM
	rts

#
# no mantissa bit is set at all, so this UNNORM is really a zero
#
unnorm_zero:
	and.w		&0x8000, FTEMP_EX(%a0)	# clear the exponent, keep the sign

	mov.b		&ZERO, %d0		# tag: operand is a ZERO
	rts