aboutsummaryrefslogtreecommitdiffstats
path: root/arch/m68k/fpsp040/ssin.S
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 18:20:36 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 18:20:36 -0400
commit1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/m68k/fpsp040/ssin.S
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'arch/m68k/fpsp040/ssin.S')
-rw-r--r--arch/m68k/fpsp040/ssin.S746
1 files changed, 746 insertions, 0 deletions
diff --git a/arch/m68k/fpsp040/ssin.S b/arch/m68k/fpsp040/ssin.S
new file mode 100644
index 000000000000..043c91cdd657
--- /dev/null
+++ b/arch/m68k/fpsp040/ssin.S
@@ -0,0 +1,746 @@
1|
2| ssin.sa 3.3 7/29/91
3|
4| The entry point sSIN computes the sine of an input argument
5| sCOS computes the cosine, and sSINCOS computes both. The
6| corresponding entry points with a "d" computes the same
7| corresponding function values for denormalized inputs.
8|
9| Input: Double-extended number X in location pointed to
10| by address register a0.
11|
12| Output: The function value sin(X) or cos(X) returned in Fp0 if SIN or
13| COS is requested. Otherwise, for SINCOS, sin(X) is returned
14| in Fp0, and cos(X) is returned in Fp1.
15|
16| Modifies: Fp0 for SIN or COS; both Fp0 and Fp1 for SINCOS.
17|
18| Accuracy and Monotonicity: The returned result is within 1 ulp in
19| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
20| result is subsequently rounded to double precision. The
21| result is provably monotonic in double precision.
22|
23| Speed: The programs sSIN and sCOS take approximately 150 cycles for
24| input argument X such that |X| < 15Pi, which is the usual
25| situation. The speed for sSINCOS is approximately 190 cycles.
26|
27| Algorithm:
28|
29| SIN and COS:
30| 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.
31|
32| 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.
33|
34| 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
35| k = N mod 4, so in particular, k = 0,1,2,or 3. Overwrite
36| k by k := k + AdjN.
37|
38| 4. If k is even, go to 6.
39|
40| 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. Return sgn*cos(r)
41| where cos(r) is approximated by an even polynomial in r,
42| 1 + r*r*(B1+s*(B2+ ... + s*B8)), s = r*r.
43| Exit.
44|
45| 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)
46| where sin(r) is approximated by an odd polynomial in r
47| r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r.
48| Exit.
49|
50| 7. If |X| > 1, go to 9.
51|
52| 8. (|X|<2**(-40)) If SIN is invoked, return X; otherwise return 1.
53|
54| 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3.
55|
56| SINCOS:
57| 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
58|
59| 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
60| k = N mod 4, so in particular, k = 0,1,2,or 3.
61|
62| 3. If k is even, go to 5.
63|
64| 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e.
65| j1 exclusive or with the l.s.b. of k.
66| sgn1 := (-1)**j1, sgn2 := (-1)**j2.
67| SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where
68| sin(r) and cos(r) are computed as odd and even polynomials
69| in r, respectively. Exit
70|
71| 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.
72| SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where
73| sin(r) and cos(r) are computed as odd and even polynomials
74| in r, respectively. Exit
75|
76| 6. If |X| > 1, go to 8.
77|
78| 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.
79|
80| 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
81|
82
83| Copyright (C) Motorola, Inc. 1990
84| All Rights Reserved
85|
86| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
87| The copyright notice above does not evidence any
88| actual or intended publication of such source code.
89
90|SSIN idnt 2,1 | Motorola 040 Floating Point Software Package
91
92 |section 8
93
94#include "fpsp.h"
95
96BOUNDS1: .long 0x3FD78000,0x4004BC7E
97TWOBYPI: .long 0x3FE45F30,0x6DC9C883
98
99SINA7: .long 0xBD6AAA77,0xCCC994F5
100SINA6: .long 0x3DE61209,0x7AAE8DA1
101
102SINA5: .long 0xBE5AE645,0x2A118AE4
103SINA4: .long 0x3EC71DE3,0xA5341531
104
105SINA3: .long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
106
107SINA2: .long 0x3FF80000,0x88888888,0x888859AF,0x00000000
108
109SINA1: .long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000
110
111COSB8: .long 0x3D2AC4D0,0xD6011EE3
112COSB7: .long 0xBDA9396F,0x9F45AC19
113
114COSB6: .long 0x3E21EED9,0x0612C972
115COSB5: .long 0xBE927E4F,0xB79D9FCF
116
117COSB4: .long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
118
119COSB3: .long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
120
121COSB2: .long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
122COSB1: .long 0xBF000000
123
124INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A
125
126TWOPI1: .long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
127TWOPI2: .long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
128
129 |xref PITBL
130
131 .set INARG,FP_SCR4
132
133 .set X,FP_SCR5
134 .set XDCARE,X+2
135 .set XFRAC,X+4
136
137 .set RPRIME,FP_SCR1
138 .set SPRIME,FP_SCR2
139
140 .set POSNEG1,L_SCR1
141 .set TWOTO63,L_SCR1
142
143 .set ENDFLAG,L_SCR2
144 .set N,L_SCR2
145
146 .set ADJN,L_SCR3
147
148 | xref t_frcinx
149 |xref t_extdnrm
150 |xref sto_cos
151
152 .global ssind
153ssind:
154|--SIN(X) = X FOR DENORMALIZED X
155 bra t_extdnrm
156
157 .global scosd
158scosd:
159|--COS(X) = 1 FOR DENORMALIZED X
160
161 fmoves #0x3F800000,%fp0
162|
163| 9D25B Fix: Sometimes the previous fmove.s sets fpsr bits
164|
165 fmovel #0,%fpsr
166|
167 bra t_frcinx
168
169 .global ssin
170ssin:
171|--SET ADJN TO 0
172 movel #0,ADJN(%a6)
173 bras SINBGN
174
175 .global scos
176scos:
177|--SET ADJN TO 1
178 movel #1,ADJN(%a6)
179
180SINBGN:
181|--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE
182
183 fmovex (%a0),%fp0 | ...LOAD INPUT
184
185 movel (%a0),%d0
186 movew 4(%a0),%d0
187 fmovex %fp0,X(%a6)
188 andil #0x7FFFFFFF,%d0 | ...COMPACTIFY X
189
190 cmpil #0x3FD78000,%d0 | ...|X| >= 2**(-40)?
191 bges SOK1
192 bra SINSM
193
194SOK1:
195 cmpil #0x4004BC7E,%d0 | ...|X| < 15 PI?
196 blts SINMAIN
197 bra REDUCEX
198
199SINMAIN:
200|--THIS IS THE USUAL CASE, |X| <= 15 PI.
201|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
202 fmovex %fp0,%fp1
203 fmuld TWOBYPI,%fp1 | ...X*2/PI
204
205|--HIDE THE NEXT THREE INSTRUCTIONS
206 lea PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32
207
208
209|--FP1 IS NOW READY
210 fmovel %fp1,N(%a6) | ...CONVERT TO INTEGER
211
212 movel N(%a6),%d0
213 asll #4,%d0
214 addal %d0,%a1 | ...A1 IS THE ADDRESS OF N*PIBY2
215| ...WHICH IS IN TWO PIECES Y1 & Y2
216
217 fsubx (%a1)+,%fp0 | ...X-Y1
218|--HIDE THE NEXT ONE
219 fsubs (%a1),%fp0 | ...FP0 IS R = (X-Y1)-Y2
220
221SINCONT:
222|--continuation from REDUCEX
223
224|--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
225 movel N(%a6),%d0
226 addl ADJN(%a6),%d0 | ...SEE IF D0 IS ODD OR EVEN
227 rorl #1,%d0 | ...D0 WAS ODD IFF D0 IS NEGATIVE
228 cmpil #0,%d0
229 blt COSPOLY
230
231SINPOLY:
232|--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
233|--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
234|--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
235|--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
236|--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
237|--WHERE T=S*S.
238|--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
239|--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
240 fmovex %fp0,X(%a6) | ...X IS R
241 fmulx %fp0,%fp0 | ...FP0 IS S
242|---HIDE THE NEXT TWO WHILE WAITING FOR FP0
243 fmoved SINA7,%fp3
244 fmoved SINA6,%fp2
245|--FP0 IS NOW READY
246 fmovex %fp0,%fp1
247 fmulx %fp1,%fp1 | ...FP1 IS T
248|--HIDE THE NEXT TWO WHILE WAITING FOR FP1
249
250 rorl #1,%d0
251 andil #0x80000000,%d0
252| ...LEAST SIG. BIT OF D0 IN SIGN POSITION
253 eorl %d0,X(%a6) | ...X IS NOW R'= SGN*R
254
255 fmulx %fp1,%fp3 | ...TA7
256 fmulx %fp1,%fp2 | ...TA6
257
258 faddd SINA5,%fp3 | ...A5+TA7
259 faddd SINA4,%fp2 | ...A4+TA6
260
261 fmulx %fp1,%fp3 | ...T(A5+TA7)
262 fmulx %fp1,%fp2 | ...T(A4+TA6)
263
264 faddd SINA3,%fp3 | ...A3+T(A5+TA7)
265 faddx SINA2,%fp2 | ...A2+T(A4+TA6)
266
267 fmulx %fp3,%fp1 | ...T(A3+T(A5+TA7))
268
269 fmulx %fp0,%fp2 | ...S(A2+T(A4+TA6))
270 faddx SINA1,%fp1 | ...A1+T(A3+T(A5+TA7))
271 fmulx X(%a6),%fp0 | ...R'*S
272
273 faddx %fp2,%fp1 | ...[A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
274|--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
275|--FP2 RELEASED, RESTORE NOW AND TAKE FULL ADVANTAGE OF HIDING
276
277
278 fmulx %fp1,%fp0 | ...SIN(R')-R'
279|--FP1 RELEASED.
280
281 fmovel %d1,%FPCR |restore users exceptions
282 faddx X(%a6),%fp0 |last inst - possible exception set
283 bra t_frcinx
284
285
286COSPOLY:
287|--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
288|--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
289|--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
290|--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
291|--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
292|--WHERE T=S*S.
293|--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
294|--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
295|--AND IS THEREFORE STORED AS SINGLE PRECISION.
296
297 fmulx %fp0,%fp0 | ...FP0 IS S
298|---HIDE THE NEXT TWO WHILE WAITING FOR FP0
299 fmoved COSB8,%fp2
300 fmoved COSB7,%fp3
301|--FP0 IS NOW READY
302 fmovex %fp0,%fp1
303 fmulx %fp1,%fp1 | ...FP1 IS T
304|--HIDE THE NEXT TWO WHILE WAITING FOR FP1
305 fmovex %fp0,X(%a6) | ...X IS S
306 rorl #1,%d0
307 andil #0x80000000,%d0
308| ...LEAST SIG. BIT OF D0 IN SIGN POSITION
309
310 fmulx %fp1,%fp2 | ...TB8
311|--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
312 eorl %d0,X(%a6) | ...X IS NOW S'= SGN*S
313 andil #0x80000000,%d0
314
315 fmulx %fp1,%fp3 | ...TB7
316|--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
317 oril #0x3F800000,%d0 | ...D0 IS SGN IN SINGLE
318 movel %d0,POSNEG1(%a6)
319
320 faddd COSB6,%fp2 | ...B6+TB8
321 faddd COSB5,%fp3 | ...B5+TB7
322
323 fmulx %fp1,%fp2 | ...T(B6+TB8)
324 fmulx %fp1,%fp3 | ...T(B5+TB7)
325
326 faddd COSB4,%fp2 | ...B4+T(B6+TB8)
327 faddx COSB3,%fp3 | ...B3+T(B5+TB7)
328
329 fmulx %fp1,%fp2 | ...T(B4+T(B6+TB8))
330 fmulx %fp3,%fp1 | ...T(B3+T(B5+TB7))
331
332 faddx COSB2,%fp2 | ...B2+T(B4+T(B6+TB8))
333 fadds COSB1,%fp1 | ...B1+T(B3+T(B5+TB7))
334
335 fmulx %fp2,%fp0 | ...S(B2+T(B4+T(B6+TB8)))
336|--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
337|--FP2 RELEASED.
338
339
340 faddx %fp1,%fp0
341|--FP1 RELEASED
342
343 fmulx X(%a6),%fp0
344
345 fmovel %d1,%FPCR |restore users exceptions
346 fadds POSNEG1(%a6),%fp0 |last inst - possible exception set
347 bra t_frcinx
348
349
350SINBORS:
351|--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
352|--IF |X| < 2**(-40), RETURN X OR 1.
353 cmpil #0x3FFF8000,%d0
354 bgts REDUCEX
355
356
357SINSM:
358 movel ADJN(%a6),%d0
359 cmpil #0,%d0
360 bgts COSTINY
361
362SINTINY:
363 movew #0x0000,XDCARE(%a6) | ...JUST IN CASE
364 fmovel %d1,%FPCR |restore users exceptions
365 fmovex X(%a6),%fp0 |last inst - possible exception set
366 bra t_frcinx
367
368
369COSTINY:
370 fmoves #0x3F800000,%fp0
371
372 fmovel %d1,%FPCR |restore users exceptions
373 fsubs #0x00800000,%fp0 |last inst - possible exception set
374 bra t_frcinx
375
376
377REDUCEX:
378|--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
379|--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
380|--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
381
382 fmovemx %fp2-%fp5,-(%a7) | ...save FP2 through FP5
383 movel %d2,-(%a7)
384 fmoves #0x00000000,%fp1
385|--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
386|--there is a danger of unwanted overflow in first LOOP iteration. In this
387|--case, reduce argument by one remainder step to make subsequent reduction
388|--safe.
389 cmpil #0x7ffeffff,%d0 |is argument dangerously large?
390 bnes LOOP
391 movel #0x7ffe0000,FP_SCR2(%a6) |yes
392| ;create 2**16383*PI/2
393 movel #0xc90fdaa2,FP_SCR2+4(%a6)
394 clrl FP_SCR2+8(%a6)
395 ftstx %fp0 |test sign of argument
396 movel #0x7fdc0000,FP_SCR3(%a6) |create low half of 2**16383*
397| ;PI/2 at FP_SCR3
398 movel #0x85a308d3,FP_SCR3+4(%a6)
399 clrl FP_SCR3+8(%a6)
400 fblt red_neg
401 orw #0x8000,FP_SCR2(%a6) |positive arg
402 orw #0x8000,FP_SCR3(%a6)
403red_neg:
404 faddx FP_SCR2(%a6),%fp0 |high part of reduction is exact
405 fmovex %fp0,%fp1 |save high result in fp1
406 faddx FP_SCR3(%a6),%fp0 |low part of reduction
407 fsubx %fp0,%fp1 |determine low component of result
408 faddx FP_SCR3(%a6),%fp1 |fp0/fp1 are reduced argument.
409
410|--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
411|--integer quotient will be stored in N
412|--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
413
414LOOP:
415 fmovex %fp0,INARG(%a6) | ...+-2**K * F, 1 <= F < 2
416 movew INARG(%a6),%d0
417 movel %d0,%a1 | ...save a copy of D0
418 andil #0x00007FFF,%d0
419 subil #0x00003FFF,%d0 | ...D0 IS K
420 cmpil #28,%d0
421 bles LASTLOOP
422CONTLOOP:
423 subil #27,%d0 | ...D0 IS L := K-27
424 movel #0,ENDFLAG(%a6)
425 bras WORK
426LASTLOOP:
427 clrl %d0 | ...D0 IS L := 0
428 movel #1,ENDFLAG(%a6)
429
430WORK:
431|--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
432|--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
433
434|--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
435|--2**L * (PIby2_1), 2**L * (PIby2_2)
436
437 movel #0x00003FFE,%d2 | ...BIASED EXPO OF 2/PI
438 subl %d0,%d2 | ...BIASED EXPO OF 2**(-L)*(2/PI)
439
440 movel #0xA2F9836E,FP_SCR1+4(%a6)
441 movel #0x4E44152A,FP_SCR1+8(%a6)
442 movew %d2,FP_SCR1(%a6) | ...FP_SCR1 is 2**(-L)*(2/PI)
443
444 fmovex %fp0,%fp2
445 fmulx FP_SCR1(%a6),%fp2
446|--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
447|--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
448|--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
449|--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
450|--US THE DESIRED VALUE IN FLOATING POINT.
451
452|--HIDE SIX CYCLES OF INSTRUCTION
453 movel %a1,%d2
454 swap %d2
455 andil #0x80000000,%d2
456 oril #0x5F000000,%d2 | ...D2 IS SIGN(INARG)*2**63 IN SGL
457 movel %d2,TWOTO63(%a6)
458
459 movel %d0,%d2
460 addil #0x00003FFF,%d2 | ...BIASED EXPO OF 2**L * (PI/2)
461
462|--FP2 IS READY
463 fadds TWOTO63(%a6),%fp2 | ...THE FRACTIONAL PART OF FP1 IS ROUNDED
464
465|--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2
466 movew %d2,FP_SCR2(%a6)
467 clrw FP_SCR2+2(%a6)
468 movel #0xC90FDAA2,FP_SCR2+4(%a6)
469 clrl FP_SCR2+8(%a6) | ...FP_SCR2 is 2**(L) * Piby2_1
470
471|--FP2 IS READY
472 fsubs TWOTO63(%a6),%fp2 | ...FP2 is N
473
474 addil #0x00003FDD,%d0
475 movew %d0,FP_SCR3(%a6)
476 clrw FP_SCR3+2(%a6)
477 movel #0x85A308D3,FP_SCR3+4(%a6)
478 clrl FP_SCR3+8(%a6) | ...FP_SCR3 is 2**(L) * Piby2_2
479
480 movel ENDFLAG(%a6),%d0
481
482|--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
483|--P2 = 2**(L) * Piby2_2
484 fmovex %fp2,%fp4
485 fmulx FP_SCR2(%a6),%fp4 | ...W = N*P1
486 fmovex %fp2,%fp5
487 fmulx FP_SCR3(%a6),%fp5 | ...w = N*P2
488 fmovex %fp4,%fp3
489|--we want P+p = W+w but |p| <= half ulp of P
490|--Then, we need to compute A := R-P and a := r-p
491 faddx %fp5,%fp3 | ...FP3 is P
492 fsubx %fp3,%fp4 | ...W-P
493
494 fsubx %fp3,%fp0 | ...FP0 is A := R - P
495 faddx %fp5,%fp4 | ...FP4 is p = (W-P)+w
496
497 fmovex %fp0,%fp3 | ...FP3 A
498 fsubx %fp4,%fp1 | ...FP1 is a := r - p
499
500|--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
501|--|r| <= half ulp of R.
502 faddx %fp1,%fp0 | ...FP0 is R := A+a
503|--No need to calculate r if this is the last loop
504 cmpil #0,%d0
505 bgt RESTORE
506
507|--Need to calculate r
508 fsubx %fp0,%fp3 | ...A-R
509 faddx %fp3,%fp1 | ...FP1 is r := (A-R)+a
510 bra LOOP
511
512RESTORE:
513 fmovel %fp2,N(%a6)
514 movel (%a7)+,%d2
515 fmovemx (%a7)+,%fp2-%fp5
516
517
518 movel ADJN(%a6),%d0
519 cmpil #4,%d0
520
521 blt SINCONT
522 bras SCCONT
523
524 .global ssincosd
525ssincosd:
526|--SIN AND COS OF X FOR DENORMALIZED X
527
528 fmoves #0x3F800000,%fp1
529 bsr sto_cos |store cosine result
530 bra t_extdnrm
531
532 .global ssincos
533ssincos:
534|--SET ADJN TO 4
535 movel #4,ADJN(%a6)
536
537 fmovex (%a0),%fp0 | ...LOAD INPUT
538
539 movel (%a0),%d0
540 movew 4(%a0),%d0
541 fmovex %fp0,X(%a6)
542 andil #0x7FFFFFFF,%d0 | ...COMPACTIFY X
543
544 cmpil #0x3FD78000,%d0 | ...|X| >= 2**(-40)?
545 bges SCOK1
546 bra SCSM
547
548SCOK1:
549 cmpil #0x4004BC7E,%d0 | ...|X| < 15 PI?
550 blts SCMAIN
551 bra REDUCEX
552
553
554SCMAIN:
555|--THIS IS THE USUAL CASE, |X| <= 15 PI.
556|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
557 fmovex %fp0,%fp1
558 fmuld TWOBYPI,%fp1 | ...X*2/PI
559
560|--HIDE THE NEXT THREE INSTRUCTIONS
561 lea PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32
562
563
564|--FP1 IS NOW READY
565 fmovel %fp1,N(%a6) | ...CONVERT TO INTEGER
566
567 movel N(%a6),%d0
568 asll #4,%d0
569 addal %d0,%a1 | ...ADDRESS OF N*PIBY2, IN Y1, Y2
570
571 fsubx (%a1)+,%fp0 | ...X-Y1
572 fsubs (%a1),%fp0 | ...FP0 IS R = (X-Y1)-Y2
573
574SCCONT:
575|--continuation point from REDUCEX
576
577|--HIDE THE NEXT TWO
578 movel N(%a6),%d0
579 rorl #1,%d0
580
581 cmpil #0,%d0 | ...D0 < 0 IFF N IS ODD
582 bge NEVEN
583
584NODD:
585|--REGISTERS SAVED SO FAR: D0, A0, FP2.
586
587 fmovex %fp0,RPRIME(%a6)
588 fmulx %fp0,%fp0 | ...FP0 IS S = R*R
589 fmoved SINA7,%fp1 | ...A7
590 fmoved COSB8,%fp2 | ...B8
591 fmulx %fp0,%fp1 | ...SA7
592 movel %d2,-(%a7)
593 movel %d0,%d2
594 fmulx %fp0,%fp2 | ...SB8
595 rorl #1,%d2
596 andil #0x80000000,%d2
597
598 faddd SINA6,%fp1 | ...A6+SA7
599 eorl %d0,%d2
600 andil #0x80000000,%d2
601 faddd COSB7,%fp2 | ...B7+SB8
602
603 fmulx %fp0,%fp1 | ...S(A6+SA7)
604 eorl %d2,RPRIME(%a6)
605 movel (%a7)+,%d2
606 fmulx %fp0,%fp2 | ...S(B7+SB8)
607 rorl #1,%d0
608 andil #0x80000000,%d0
609
610 faddd SINA5,%fp1 | ...A5+S(A6+SA7)
611 movel #0x3F800000,POSNEG1(%a6)
612 eorl %d0,POSNEG1(%a6)
613 faddd COSB6,%fp2 | ...B6+S(B7+SB8)
614
615 fmulx %fp0,%fp1 | ...S(A5+S(A6+SA7))
616 fmulx %fp0,%fp2 | ...S(B6+S(B7+SB8))
617 fmovex %fp0,SPRIME(%a6)
618
619 faddd SINA4,%fp1 | ...A4+S(A5+S(A6+SA7))
620 eorl %d0,SPRIME(%a6)
621 faddd COSB5,%fp2 | ...B5+S(B6+S(B7+SB8))
622
623 fmulx %fp0,%fp1 | ...S(A4+...)
624 fmulx %fp0,%fp2 | ...S(B5+...)
625
626 faddd SINA3,%fp1 | ...A3+S(A4+...)
627 faddd COSB4,%fp2 | ...B4+S(B5+...)
628
629 fmulx %fp0,%fp1 | ...S(A3+...)
630 fmulx %fp0,%fp2 | ...S(B4+...)
631
632 faddx SINA2,%fp1 | ...A2+S(A3+...)
633 faddx COSB3,%fp2 | ...B3+S(B4+...)
634
635 fmulx %fp0,%fp1 | ...S(A2+...)
636 fmulx %fp0,%fp2 | ...S(B3+...)
637
638 faddx SINA1,%fp1 | ...A1+S(A2+...)
639 faddx COSB2,%fp2 | ...B2+S(B3+...)
640
641 fmulx %fp0,%fp1 | ...S(A1+...)
642 fmulx %fp2,%fp0 | ...S(B2+...)
643
644
645
646 fmulx RPRIME(%a6),%fp1 | ...R'S(A1+...)
647 fadds COSB1,%fp0 | ...B1+S(B2...)
648 fmulx SPRIME(%a6),%fp0 | ...S'(B1+S(B2+...))
649
650 movel %d1,-(%sp) |restore users mode & precision
651 andil #0xff,%d1 |mask off all exceptions
652 fmovel %d1,%FPCR
653 faddx RPRIME(%a6),%fp1 | ...COS(X)
654 bsr sto_cos |store cosine result
655 fmovel (%sp)+,%FPCR |restore users exceptions
656 fadds POSNEG1(%a6),%fp0 | ...SIN(X)
657
658 bra t_frcinx
659
660
661NEVEN:
662|--REGISTERS SAVED SO FAR: FP2.
663
664 fmovex %fp0,RPRIME(%a6)
665 fmulx %fp0,%fp0 | ...FP0 IS S = R*R
666 fmoved COSB8,%fp1 | ...B8
667 fmoved SINA7,%fp2 | ...A7
668 fmulx %fp0,%fp1 | ...SB8
669 fmovex %fp0,SPRIME(%a6)
670 fmulx %fp0,%fp2 | ...SA7
671 rorl #1,%d0
672 andil #0x80000000,%d0
673 faddd COSB7,%fp1 | ...B7+SB8
674 faddd SINA6,%fp2 | ...A6+SA7
675 eorl %d0,RPRIME(%a6)
676 eorl %d0,SPRIME(%a6)
677 fmulx %fp0,%fp1 | ...S(B7+SB8)
678 oril #0x3F800000,%d0
679 movel %d0,POSNEG1(%a6)
680 fmulx %fp0,%fp2 | ...S(A6+SA7)
681
682 faddd COSB6,%fp1 | ...B6+S(B7+SB8)
683 faddd SINA5,%fp2 | ...A5+S(A6+SA7)
684
685 fmulx %fp0,%fp1 | ...S(B6+S(B7+SB8))
686 fmulx %fp0,%fp2 | ...S(A5+S(A6+SA7))
687
688 faddd COSB5,%fp1 | ...B5+S(B6+S(B7+SB8))
689 faddd SINA4,%fp2 | ...A4+S(A5+S(A6+SA7))
690
691 fmulx %fp0,%fp1 | ...S(B5+...)
692 fmulx %fp0,%fp2 | ...S(A4+...)
693
694 faddd COSB4,%fp1 | ...B4+S(B5+...)
695 faddd SINA3,%fp2 | ...A3+S(A4+...)
696
697 fmulx %fp0,%fp1 | ...S(B4+...)
698 fmulx %fp0,%fp2 | ...S(A3+...)
699
700 faddx COSB3,%fp1 | ...B3+S(B4+...)
701 faddx SINA2,%fp2 | ...A2+S(A3+...)
702
703 fmulx %fp0,%fp1 | ...S(B3+...)
704 fmulx %fp0,%fp2 | ...S(A2+...)
705
706 faddx COSB2,%fp1 | ...B2+S(B3+...)
707 faddx SINA1,%fp2 | ...A1+S(A2+...)
708
709 fmulx %fp0,%fp1 | ...S(B2+...)
710 fmulx %fp2,%fp0 | ...s(a1+...)
711
712
713
714 fadds COSB1,%fp1 | ...B1+S(B2...)
715 fmulx RPRIME(%a6),%fp0 | ...R'S(A1+...)
716 fmulx SPRIME(%a6),%fp1 | ...S'(B1+S(B2+...))
717
718 movel %d1,-(%sp) |save users mode & precision
719 andil #0xff,%d1 |mask off all exceptions
720 fmovel %d1,%FPCR
721 fadds POSNEG1(%a6),%fp1 | ...COS(X)
722 bsr sto_cos |store cosine result
723 fmovel (%sp)+,%FPCR |restore users exceptions
724 faddx RPRIME(%a6),%fp0 | ...SIN(X)
725
726 bra t_frcinx
727
728SCBORS:
729 cmpil #0x3FFF8000,%d0
730 bgt REDUCEX
731
732
733SCSM:
734 movew #0x0000,XDCARE(%a6)
735 fmoves #0x3F800000,%fp1
736
737 movel %d1,-(%sp) |save users mode & precision
738 andil #0xff,%d1 |mask off all exceptions
739 fmovel %d1,%FPCR
740 fsubs #0x00800000,%fp1
741 bsr sto_cos |store cosine result
742 fmovel (%sp)+,%FPCR |restore users exceptions
743 fmovex X(%a6),%fp0
744 bra t_frcinx
745
746 |end