diff options
Diffstat (limited to 'arch/m68k/fpsp040/satan.S')
-rw-r--r-- | arch/m68k/fpsp040/satan.S | 478 |
1 files changed, 478 insertions, 0 deletions
diff --git a/arch/m68k/fpsp040/satan.S b/arch/m68k/fpsp040/satan.S new file mode 100644 index 000000000000..20dae222d51e --- /dev/null +++ b/arch/m68k/fpsp040/satan.S | |||
@@ -0,0 +1,478 @@ | |||
1 | | | ||
2 | | satan.sa 3.3 12/19/90 | ||
3 | | | ||
4 | | The entry point satan computes the arctangent of an | ||
5 | | input value. satand does the same except the input value is a | ||
6 | | denormalized number. | ||
7 | | | ||
8 | | Input: Double-extended value in memory location pointed to by address | ||
9 | | register a0. | ||
10 | | | ||
11 | | Output: Arctan(X) returned in floating-point register Fp0. | ||
12 | | | ||
13 | | Accuracy and Monotonicity: The returned result is within 2 ulps in | ||
14 | | 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the | ||
15 | | result is subsequently rounded to double precision. The | ||
16 | | result is provably monotonic in double precision. | ||
17 | | | ||
18 | | Speed: The program satan takes approximately 160 cycles for input | ||
19 | | argument X such that 1/16 < |X| < 16. For the other arguments, | ||
20 | | the program will run no worse than 10% slower. | ||
21 | | | ||
22 | | Algorithm: | ||
23 | | Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. | ||
24 | | | ||
25 | | Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3. | ||
26 | | Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits | ||
27 | | of X with a bit-1 attached at the 6-th bit position. Define u | ||
28 | | to be u = (X-F) / (1 + X*F). | ||
29 | | | ||
30 | | Step 3. Approximate arctan(u) by a polynomial poly. | ||
31 | | | ||
32 | | Step 4. Return arctan(F) + poly, arctan(F) is fetched from a table of values | ||
33 | | calculated beforehand. Exit. | ||
34 | | | ||
35 | | Step 5. If |X| >= 16, go to Step 7. | ||
36 | | | ||
37 | | Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. | ||
38 | | | ||
39 | | Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'. | ||
40 | | Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. | ||
41 | | | ||
42 | |||
43 | | Copyright (C) Motorola, Inc. 1990 | ||
44 | | All Rights Reserved | ||
45 | | | ||
46 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
47 | | The copyright notice above does not evidence any | ||
48 | | actual or intended publication of such source code. | ||
49 | |||
50 | |satan idnt 2,1 | Motorola 040 Floating Point Software Package | ||
51 | |||
52 | |section 8 | ||
53 | |||
54 | #include "fpsp.h" | ||
55 | |||
56 | BOUNDS1: .long 0x3FFB8000,0x4002FFFF | ...COMPACT-FORM BOUNDS OF 1/16 <= |X| < 16; LABEL NOT REFERENCED ELSEWHERE IN THIS FILE (satan USES THE SAME VALUES AS IMMEDIATES) | ||
57 | |||
58 | ONE: .long 0x3F800000 | ...SINGLE-PRECISION 1.0; LABEL NOT REFERENCED ELSEWHERE IN THIS FILE (ATANMAIN USES THE IMMEDIATE) | ||
59 | |||
60 | .long 0x00000000 | ||
61 | |||
62 | ATANA3: .long 0xBFF6687E,0x314987D8 | ...A3, A2, A1: DOUBLE-PRECISION COEFFICIENTS READ BY ATANMAIN | ||
63 | ATANA2: .long 0x4002AC69,0x34A26DB3 | ||
64 | |||
65 | ATANA1: .long 0xBFC2476F,0x4E1DA28E | ||
66 | ATANB6: .long 0x3FB34444,0x7F876989 | ...B6 DOWN TO B1: DOUBLE-PRECISION COEFFICIENTS READ BY ATANSM | ||
67 | |||
68 | ATANB5: .long 0xBFB744EE,0x7FAF45DB | ||
69 | ATANB4: .long 0x3FBC71C6,0x46940220 | ||
70 | |||
71 | ATANB3: .long 0xBFC24924,0x921872F9 | ||
72 | ATANB2: .long 0x3FC99999,0x99998FA9 | ||
73 | |||
74 | ATANB1: .long 0xBFD55555,0x55555555 | ||
75 | ATANC5: .long 0xBFB70BF3,0x98539E6A | ...C5 DOWN TO C1: DOUBLE-PRECISION COEFFICIENTS READ BY ATANBIG | ||
76 | |||
77 | ATANC4: .long 0x3FBC7187,0x962D1D7D | ||
78 | ATANC3: .long 0xBFC24924,0x827107B8 | ||
79 | |||
80 | ATANC2: .long 0x3FC99999,0x9996263E | ||
81 | ATANC1: .long 0xBFD55555,0x55555536 | ||
82 | |||
83 | PPIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 | ...+PI/2, EXTENDED PRECISION | ||
84 | NPIBY2: .long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000 | ...-PI/2, EXTENDED PRECISION | ||
85 | PTINY: .long 0x00010000,0x80000000,0x00000000,0x00000000 | ...+TINY, SUBTRACTED FROM +PI/2 IN ATANHUGE | ||
86 | NTINY: .long 0x80010000,0x80000000,0x00000000,0x00000000 | ...-TINY, SUBTRACTED FROM -PI/2 IN ATANHUGE | ||
87 | |||
88 | ATANTBL: | ...ATAN(|F|) TABLE: 128 ENTRIES OF 16 BYTES; ATANMAIN COPIES THE FIRST 12 BYTES OF THE SELECTED ENTRY | ||
89 | .long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000 | ||
90 | .long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000 | ||
91 | .long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000 | ||
92 | .long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000 | ||
93 | .long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000 | ||
94 | .long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000 | ||
95 | .long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000 | ||
96 | .long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000 | ||
97 | .long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000 | ||
98 | .long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000 | ||
99 | .long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000 | ||
100 | .long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000 | ||
101 | .long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000 | ||
102 | .long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000 | ||
103 | .long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000 | ||
104 | .long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000 | ||
105 | .long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000 | ||
106 | .long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000 | ||
107 | .long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000 | ||
108 | .long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000 | ||
109 | .long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000 | ||
110 | .long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000 | ||
111 | .long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000 | ||
112 | .long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000 | ||
113 | .long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000 | ||
114 | .long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000 | ||
115 | .long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000 | ||
116 | .long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000 | ||
117 | .long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000 | ||
118 | .long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000 | ||
119 | .long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000 | ||
120 | .long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000 | ||
121 | .long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000 | ||
122 | .long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000 | ||
123 | .long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000 | ||
124 | .long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000 | ||
125 | .long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000 | ||
126 | .long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000 | ||
127 | .long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000 | ||
128 | .long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000 | ||
129 | .long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000 | ||
130 | .long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000 | ||
131 | .long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000 | ||
132 | .long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000 | ||
133 | .long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000 | ||
134 | .long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000 | ||
135 | .long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000 | ||
136 | .long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000 | ||
137 | .long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000 | ||
138 | .long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000 | ||
139 | .long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000 | ||
140 | .long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000 | ||
141 | .long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000 | ||
142 | .long 0x3FFE0000,0x97731420,0x365E538C,0x00000000 | ||
143 | .long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000 | ||
144 | .long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000 | ||
145 | .long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000 | ||
146 | .long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000 | ||
147 | .long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000 | ||
148 | .long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000 | ||
149 | .long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000 | ||
150 | .long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000 | ||
151 | .long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000 | ||
152 | .long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000 | ||
153 | .long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000 | ||
154 | .long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000 | ||
155 | .long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000 | ||
156 | .long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000 | ||
157 | .long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000 | ||
158 | .long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000 | ||
159 | .long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000 | ||
160 | .long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000 | ||
161 | .long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000 | ||
162 | .long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000 | ||
163 | .long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000 | ||
164 | .long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000 | ||
165 | .long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000 | ||
166 | .long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000 | ||
167 | .long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000 | ||
168 | .long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000 | ||
169 | .long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000 | ||
170 | .long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000 | ||
171 | .long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000 | ||
172 | .long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000 | ||
173 | .long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000 | ||
174 | .long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000 | ||
175 | .long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000 | ||
176 | .long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000 | ||
177 | .long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000 | ||
178 | .long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000 | ||
179 | .long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000 | ||
180 | .long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000 | ||
181 | .long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000 | ||
182 | .long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000 | ||
183 | .long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000 | ||
184 | .long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000 | ||
185 | .long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000 | ||
186 | .long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000 | ||
187 | .long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000 | ||
188 | .long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000 | ||
189 | .long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000 | ||
190 | .long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000 | ||
191 | .long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000 | ||
192 | .long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000 | ||
193 | .long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000 | ||
194 | .long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000 | ||
195 | .long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000 | ||
196 | .long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000 | ||
197 | .long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000 | ||
198 | .long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000 | ||
199 | .long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000 | ||
200 | .long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000 | ||
201 | .long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000 | ||
202 | .long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000 | ||
203 | .long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000 | ||
204 | .long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000 | ||
205 | .long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000 | ||
206 | .long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000 | ||
207 | .long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000 | ||
208 | .long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000 | ||
209 | .long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000 | ||
210 | .long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000 | ||
211 | .long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000 | ||
212 | .long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000 | ||
213 | .long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000 | ||
214 | .long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000 | ||
215 | .long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000 | ||
216 | .long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000 | ||
217 | |||
218 | .set X,FP_SCR1 | ...SCRATCH COPY OF X, ADDRESSED OFF a6 (LATER OVERWRITTEN WITH F OR X'); FP_SCR1 IS NOT DEFINED IN THIS FILE | ||
219 | .set XDCARE,X+2 | ...WORD AFTER SIGN/EXPONENT, ZEROED BEFORE X IS RE-READ | ||
220 | .set XFRAC,X+4 | ...UPPER 32 MANTISSA BITS | ||
221 | .set XFRACLO,X+8 | ...LOWER 32 MANTISSA BITS | ||
222 | |||
223 | .set ATANF,FP_SCR2 | ...SIGN(F)*ATAN(|F|) IS ASSEMBLED HERE BY ATANMAIN; FP_SCR2 IS NOT DEFINED IN THIS FILE | ||
224 | .set ATANFHI,ATANF+4 | ||
225 | .set ATANFLO,ATANF+8 | ||
226 | |||
227 | |||
228 | | xref t_frcinx | ||
229 | |xref t_extdnrm | ||
230 | |||
231 | .global satand | ||
232 | satand: | ||
233 | |--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT | ||
234 | |--NO COMPUTATION IS DONE HERE; CONTROL GOES STRAIGHT TO t_extdnrm, WHICH IS NOT DEFINED IN THIS FILE. | ||
235 | bra t_extdnrm | ||
236 | |||
237 | .global satan | ||
238 | satan: | ||
239 | |--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT A NAN. | ||
240 | |--a0 POINTS TO X; THE RESULT IS LEFT IN FP0. DISPATCHES ON |X| TO ATANSM, ATANMAIN OR ATANBIG. | ||
241 | fmovex (%a0),%fp0 | ...LOAD INPUT | ||
242 | |--NOTE(review): d1 IS COPIED TO FPCR ON EVERY EXIT PATH OF satan BUT IS NEVER SET IN THIS FILE; PRESUMABLY THE CALLER SUPPLIES THE USER FPCR IN d1 -- CONFIRM. | ||
243 | movel (%a0),%d0 | ...UPPER WORD OF d0 = SIGN AND EXPONENT OF X | ||
244 | movew 4(%a0),%d0 | ...LOWER WORD OF d0 = TOP 16 MANTISSA BITS | ||
245 | fmovex %fp0,X(%a6) | ...KEEP A COPY OF X IN SCRATCH | ||
246 | andil #0x7FFFFFFF,%d0 | ...CLEAR SIGN: d0 IS |X| IN COMPACT FORM | ||
247 | |||
248 | cmpil #0x3FFB8000,%d0 | ...|X| >= 1/16? | ||
249 | bges ATANOK1 | ||
250 | bra ATANSM | ...NO: |X| < 1/16 | ||
251 | |||
252 | ATANOK1: | ||
253 | cmpil #0x4002FFFF,%d0 | ...|X| < 16 ? | ||
254 | bles ATANMAIN | ||
255 | bra ATANBIG | ...NO: |X| >= 16 | ||
256 | |||
257 | |||
258 | |--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE | ||
259 | |--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ). | ||
260 | |--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN | ||
261 | |--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE | ||
262 | |--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS | ||
263 | |--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR | ||
264 | |--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO | ||
265 | |--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDDEN UNDER THE | ||
266 | |--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL | ||
267 | |--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES | ||
268 | |--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION | ||
269 | |--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION | ||
270 | |--WILL INVOLVE A VERY LONG POLYNOMIAL. | ||
271 | |||
272 | |--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS | ||
273 | |--WE CHOOSE F TO BE +-2^K * 1.BBBB1 | ||
274 | |--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE | ||
275 | |--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE | ||
276 | |--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS | ||
277 | |-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|). | ||
278 | |||
279 | ATANMAIN: | ||
280 | |--ON ENTRY: FP0 = X, X(a6) = COPY OF X, d0 = |X| IN COMPACT FORM. | ||
281 | movew #0x0000,XDCARE(%a6) | ...CLEAN UP X JUST IN CASE | ||
282 | andil #0xF8000000,XFRAC(%a6) | ...FIRST 5 BITS | ||
283 | oril #0x04000000,XFRAC(%a6) | ...SET 6-TH BIT TO 1 | ||
284 | movel #0x00000000,XFRACLO(%a6) | ...LOCATION OF X IS NOW F | ||
285 | |||
286 | fmovex %fp0,%fp1 | ...FP1 IS X | ||
287 | fmulx X(%a6),%fp1 | ...FP1 IS X*F, NOTE THAT X*F > 0 | ||
288 | fsubx X(%a6),%fp0 | ...FP0 IS X-F | ||
289 | fadds #0x3F800000,%fp1 | ...FP1 IS 1 + X*F | ||
290 | fdivx %fp1,%fp0 | ...FP0 IS U = (X-F)/(1+X*F) | ||
291 | |||
292 | |--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|), | ||
293 | |--CREATE ATAN(F) AND STORE IT IN ATANF, AND | ||
294 | |--SAVE REGISTER d2 (FP2 IS USED BELOW BUT IS NOT SAVED HERE). | ||
295 | |||
296 | movel %d2,-(%a7) | ...SAVE d2 TEMPORARILY | ||
297 | movel %d0,%d2 | ...THE EXPO AND 16 BITS OF X | ||
298 | andil #0x00007800,%d0 | ...4 VARYING BITS OF F'S FRACTION | ||
299 | andil #0x7FFF0000,%d2 | ...EXPONENT OF F | ||
300 | subil #0x3FFB0000,%d2 | ...K+4 | ||
301 | asrl #1,%d2 | ...K+4 NOW SITS AT BIT 15, JUST ABOVE THE 4 FRACTION BITS | ||
302 | addl %d2,%d0 | ...THE 7 BITS IDENTIFYING F | ||
303 | asrl #7,%d0 | ...BYTE OFFSET (16 * INDEX) INTO TBL OF ATAN(|F|) | ||
304 | lea ATANTBL,%a1 | ||
305 | addal %d0,%a1 | ...ADDRESS OF ATAN(|F|) | ||
306 | movel (%a1)+,ATANF(%a6) | ...COPY SIGN/EXPONENT LONGWORD | ||
307 | movel (%a1)+,ATANFHI(%a6) | ...COPY UPPER MANTISSA LONGWORD | ||
308 | movel (%a1)+,ATANFLO(%a6) | ...ATANF IS NOW ATAN(|F|) | ||
309 | movel X(%a6),%d0 | ...LOAD SIGN AND EXPO. AGAIN | ||
310 | andil #0x80000000,%d0 | ...SIGN(F) | ||
311 | orl %d0,ATANF(%a6) | ...ATANF IS NOW SIGN(F)*ATAN(|F|) | ||
312 | movel (%a7)+,%d2 | ...RESTORE d2 | ||
313 | |||
314 | |--THAT'S ALL I HAVE TO DO FOR NOW, | ||
315 | |--BUT ALAS, THE DIVIDE IS STILL CRANKING! | ||
316 | |||
317 | |--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS | ||
318 | |--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U | ||
319 | |--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT. | ||
320 | |--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3)) | ||
321 | |--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3. | ||
322 | |--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT | ||
323 | |--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED | ||
324 | |||
325 | |||
326 | fmovex %fp0,%fp1 | ...FP1 IS U | ||
327 | fmulx %fp1,%fp1 | ...FP1 IS V = U*U | ||
328 | fmoved ATANA3,%fp2 | ...FP2 IS A3 | ||
329 | faddx %fp1,%fp2 | ...A3+V | ||
330 | fmulx %fp1,%fp2 | ...V*(A3+V) | ||
331 | fmulx %fp0,%fp1 | ...U*V | ||
332 | faddd ATANA2,%fp2 | ...A2+V*(A3+V) | ||
333 | fmuld ATANA1,%fp1 | ...A1*U*V | ||
334 | fmulx %fp2,%fp1 | ...A1*U*V*(A2+V*(A3+V)) | ||
335 | |||
336 | faddx %fp1,%fp0 | ...ATAN(U), FP1 RELEASED | ||
337 | fmovel %d1,%FPCR |restore users exceptions | ||
338 | faddx ATANF(%a6),%fp0 | ...ATAN(X) | ||
339 | bra t_frcinx | ...EXIT THROUGH t_frcinx, WHICH IS NOT DEFINED IN THIS FILE | ||
340 | |||
347 | ATANSM: | ||
348 | |--REACHED FROM satan WHEN |X| < 1/16. ON ENTRY: FP0 = X, X(a6) = COPY OF X, d0 = |X| IN COMPACT FORM. | ||
349 | |--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE | ||
350 | |--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6))))) | ||
351 | |--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] ) | ||
352 | |--WHERE Y = X*X, AND Z = Y*Y. | ||
353 | |||
354 | cmpil #0x3FD78000,%d0 | ...|X| < 2^(-40)? | ||
355 | blt ATANTINY | ||
356 | |--COMPUTE POLYNOMIAL | ||
357 | fmulx %fp0,%fp0 | ...FP0 IS Y = X*X | ||
358 | |||
359 | |||
360 | movew #0x0000,XDCARE(%a6) | ...CLEAN UP X BEFORE IT IS RE-READ BELOW | ||
361 | |||
362 | fmovex %fp0,%fp1 | ||
363 | fmulx %fp1,%fp1 | ...FP1 IS Z = Y*Y | ||
364 | |||
365 | fmoved ATANB6,%fp2 | ...FP2 IS B6 (EVEN-INDEX CHAIN) | ||
366 | fmoved ATANB5,%fp3 | ...FP3 IS B5 (ODD-INDEX CHAIN) | ||
367 | |||
368 | fmulx %fp1,%fp2 | ...Z*B6 | ||
369 | fmulx %fp1,%fp3 | ...Z*B5 | ||
370 | |||
371 | faddd ATANB4,%fp2 | ...B4+Z*B6 | ||
372 | faddd ATANB3,%fp3 | ...B3+Z*B5 | ||
373 | |||
374 | fmulx %fp1,%fp2 | ...Z*(B4+Z*B6) | ||
375 | fmulx %fp3,%fp1 | ...Z*(B3+Z*B5) | ||
376 | |||
377 | faddd ATANB2,%fp2 | ...B2+Z*(B4+Z*B6) | ||
378 | faddd ATANB1,%fp1 | ...B1+Z*(B3+Z*B5) | ||
379 | |||
380 | fmulx %fp0,%fp2 | ...Y*(B2+Z*(B4+Z*B6)) | ||
381 | fmulx X(%a6),%fp0 | ...X*Y | ||
382 | |||
383 | faddx %fp2,%fp1 | ...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] | ||
384 | |||
385 | |||
386 | fmulx %fp1,%fp0 | ...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) | ||
387 | |||
388 | fmovel %d1,%FPCR |restore users exceptions | ||
389 | faddx X(%a6),%fp0 | ...X + X*Y*(...) = ATAN(X) | ||
390 | |||
391 | bra t_frcinx | ...EXIT THROUGH t_frcinx, WHICH IS NOT DEFINED IN THIS FILE | ||
392 | |||
393 | ATANTINY: | ||
394 | |--|X| < 2^(-40), ATAN(X) = X | ||
395 | movew #0x0000,XDCARE(%a6) | ...CLEAN UP THE SAVED X BEFORE RELOADING IT | ||
396 | |||
397 | fmovel %d1,%FPCR |restore users exceptions | ||
398 | fmovex X(%a6),%fp0 |last inst - possible exception set | ||
399 | |||
400 | bra t_frcinx | ...EXIT THROUGH t_frcinx, WHICH IS NOT DEFINED IN THIS FILE | ||
401 | |||
402 | ATANBIG: | ||
403 | |--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE, | ||
404 | |--RETURN SIGN(X)*PI/2 + ATAN(-1/X). | ||
405 | cmpil #0x40638000,%d0 | ...|X| > 2^(100)? | ||
406 | bgt ATANHUGE | ||
407 | |||
408 | |--APPROXIMATE ATAN(-1/X) BY | ||
409 | |--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X' | ||
410 | |--THIS CAN BE RE-WRITTEN AS | ||
411 | |--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y. | ||
412 | |||
413 | fmoves #0xBF800000,%fp1 | ...LOAD -1 | ||
414 | fdivx %fp0,%fp1 | ...FP1 IS -1/X | ||
415 | |||
416 | |||
417 | |--DIVIDE IS STILL CRANKING | ||
418 | |||
419 | fmovex %fp1,%fp0 | ...FP0 IS X' | ||
420 | fmulx %fp0,%fp0 | ...FP0 IS Y = X'*X' | ||
421 | fmovex %fp1,X(%a6) | ...X IS REALLY X' | ||
422 | |||
423 | fmovex %fp0,%fp1 | ||
424 | fmulx %fp1,%fp1 | ...FP1 IS Z = Y*Y | ||
425 | |||
426 | fmoved ATANC5,%fp3 | ...FP3 IS C5 | ||
427 | fmoved ATANC4,%fp2 | ...FP2 IS C4 | ||
428 | |||
429 | fmulx %fp1,%fp3 | ...Z*C5 | ||
430 | fmulx %fp1,%fp2 | ...Z*C4 | ||
431 | |||
432 | faddd ATANC3,%fp3 | ...C3+Z*C5 | ||
433 | faddd ATANC2,%fp2 | ...C2+Z*C4 | ||
434 | |||
435 | fmulx %fp3,%fp1 | ...Z*(C3+Z*C5), FP3 RELEASED | ||
436 | fmulx %fp0,%fp2 | ...Y*(C2+Z*C4) | ||
437 | |||
438 | faddd ATANC1,%fp1 | ...C1+Z*(C3+Z*C5) | ||
439 | fmulx X(%a6),%fp0 | ...X'*Y | ||
440 | |||
441 | faddx %fp2,%fp1 | ...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] | ||
442 | |||
443 | |||
444 | fmulx %fp1,%fp0 | ...X'*Y*([C1+Z*(C3+Z*C5)] | ||
445 | | ... +[Y*(C2+Z*C4)]) | ||
446 | faddx X(%a6),%fp0 | ...X' + X'*Y*(...) = ATAN(X') | ||
447 | |||
448 | fmovel %d1,%FPCR |restore users exceptions | ||
449 | |||
450 | btstb #7,(%a0) | ...TEST THE SIGN BIT OF THE ORIGINAL X | ||
451 | beqs pos_big | ||
452 | |||
453 | neg_big: | ||
454 | faddx NPIBY2,%fp0 | ...X < 0: -PI/2 + ATAN(X') | ||
455 | bra t_frcinx | ||
456 | |||
457 | pos_big: | ||
458 | faddx PPIBY2,%fp0 | ...X > 0: +PI/2 + ATAN(X') | ||
459 | bra t_frcinx | ||
460 | |||
461 | ATANHUGE: | ||
462 | |--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY | ||
463 | btstb #7,(%a0) | ...TEST THE SIGN BIT OF THE ORIGINAL X | ||
464 | beqs pos_huge | ||
465 | |||
466 | neg_huge: | ||
467 | fmovex NPIBY2,%fp0 | ...FP0 IS -PI/2 | ||
468 | fmovel %d1,%fpcr | ...SAME d1-TO-FPCR WRITE AS ON THE OTHER EXIT PATHS, DONE BEFORE THE FINAL OPERATION | ||
469 | fsubx NTINY,%fp0 | ...-PI/2 - (-TINY) = -(PI/2 - TINY) | ||
470 | bra t_frcinx | ||
471 | |||
472 | pos_huge: | ||
473 | fmovex PPIBY2,%fp0 | ...FP0 IS +PI/2 | ||
474 | fmovel %d1,%fpcr | ...SAME d1-TO-FPCR WRITE AS ON THE OTHER EXIT PATHS, DONE BEFORE THE FINAL OPERATION | ||
475 | fsubx PTINY,%fp0 | ...PI/2 - TINY | ||
476 | bra t_frcinx | ||
477 | |||
478 | |end | ||