diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/m68k/fpsp040 |
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'arch/m68k/fpsp040')
44 files changed, 16782 insertions, 0 deletions
diff --git a/arch/m68k/fpsp040/Makefile b/arch/m68k/fpsp040/Makefile new file mode 100644 index 000000000000..0214d2f6f8b0 --- /dev/null +++ b/arch/m68k/fpsp040/Makefile | |||
@@ -0,0 +1,16 @@ | |||
1 | # Lists the FPSP objects to build (obj-y) and the extra assembler (-traditional) and linker (-x) flags. | ||
2 | # Makefile for Linux arch/m68k/fpsp040 source directory | ||
3 | # NOTE(review): OS_OBJS is not assigned anywhere in this file, so the fpsp.h dependency rule on the last line may have an empty target list -- confirm. | ||
4 | |||
5 | obj-y := bindec.o binstr.o decbin.o do_func.o gen_except.o get_op.o \ | ||
6 | kernel_ex.o res_func.o round.o sacos.o sasin.o satan.o satanh.o \ | ||
7 | scosh.o setox.o sgetem.o sint.o slog2.o slogn.o \ | ||
8 | smovecr.o srem_mod.o scale.o \ | ||
9 | ssin.o ssinh.o stan.o stanh.o sto_res.o stwotox.o tbldo.o util.o \ | ||
10 | x_bsun.o x_fline.o x_operr.o x_ovfl.o x_snan.o x_store.o \ | ||
11 | x_unfl.o x_unimp.o x_unsupp.o bugfix.o skeleton.o | ||
12 | |||
13 | EXTRA_AFLAGS := -traditional | ||
14 | EXTRA_LDFLAGS := -x | ||
15 | |||
16 | $(OS_OBJS): fpsp.h | ||
diff --git a/arch/m68k/fpsp040/README b/arch/m68k/fpsp040/README new file mode 100644 index 000000000000..f5749446033e --- /dev/null +++ b/arch/m68k/fpsp040/README | |||
@@ -0,0 +1,30 @@ | |||
1 | |||
2 | MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP | ||
3 | M68000 Hi-Performance Microprocessor Division | ||
4 | M68040 Software Package | ||
5 | |||
6 | M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. | ||
7 | All rights reserved. | ||
8 | |||
9 | THE SOFTWARE is provided on an "AS IS" basis and without warranty. | ||
10 | To the maximum extent permitted by applicable law, | ||
11 | MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, | ||
12 | INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A | ||
13 | PARTICULAR PURPOSE and any warranty against infringement with | ||
14 | regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) | ||
15 | and any accompanying written materials. | ||
16 | |||
17 | To the maximum extent permitted by applicable law, | ||
18 | IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER | ||
19 | (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS | ||
20 | PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR | ||
21 | OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE | ||
22 | SOFTWARE. Motorola assumes no responsibility for the maintenance | ||
23 | and support of the SOFTWARE. | ||
24 | |||
25 | You are hereby granted a copyright license to use, modify, and | ||
26 | distribute the SOFTWARE so long as this entire notice is retained | ||
27 | without alteration in any modified and/or redistributed versions, | ||
28 | and that such modified versions are clearly identified as such. | ||
29 | No licenses are granted by implication, estoppel or otherwise | ||
30 | under any patents or trademarks of Motorola, Inc. | ||
diff --git a/arch/m68k/fpsp040/bindec.S b/arch/m68k/fpsp040/bindec.S new file mode 100644 index 000000000000..3ba446a99a12 --- /dev/null +++ b/arch/m68k/fpsp040/bindec.S | |||
@@ -0,0 +1,920 @@ | |||
1 | | | ||
2 | | bindec.sa 3.4 1/3/91 | ||
3 | | | ||
4 | | bindec | ||
5 | | | ||
6 | | Description: | ||
7 | | Converts an input in extended precision format | ||
8 | | to bcd format. | ||
9 | | | ||
10 | | Input: | ||
11 | | a0 points to the input extended precision value | ||
12 | | in memory; d0 contains the k-factor sign-extended | ||
13 | | to 32-bits. The input may be either normalized, | ||
14 | | unnormalized, or denormalized. | ||
15 | | | ||
16 | | Output: result in the FP_SCR1 space on the stack. | ||
17 | | | ||
18 | | Saves and Modifies: D2-D7,A2,FP2 | ||
19 | | | ||
20 | | Algorithm: | ||
21 | | | ||
22 | | A1. Set RM and size ext; Set SIGMA = sign of input. | ||
23 | | The k-factor is saved for use in d7. Clear the | ||
24 | | BINDEC_FLG for separating normalized/denormalized | ||
25 | | input. If input is unnormalized or denormalized, | ||
26 | | normalize it. | ||
27 | | | ||
28 | | A2. Set X = abs(input). | ||
29 | | | ||
30 | | A3. Compute ILOG. | ||
31 | | ILOG is the log base 10 of the input value. It is | ||
32 | | approximated by (e + 0.f) * log10(2) when the original | ||
33 | | value is viewed as 2^^e * 1.f in extended precision. | ||
34 | | This value is stored in d6. | ||
35 | | | ||
36 | | A4. Clr INEX bit. | ||
37 | | The operation in A3 above may have set INEX2. | ||
38 | | | ||
39 | | A5. Set ICTR = 0; | ||
40 | | ICTR is a flag used in A13. It must be set before the | ||
41 | | loop entry A6. | ||
42 | | | ||
43 | | A6. Calculate LEN. | ||
44 | | LEN is the number of digits to be displayed. The | ||
45 | | k-factor can dictate either the total number of digits, | ||
46 | | if it is a positive number, or the number of digits | ||
47 | | after the decimal point which are to be included as | ||
48 | | significant. See the 68882 manual for examples. | ||
49 | | If LEN is computed to be greater than 17, set OPERR in | ||
50 | | USER_FPSR. LEN is stored in d4. | ||
51 | | | ||
52 | | A7. Calculate SCALE. | ||
53 | | SCALE is equal to 10^ISCALE, where ISCALE is the number | ||
54 | | of decimal places needed to insure LEN integer digits | ||
55 | | in the output before conversion to bcd. LAMBDA is the | ||
56 | | sign of ISCALE, used in A9. Fp1 contains | ||
57 | | 10^^(abs(ISCALE)) using a rounding mode which is a | ||
58 | | function of the original rounding mode and the signs | ||
59 | | of ISCALE and X. A table is given in the code. | ||
60 | | | ||
61 | | A8. Clr INEX; Force RZ. | ||
62 | | The power-of-ten multiplies in A7 above may have set INEX2. | ||
63 | | RZ mode is forced for the scaling operation to insure | ||
64 | | only one rounding error. The grs bits are collected in | ||
65 | | the INEX flag for use in A10. | ||
66 | | | ||
67 | | A9. Scale X -> Y. | ||
68 | | The mantissa is scaled to the desired number of | ||
69 | | significant digits. The excess digits are collected | ||
70 | | in INEX2. | ||
71 | | | ||
72 | | A10. Or in INEX. | ||
73 | | If INEX is set, round error occurred. This is | ||
74 | | compensated for by 'or-ing' in the INEX2 flag to | ||
75 | | the lsb of Y. | ||
76 | | | ||
77 | | A11. Restore original FPCR; set size ext. | ||
78 | | Perform FINT operation in the user's rounding mode. | ||
79 | | Keep the size to extended. | ||
80 | | | ||
81 | | A12. Calculate YINT = FINT(Y) according to user's rounding | ||
82 | | mode. The FPSP routine sintdo is used. The output | ||
83 | | is in fp0. | ||
84 | | | ||
85 | | A13. Check for LEN digits. | ||
86 | | If the int operation results in more than LEN digits, | ||
87 | | or less than LEN -1 digits, adjust ILOG and repeat from | ||
88 | | A6. This test occurs only on the first pass. If the | ||
89 | | result is exactly 10^LEN, decrement ILOG and divide | ||
90 | | the mantissa by 10. | ||
91 | | | ||
92 | | A14. Convert the mantissa to bcd. | ||
93 | | The binstr routine is used to convert the LEN digit | ||
94 | | mantissa to bcd in memory. The input to binstr is | ||
95 | | to be a fraction; i.e. (mantissa)/10^LEN and adjusted | ||
96 | | such that the decimal point is to the left of bit 63. | ||
97 | | The bcd digits are stored in the correct position in | ||
98 | | the final string area in memory. | ||
99 | | | ||
100 | | A15. Convert the exponent to bcd. | ||
101 | | As in A14 above, the exp is converted to bcd and the | ||
102 | | digits are stored in the final string. | ||
103 | | Test the length of the final exponent string. If the | ||
104 | | length is 4, set operr. | ||
105 | | | ||
106 | | A16. Write sign bits to final string. | ||
107 | | | ||
108 | | Implementation Notes: | ||
109 | | | ||
110 | | The registers are used as follows: | ||
111 | | | ||
112 | | d0: scratch; LEN input to binstr | ||
113 | | d1: scratch | ||
114 | | d2: upper 32-bits of mantissa for binstr | ||
115 | | d3: scratch;lower 32-bits of mantissa for binstr | ||
116 | | d4: LEN | ||
117 | | d5: LAMBDA/ICTR | ||
118 | | d6: ILOG | ||
119 | | d7: k-factor | ||
120 | | a0: ptr for original operand/final result | ||
121 | | a1: scratch pointer | ||
122 | | a2: pointer to FP_X; abs(original value) in ext | ||
123 | | fp0: scratch | ||
124 | | fp1: scratch | ||
125 | | fp2: scratch | ||
126 | | F_SCR1: | ||
127 | | F_SCR2: | ||
128 | | L_SCR1: | ||
129 | | L_SCR2: | ||
130 | |||
131 | | Copyright (C) Motorola, Inc. 1990 | ||
132 | | All Rights Reserved | ||
133 | | | ||
134 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
135 | | The copyright notice above does not evidence any | ||
136 | | actual or intended publication of such source code. | ||
137 | |||
138 | |BINDEC idnt 2,1 | Motorola 040 Floating Point Software Package | ||
139 | |||
140 | #include "fpsp.h" | ||
141 | |||
142 | |section 8 | ||
143 | |||
144 | | Constants in extended precision: log10(2), and log10(2) plus one unit in the last place | ||
145 | LOG2: .long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000 | ||
146 | LOG2UP1: .long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000 | ||
147 | |||
148 | | Constants in single precision (1.0, 2.0, 10.0, 4933.0), each zero-padded to four longwords | ||
149 | FONE: .long 0x3F800000,0x00000000,0x00000000,0x00000000 | ||
150 | FTWO: .long 0x40000000,0x00000000,0x00000000,0x00000000 | ||
151 | FTEN: .long 0x41200000,0x00000000,0x00000000,0x00000000 | ||
152 | F4933: .long 0x459A2800,0x00000000,0x00000000,0x00000000 | ||
153 | |||
154 | RBDTBL: .byte 0,0,0,0 | ||
155 | .byte 3,3,2,2 | ||
156 | .byte 3,2,2,3 | ||
157 | .byte 2,3,3,2 | ||
158 | |||
159 | |xref binstr | ||
160 | |xref sintdo | ||
161 | |xref ptenrn,ptenrm,ptenrp | ||
162 | |||
163 | .global bindec | ||
164 | .global sc_mul | ||
165 | bindec: | ||
166 | moveml %d2-%d7/%a2,-(%a7) | ||
167 | fmovemx %fp0-%fp2,-(%a7) | ||
168 | |||
169 | | A1. Set RM and size ext. Set SIGMA = sign input; | ||
170 | | The k-factor is saved for use in d7. Clear BINDEC_FLG for | ||
171 | | separating normalized/denormalized input. If the input | ||
172 | | is a denormalized number, set the BINDEC_FLG memory word | ||
173 | | to signal denorm. If the input is unnormalized, normalize | ||
174 | | the input and test for denormalized result. | ||
175 | | | ||
176 | fmovel #rm_mode,%FPCR |set RM and ext | ||
177 | movel (%a0),L_SCR2(%a6) |save exponent for sign check | ||
178 | movel %d0,%d7 |move k-factor to d7 | ||
179 | clrb BINDEC_FLG(%a6) |clr norm/denorm flag | ||
180 | movew STAG(%a6),%d0 |get stag | ||
181 | andiw #0xe000,%d0 |isolate stag bits | ||
182 | beq A2_str |if zero, input is norm | ||
183 | | | ||
184 | | Normalize the denorm | ||
185 | | | ||
186 | un_de_norm: | ||
187 | movew (%a0),%d0 | ||
188 | andiw #0x7fff,%d0 |strip sign of normalized exp | ||
189 | movel 4(%a0),%d1 | ||
190 | movel 8(%a0),%d2 | ||
191 | norm_loop: | ||
192 | subw #1,%d0 | ||
193 | lsll #1,%d2 | ||
194 | roxll #1,%d1 | ||
195 | tstl %d1 | ||
196 | bges norm_loop | ||
197 | | | ||
198 | | Test if the normalized input is denormalized | ||
199 | | | ||
200 | tstw %d0 | ||
201 | bgts pos_exp |if greater than zero, it is a norm | ||
202 | st BINDEC_FLG(%a6) |set flag for denorm | ||
203 | pos_exp: | ||
204 | andiw #0x7fff,%d0 |strip sign of normalized exp | ||
205 | movew %d0,(%a0) | ||
206 | movel %d1,4(%a0) | ||
207 | movel %d2,8(%a0) | ||
208 | |||
209 | | A2. Set X = abs(input). | ||
210 | | | ||
211 | A2_str: | ||
212 | movel (%a0),FP_SCR2(%a6) | move input to work space | ||
213 | movel 4(%a0),FP_SCR2+4(%a6) | move input to work space | ||
214 | movel 8(%a0),FP_SCR2+8(%a6) | move input to work space | ||
215 | andil #0x7fffffff,FP_SCR2(%a6) |create abs(X) | ||
216 | |||
217 | | A3. Compute ILOG. | ||
218 | | ILOG is the log base 10 of the input value. It is approx- | ||
219 | | imated by (e + 0.f) * log10(2) when the original value is viewed | ||
220 | | as 2^^e * 1.f in extended precision. This value is stored | ||
221 | | in d6. | ||
222 | | | ||
223 | | Register usage: | ||
224 | | Input/Output | ||
225 | | d0: k-factor/exponent | ||
226 | | d2: x/x | ||
227 | | d3: x/x | ||
228 | | d4: x/x | ||
229 | | d5: x/x | ||
230 | | d6: x/ILOG | ||
231 | | d7: k-factor/Unchanged | ||
232 | | a0: ptr for original operand/final result | ||
233 | | a1: x/x | ||
234 | | a2: x/x | ||
235 | | fp0: x/float(ILOG) | ||
236 | | fp1: x/x | ||
237 | | fp2: x/x | ||
238 | | F_SCR1:x/x | ||
239 | | F_SCR2:Abs(X)/Abs(X) with $3fff exponent | ||
240 | | L_SCR1:x/x | ||
241 | | L_SCR2:first word of X packed/Unchanged | ||
242 | |||
243 | tstb BINDEC_FLG(%a6) |check for denorm | ||
244 | beqs A3_cont |if clr, continue with norm | ||
245 | movel #-4933,%d6 |force ILOG = -4933 | ||
246 | bras A4_str | ||
247 | A3_cont: | ||
248 | movew FP_SCR2(%a6),%d0 |move exp to d0 | ||
249 | movew #0x3fff,FP_SCR2(%a6) |replace exponent with 0x3fff | ||
250 | fmovex FP_SCR2(%a6),%fp0 |now fp0 has 1.f | ||
251 | subw #0x3fff,%d0 |strip off bias | ||
252 | faddw %d0,%fp0 |add in exp | ||
253 | fsubs FONE,%fp0 |subtract off 1.0 | ||
254 | fbge pos_res |if pos, branch | ||
255 | fmulx LOG2UP1,%fp0 |if neg, mul by LOG2UP1 | ||
256 | fmovel %fp0,%d6 |put ILOG in d6 as a lword | ||
257 | bras A4_str |go move out ILOG | ||
258 | pos_res: | ||
259 | fmulx LOG2,%fp0 |if pos, mul by LOG2 | ||
260 | fmovel %fp0,%d6 |put ILOG in d6 as a lword | ||
261 | |||
262 | |||
263 | | A4. Clr INEX bit. | ||
264 | | The operation in A3 above may have set INEX2. | ||
265 | |||
266 | A4_str: | ||
267 | fmovel #0,%FPSR |zero all of fpsr - nothing needed | ||
268 | |||
269 | |||
270 | | A5. Set ICTR = 0; | ||
271 | | ICTR is a flag used in A13. It must be set before the | ||
272 | | loop entry A6. The lower word of d5 is used for ICTR. | ||
273 | |||
274 | clrw %d5 |clear ICTR | ||
275 | |||
276 | |||
277 | | A6. Calculate LEN. | ||
278 | | LEN is the number of digits to be displayed. The k-factor | ||
279 | | can dictate either the total number of digits, if it is | ||
280 | | a positive number, or the number of digits after the | ||
281 | | original decimal point which are to be included as | ||
282 | | significant. See the 68882 manual for examples. | ||
283 | | If LEN is computed to be greater than 17, set OPERR in | ||
284 | | USER_FPSR. LEN is stored in d4. | ||
285 | | | ||
286 | | Register usage: | ||
287 | | Input/Output | ||
288 | | d0: exponent/Unchanged | ||
289 | | d2: x/x/scratch | ||
290 | | d3: x/x | ||
291 | | d4: exc picture/LEN | ||
292 | | d5: ICTR/Unchanged | ||
293 | | d6: ILOG/Unchanged | ||
294 | | d7: k-factor/Unchanged | ||
295 | | a0: ptr for original operand/final result | ||
296 | | a1: x/x | ||
297 | | a2: x/x | ||
298 | | fp0: float(ILOG)/Unchanged | ||
299 | | fp1: x/x | ||
300 | | fp2: x/x | ||
301 | | F_SCR1:x/x | ||
302 | | F_SCR2:Abs(X) with $3fff exponent/Unchanged | ||
303 | | L_SCR1:x/x | ||
304 | | L_SCR2:first word of X packed/Unchanged | ||
305 | |||
306 | A6_str: | ||
307 | tstl %d7 |branch on sign of k | ||
308 | bles k_neg |if k <= 0, LEN = ILOG + 1 - k | ||
309 | movel %d7,%d4 |if k > 0, LEN = k | ||
310 | bras len_ck |skip to LEN check | ||
311 | k_neg: | ||
312 | movel %d6,%d4 |first load ILOG to d4 | ||
313 | subl %d7,%d4 |subtract off k | ||
314 | addql #1,%d4 |add in the 1 | ||
315 | len_ck: | ||
316 | tstl %d4 |LEN check: branch on sign of LEN | ||
317 | bles LEN_ng |if neg, set LEN = 1 | ||
318 | cmpl #17,%d4 |test if LEN > 17 | ||
319 | bles A7_str |if not, forget it | ||
320 | movel #17,%d4 |set max LEN = 17 | ||
321 | tstl %d7 |if negative, never set OPERR | ||
322 | bles A7_str |if positive, continue | ||
323 | orl #opaop_mask,USER_FPSR(%a6) |set OPERR & AIOP in USER_FPSR | ||
324 | bras A7_str |finished here | ||
325 | LEN_ng: | ||
326 | moveql #1,%d4 |min LEN is 1 | ||
327 | |||
328 | |||
329 | | A7. Calculate SCALE. | ||
330 | | SCALE is equal to 10^ISCALE, where ISCALE is the number | ||
331 | | of decimal places needed to insure LEN integer digits | ||
332 | | in the output before conversion to bcd. LAMBDA is the sign | ||
333 | | of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using | ||
334 | | the rounding mode as given in the following table (see | ||
335 | | Coonen, p. 7.23 as ref.; however, the SCALE variable is | ||
336 | | of opposite sign in bindec.sa from Coonen). | ||
337 | | | ||
338 | | Initial USE | ||
339 | | FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5] | ||
340 | | ---------------------------------------------- | ||
341 | | RN 00 0 0 00/0 RN | ||
342 | | RN 00 0 1 00/0 RN | ||
343 | | RN 00 1 0 00/0 RN | ||
344 | | RN 00 1 1 00/0 RN | ||
345 | | RZ 01 0 0 11/3 RP | ||
346 | | RZ 01 0 1 11/3 RP | ||
347 | | RZ 01 1 0 10/2 RM | ||
348 | | RZ 01 1 1 10/2 RM | ||
349 | | RM 10 0 0 11/3 RP | ||
350 | | RM 10 0 1 10/2 RM | ||
351 | | RM 10 1 0 10/2 RM | ||
352 | | RM 10 1 1 11/3 RP | ||
353 | | RP 11 0 0 10/2 RM | ||
354 | | RP 11 0 1 11/3 RP | ||
355 | | RP 11 1 0 11/3 RP | ||
356 | | RP 11 1 1 10/2 RM | ||
357 | | | ||
358 | | Register usage: | ||
359 | | Input/Output | ||
360 | | d0: exponent/scratch - final is 0 | ||
361 | | d2: x/0 or 24 for A9 | ||
362 | | d3: x/scratch - offset ptr into PTENRM array | ||
363 | | d4: LEN/Unchanged | ||
364 | | d5: 0/ICTR:LAMBDA | ||
365 | | d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k)) | ||
366 | | d7: k-factor/Unchanged | ||
367 | | a0: ptr for original operand/final result | ||
368 | | a1: x/ptr to PTENRM array | ||
369 | | a2: x/x | ||
370 | | fp0: float(ILOG)/Unchanged | ||
371 | | fp1: x/10^ISCALE | ||
372 | | fp2: x/x | ||
373 | | F_SCR1:x/x | ||
374 | | F_SCR2:Abs(X) with $3fff exponent/Unchanged | ||
375 | | L_SCR1:x/x | ||
376 | | L_SCR2:first word of X packed/Unchanged | ||
377 | |||
378 | A7_str: | ||
379 | tstl %d7 |test sign of k | ||
380 | bgts k_pos |if pos and > 0, skip this | ||
381 | cmpl %d6,%d7 |test k - ILOG | ||
382 | blts k_pos |if ILOG >= k, skip this | ||
383 | movel %d7,%d6 |if ((k<0) & (ILOG < k)) ILOG = k | ||
384 | k_pos: | ||
385 | movel %d6,%d0 |calc ILOG + 1 - LEN in d0 | ||
386 | addql #1,%d0 |add the 1 | ||
387 | subl %d4,%d0 |sub off LEN | ||
388 | swap %d5 |use upper word of d5 for LAMBDA | ||
389 | clrw %d5 |set it zero initially | ||
390 | clrw %d2 |set up d2 for very small case | ||
391 | tstl %d0 |test sign of ISCALE | ||
392 | bges iscale |if pos, skip next inst | ||
393 | addqw #1,%d5 |if neg, set LAMBDA true | ||
394 | cmpl #0xffffecd4,%d0 |test iscale <= -4908 | ||
395 | bgts no_inf |if false, skip rest | ||
396 | addil #24,%d0 |add in 24 to iscale | ||
397 | movel #24,%d2 |put 24 in d2 for A9 | ||
398 | no_inf: | ||
399 | negl %d0 |and take abs of ISCALE | ||
400 | iscale: | ||
401 | fmoves FONE,%fp1 |init fp1 to 1 | ||
402 | bfextu USER_FPCR(%a6){#26:#2},%d1 |get initial rmode bits | ||
403 | lslw #1,%d1 |put them in bits 2:1 | ||
404 | addw %d5,%d1 |add in LAMBDA | ||
405 | lslw #1,%d1 |put them in bits 3:1 | ||
406 | tstl L_SCR2(%a6) |test sign of original x | ||
407 | bges x_pos |if pos, don't set bit 0 | ||
408 | addql #1,%d1 |if neg, set bit 0 | ||
409 | x_pos: | ||
410 | leal RBDTBL,%a2 |load rbdtbl base | ||
411 | moveb (%a2,%d1),%d3 |load d3 with new rmode | ||
412 | lsll #4,%d3 |put bits in proper position | ||
413 | fmovel %d3,%fpcr |load bits into fpu | ||
414 | lsrl #4,%d3 |put bits in proper position | ||
415 | tstb %d3 |decode new rmode for pten table | ||
416 | bnes not_rn |if zero, it is RN | ||
417 | leal PTENRN,%a1 |load a1 with RN table base | ||
418 | bras rmode |exit decode | ||
419 | not_rn: | ||
420 | lsrb #1,%d3 |get lsb in carry | ||
421 | bccs not_rp |if carry clear, it is RM | ||
422 | leal PTENRP,%a1 |load a1 with RP table base | ||
423 | bras rmode |exit decode | ||
424 | not_rp: | ||
425 | leal PTENRM,%a1 |load a1 with RM table base | ||
426 | rmode: | ||
427 | clrl %d3 |clr table index | ||
428 | e_loop: | ||
429 | lsrl #1,%d0 |shift next bit into carry | ||
430 | bccs e_next |if zero, skip the mul | ||
431 | fmulx (%a1,%d3),%fp1 |mul by 10**(d3_bit_no) | ||
432 | e_next: | ||
433 | addl #12,%d3 |inc d3 to next pwrten table entry | ||
434 | tstl %d0 |test if ISCALE is zero | ||
435 | bnes e_loop |if not, loop | ||
436 | |||
437 | |||
438 | | A8. Clr INEX; Force RZ. | ||
439 | | The power-of-ten multiplies in A7 above may have set INEX2. | ||
440 | | RZ mode is forced for the scaling operation to insure | ||
441 | | only one rounding error. The grs bits are collected in | ||
442 | | the INEX flag for use in A10. | ||
443 | | | ||
444 | | Register usage: | ||
445 | | Input/Output | ||
446 | |||
447 | fmovel #0,%FPSR |clr INEX | ||
448 | fmovel #rz_mode,%FPCR |set RZ rounding mode | ||
449 | |||
450 | |||
451 | | A9. Scale X -> Y. | ||
452 | | The mantissa is scaled to the desired number of significant | ||
453 | | digits. The excess digits are collected in INEX2. If mul, | ||
454 | | Check d2 for excess 10 exponential value. If not zero, | ||
455 | | the iscale value would have caused the pwrten calculation | ||
456 | | to overflow. Only a negative iscale can cause this, so | ||
457 | | multiply by 10^(d2), which is now only allowed to be 24, | ||
458 | | with a multiply by 10^8 and 10^16, which is exact since | ||
459 | | 10^24 is exact. If the input was denormalized, we must | ||
460 | | create a busy stack frame with the mul command and the | ||
461 | | two operands, and allow the fpu to complete the multiply. | ||
462 | | | ||
463 | | Register usage: | ||
464 | | Input/Output | ||
465 | | d0: FPCR with RZ mode/Unchanged | ||
466 | | d2: 0 or 24/unchanged | ||
467 | | d3: x/x | ||
468 | | d4: LEN/Unchanged | ||
469 | | d5: ICTR:LAMBDA | ||
470 | | d6: ILOG/Unchanged | ||
471 | | d7: k-factor/Unchanged | ||
472 | | a0: ptr for original operand/final result | ||
473 | | a1: ptr to PTENRM array/Unchanged | ||
474 | | a2: x/x | ||
475 | | fp0: float(ILOG)/X adjusted for SCALE (Y) | ||
476 | | fp1: 10^ISCALE/Unchanged | ||
477 | | fp2: x/x | ||
478 | | F_SCR1:x/x | ||
479 | | F_SCR2:Abs(X) with $3fff exponent/Unchanged | ||
480 | | L_SCR1:x/x | ||
481 | | L_SCR2:first word of X packed/Unchanged | ||
482 | |||
483 | A9_str: | ||
484 | fmovex (%a0),%fp0 |load X from memory | ||
485 | fabsx %fp0 |use abs(X) | ||
486 | tstw %d5 |LAMBDA is in lower word of d5 | ||
487 | bne sc_mul |if neg (LAMBDA = 1), scale by mul | ||
488 | fdivx %fp1,%fp0 |calculate X / SCALE -> Y to fp0 | ||
489 | bras A10_st |branch to A10 | ||
490 | |||
491 | sc_mul: | ||
492 | tstb BINDEC_FLG(%a6) |check for denorm | ||
493 | beqs A9_norm |if norm, continue with mul | ||
494 | fmovemx %fp1-%fp1,-(%a7) |load ETEMP with 10^ISCALE | ||
495 | movel 8(%a0),-(%a7) |load FPTEMP with input arg | ||
496 | movel 4(%a0),-(%a7) | ||
497 | movel (%a0),-(%a7) | ||
498 | movel #18,%d3 |load count for busy stack | ||
499 | A9_loop: | ||
500 | clrl -(%a7) |clear lword on stack | ||
501 | dbf %d3,A9_loop | ||
502 | moveb VER_TMP(%a6),(%a7) |write current version number | ||
503 | moveb #BUSY_SIZE-4,1(%a7) |write current busy size | ||
504 | moveb #0x10,0x44(%a7) |set fcefpte[15] bit | ||
505 | movew #0x0023,0x40(%a7) |load cmdreg1b with mul command | ||
506 | moveb #0xfe,0x8(%a7) |load all 1s to cu savepc | ||
507 | frestore (%a7)+ |restore frame to fpu for completion | ||
508 | fmulx 36(%a1),%fp0 |multiply fp0 by 10^8 | ||
509 | fmulx 48(%a1),%fp0 |multiply fp0 by 10^16 | ||
510 | bras A10_st | ||
511 | A9_norm: | ||
512 | tstw %d2 |test for small exp case | ||
513 | beqs A9_con |if zero, continue as normal | ||
514 | fmulx 36(%a1),%fp0 |multiply fp0 by 10^8 | ||
515 | fmulx 48(%a1),%fp0 |multiply fp0 by 10^16 | ||
516 | A9_con: | ||
517 | fmulx %fp1,%fp0 |calculate X * SCALE -> Y to fp0 | ||
518 | |||
519 | |||
520 | | A10. Or in INEX. | ||
521 | | If INEX is set, round error occurred. This is compensated | ||
522 | | for by 'or-ing' in the INEX2 flag to the lsb of Y. | ||
523 | | | ||
524 | | Register usage: | ||
525 | | Input/Output | ||
526 | | d0: FPCR with RZ mode/FPSR with INEX2 isolated | ||
527 | | d2: x/x | ||
528 | | d3: x/x | ||
529 | | d4: LEN/Unchanged | ||
530 | | d5: ICTR:LAMBDA | ||
531 | | d6: ILOG/Unchanged | ||
532 | | d7: k-factor/Unchanged | ||
533 | | a0: ptr for original operand/final result | ||
534 | | a1: ptr to PTENxx array/Unchanged | ||
535 | | a2: x/ptr to FP_SCR2(a6) | ||
536 | | fp0: Y/Y with lsb adjusted | ||
537 | | fp1: 10^ISCALE/Unchanged | ||
538 | | fp2: x/x | ||
539 | |||
540 | A10_st: | ||
541 | fmovel %FPSR,%d0 |get FPSR | ||
542 | fmovex %fp0,FP_SCR2(%a6) |move Y to memory | ||
543 | leal FP_SCR2(%a6),%a2 |load a2 with ptr to FP_SCR2 | ||
544 | btstl #9,%d0 |check if INEX2 set | ||
545 | beqs A11_st |if clear, skip rest | ||
546 | oril #1,8(%a2) |or in 1 to lsb of mantissa | ||
547 | fmovex FP_SCR2(%a6),%fp0 |write adjusted Y back to fpu | ||
548 | |||
549 | |||
550 | | A11. Restore original FPCR; set size ext. | ||
551 | | Perform FINT operation in the user's rounding mode. Keep | ||
552 | | the size to extended. The sintdo entry point in the sint | ||
553 | | routine expects the FPCR value to be in USER_FPCR for | ||
554 | | mode and precision. The original FPCR is saved in L_SCR1. | ||
555 | |||
556 | A11_st: | ||
557 | movel USER_FPCR(%a6),L_SCR1(%a6) |save it for later | ||
558 | andil #0x00000030,USER_FPCR(%a6) |set size to ext, | ||
559 | | ;block exceptions | ||
560 | |||
561 | |||
562 | | A12. Calculate YINT = FINT(Y) according to user's rounding mode. | ||
563 | | The FPSP routine sintdo is used. The output is in fp0. | ||
564 | | | ||
565 | | Register usage: | ||
566 | | Input/Output | ||
567 | | d0: FPSR with AINEX cleared/FPCR with size set to ext | ||
568 | | d2: x/x/scratch | ||
569 | | d3: x/x | ||
570 | | d4: LEN/Unchanged | ||
571 | | d5: ICTR:LAMBDA/Unchanged | ||
572 | | d6: ILOG/Unchanged | ||
573 | | d7: k-factor/Unchanged | ||
574 | | a0: ptr for original operand/src ptr for sintdo | ||
575 | | a1: ptr to PTENxx array/Unchanged | ||
576 | | a2: ptr to FP_SCR2(a6)/Unchanged | ||
577 | | a6: temp pointer to FP_SCR2(a6) - orig value saved and restored | ||
578 | | fp0: Y/YINT | ||
579 | | fp1: 10^ISCALE/Unchanged | ||
580 | | fp2: x/x | ||
581 | | F_SCR1:x/x | ||
582 | | F_SCR2:Y adjusted for inex/Y with original exponent | ||
583 | | L_SCR1:x/original USER_FPCR | ||
584 | | L_SCR2:first word of X packed/Unchanged | ||
585 | |||
586 | A12_st: | ||
587 | moveml %d0-%d1/%a0-%a1,-(%a7) |save regs used by sintd0 | ||
588 | movel L_SCR1(%a6),-(%a7) | ||
589 | movel L_SCR2(%a6),-(%a7) | ||
590 | leal FP_SCR2(%a6),%a0 |a0 is ptr to F_SCR2(a6) | ||
591 | fmovex %fp0,(%a0) |move Y to memory at FP_SCR2(a6) | ||
592 | tstl L_SCR2(%a6) |test sign of original operand | ||
593 | bges do_fint |if pos, use Y | ||
594 | orl #0x80000000,(%a0) |if neg, use -Y | ||
595 | do_fint: | ||
596 | movel USER_FPSR(%a6),-(%a7) | ||
597 | bsr sintdo |sint routine returns int in fp0 | ||
598 | moveb (%a7),USER_FPSR(%a6) | ||
599 | addl #4,%a7 | ||
600 | movel (%a7)+,L_SCR2(%a6) | ||
601 | movel (%a7)+,L_SCR1(%a6) | ||
602 | moveml (%a7)+,%d0-%d1/%a0-%a1 |restore regs used by sint | ||
603 | movel L_SCR2(%a6),FP_SCR2(%a6) |restore original exponent | ||
604 | movel L_SCR1(%a6),USER_FPCR(%a6) |restore user's FPCR | ||
605 | |||
606 | |||
607 | | A13. Check for LEN digits. | ||
608 | | If the int operation results in more than LEN digits, | ||
609 | | or less than LEN -1 digits, adjust ILOG and repeat from | ||
610 | | A6. This test occurs only on the first pass. If the | ||
611 | | result is exactly 10^LEN, decrement ILOG and divide | ||
612 | | the mantissa by 10. The calculation of 10^LEN cannot | ||
613 | | be inexact, since all powers of ten up to 10^27 are exact | ||
614 | | in extended precision, so the use of a previous power-of-ten | ||
615 | | table will introduce no error. | ||
616 | | | ||
617 | | | ||
618 | | Register usage: | ||
619 | | Input/Output | ||
620 | | d0: FPCR with size set to ext/scratch final = 0 | ||
621 | | d2: x/x | ||
622 | | d3: x/scratch final = x | ||
623 | | d4: LEN/LEN adjusted | ||
624 | | d5: ICTR:LAMBDA/LAMBDA:ICTR | ||
625 | | d6: ILOG/ILOG adjusted | ||
626 | | d7: k-factor/Unchanged | ||
627 | | a0: pointer into memory for packed bcd string formation | ||
628 | | a1: ptr to PTENxx array/Unchanged | ||
629 | | a2: ptr to FP_SCR2(a6)/Unchanged | ||
630 | | fp0: int portion of Y/abs(YINT) adjusted | ||
631 | | fp1: 10^ISCALE/Unchanged | ||
632 | | fp2: x/10^LEN | ||
633 | | F_SCR1:x/x | ||
634 | | F_SCR2:Y with original exponent/Unchanged | ||
635 | | L_SCR1:original USER_FPCR/Unchanged | ||
636 | | L_SCR2:first word of X packed/Unchanged | ||
637 | |||
638 | A13_st: | ||
639 | swap %d5 |put ICTR in lower word of d5 | ||
640 | tstw %d5 |check if ICTR = 0 | ||
641 | bne not_zr |if non-zero, go to second test | ||
642 | | | ||
643 | | Compute 10^(LEN-1) | ||
644 | | | ||
645 | fmoves FONE,%fp2 |init fp2 to 1.0 | ||
646 | movel %d4,%d0 |put LEN in d0 | ||
647 | subql #1,%d0 |d0 = LEN -1 | ||
648 | clrl %d3 |clr table index | ||
649 | l_loop: | ||
650 | lsrl #1,%d0 |shift next bit into carry | ||
651 | bccs l_next |if zero, skip the mul | ||
652 | fmulx (%a1,%d3),%fp2 |mul by 10**(d3_bit_no) | ||
653 | l_next: | ||
654 | addl #12,%d3 |inc d3 to next pwrten table entry | ||
655 | tstl %d0 |test if LEN is zero | ||
656 | bnes l_loop |if not, loop | ||
657 | | | ||
658 | | 10^(LEN-1) is computed for this test and A14. If the input was | ||
659 | | denormalized, check only the case in which YINT > 10^LEN. | ||
660 | | | ||
661 | tstb BINDEC_FLG(%a6) |check if input was norm | ||
662 | beqs A13_con |if norm, continue with checking | ||
663 | fabsx %fp0 |take abs of YINT | ||
664 | bra test_2 | ||
665 | | | ||
666 | | Compare abs(YINT) to 10^(LEN-1) and 10^LEN | ||
667 | | | ||
668 | A13_con: | ||
669 | fabsx %fp0 |take abs of YINT | ||
670 | fcmpx %fp2,%fp0 |compare abs(YINT) with 10^(LEN-1) | ||
671 | fbge test_2 |if greater, do next test | ||
672 | subql #1,%d6 |subtract 1 from ILOG | ||
673 | movew #1,%d5 |set ICTR | ||
674 | fmovel #rm_mode,%FPCR |set rmode to RM | ||
675 | fmuls FTEN,%fp2 |compute 10^LEN | ||
676 | bra A6_str |return to A6 and recompute YINT | ||
677 | test_2: | ||
678 | fmuls FTEN,%fp2 |compute 10^LEN | ||
679 | fcmpx %fp2,%fp0 |compare abs(YINT) with 10^LEN | ||
680 | fblt A14_st |if less, all is ok, go to A14 | ||
681 | fbgt fix_ex |if greater, fix and redo | ||
682 | fdivs FTEN,%fp0 |if equal, divide by 10 | ||
683 | addql #1,%d6 | and inc ILOG | ||
684 | bras A14_st | and continue elsewhere | ||
685 | fix_ex: | ||
686 | addql #1,%d6 |increment ILOG by 1 | ||
687 | movew #1,%d5 |set ICTR | ||
688 | fmovel #rm_mode,%FPCR |set rmode to RM | ||
689 | bra A6_str |return to A6 and recompute YINT | ||
690 | | | ||
691 | | Since ICTR <> 0, we have already been through one adjustment, | ||
692 | | and shouldn't have another; this is to check if abs(YINT) = 10^LEN. | ||
693 | | 10^LEN is again computed using whatever table is in a1 since the | ||
694 | | value calculated cannot be inexact. | ||
695 | | | ||
696 | not_zr: | ||
697 | fmoves FONE,%fp2 |init fp2 to 1.0 (accumulates 10^LEN) | ||
698 | movel %d4,%d0 |copy LEN to d0; d0 is consumed bit by bit | ||
699 | clrl %d3 |clr table index (byte offset from a1) | ||
700 | z_loop: | ||
701 | lsrl #1,%d0 |shift next bit of LEN into carry | ||
702 | bccs z_next |if the bit is zero, skip the mul | ||
703 | fmulx (%a1,%d3),%fp2 |mul by 10**(d3_bit_no) | ||
704 | z_next: | ||
705 | addl #12,%d3 |inc d3 to next pwrten table entry (12 bytes each) | ||
706 | tstl %d0 |test if any LEN bits remain | ||
707 | bnes z_loop |if so, loop | ||
708 | fabsx %fp0 |get abs(YINT) | ||
709 | fcmpx %fp2,%fp0 |check if abs(YINT) = 10^LEN | ||
710 | fbne A14_st |if not, skip this | ||
711 | fdivs FTEN,%fp0 |divide abs(YINT) by 10 | ||
712 | addql #1,%d6 |and inc ILOG by 1 | ||
713 | addql #1,%d4 | and inc LEN | ||
714 | fmuls FTEN,%fp2 | LEN was incremented, so get the new 10^LEN | ||
715 | |||
716 | |||
717 | | A14. Convert the mantissa to bcd. | ||
718 | | The binstr routine is used to convert the LEN digit | ||
719 | | mantissa to bcd in memory. The input to binstr is | ||
720 | | to be a fraction; i.e. (mantissa)/10^LEN and adjusted | ||
721 | | such that the decimal point is to the left of bit 63. | ||
722 | | The bcd digits are stored in the correct position in | ||
723 | | the final string area in memory. | ||
724 | | | ||
725 | | | ||
726 | | Register usage: | ||
727 | | Input/Output | ||
728 | | d0: x/LEN call to binstr - final is 0 | ||
729 | | d1: x/0 | ||
730 | | d2: x/ms 32-bits of mant of abs(YINT) | ||
731 | | d3: x/ls 32-bits of mant of abs(YINT) | ||
732 | | d4: LEN/Unchanged | ||
733 | | d5: ICTR:LAMBDA/LAMBDA:ICTR | ||
734 | | d6: ILOG | ||
735 | | d7: k-factor/Unchanged | ||
736 | | a0: pointer into memory for packed bcd string formation | ||
737 | | /ptr to first mantissa byte in result string | ||
738 | | a1: ptr to PTENxx array/Unchanged | ||
739 | | a2: ptr to FP_SCR2(a6)/Unchanged | ||
740 | | fp0: int portion of Y/abs(YINT) adjusted | ||
741 | | fp1: 10^ISCALE/Unchanged | ||
742 | | fp2: 10^LEN/Unchanged | ||
743 | | F_SCR1:x/Work area for final result | ||
744 | | F_SCR2:Y with original exponent/Unchanged | ||
745 | | L_SCR1:original USER_FPCR/Unchanged | ||
746 | | L_SCR2:first word of X packed/Unchanged | ||
747 | |||
748 | A14_st: | ||
749 | fmovel #rz_mode,%FPCR |force rz for conversion | ||
750 | fdivx %fp2,%fp0 |divide abs(YINT) by 10^LEN | ||
751 | leal FP_SCR1(%a6),%a0 |a0 = address of FP_SCR1 work area | ||
752 | fmovex %fp0,(%a0) |move abs(YINT)/10^LEN to memory | ||
753 | movel 4(%a0),%d2 |move 2nd lword of FP_SCR1 to d2 | ||
754 | movel 8(%a0),%d3 |move 3rd lword of FP_SCR1 to d3 | ||
755 | clrl 4(%a0) |zero lword 2 of FP_SCR1 | ||
756 | clrl 8(%a0) |zero lword 3 of FP_SCR1 | ||
757 | movel (%a0),%d0 |move exponent to d0 | ||
758 | swap %d0 |put exponent in lower word | ||
759 | beqs no_sft |if zero, don't shift | ||
760 | subil #0x3ffd,%d0 |sub bias less 2 to make fract | ||
761 | tstl %d0 |check if result > 0 | ||
762 | bgts no_sft |if so, don't shift | ||
763 | negl %d0 |make exp positive | ||
764 | m_loop: | ||
765 | lsrl #1,%d2 |shift d2:d3 right, add 0s | ||
766 | roxrl #1,%d3 |the number of places | ||
767 | dbf %d0,m_loop |given in d0 (dbf runs d0+1 passes) | ||
768 | no_sft: | ||
769 | tstl %d2 |check for mantissa of zero | ||
770 | bnes no_zr |if not, go on | ||
771 | tstl %d3 |continue zero check | ||
772 | beqs zer_m |if zero, go directly to binstr | ||
773 | no_zr: | ||
774 | clrl %d1 |put zero in d1 for addx | ||
775 | addil #0x00000080,%d3 |inc at bit 7 | ||
776 | addxl %d1,%d2 |continue inc (carry into d2) | ||
777 | andil #0xffffff80,%d3 |strip off lsb not used by 882 | ||
778 | zer_m: | ||
779 | movel %d4,%d0 |put LEN in d0 for binstr call | ||
780 | addql #3,%a0 |a0 points to M16 byte in result | ||
781 | bsr binstr |call binstr to convert mant | ||
782 | |||
783 | |||
784 | | A15. Convert the exponent to bcd. | ||
785 | | As in A14 above, the exp is converted to bcd and the | ||
786 | | digits are stored in the final string. | ||
787 | | | ||
788 | | Digits are stored in L_SCR1(a6) on return from binstr as: | ||
789 | | | ||
790 | | 32 16 15 0 | ||
791 | | ----------------------------------------- | ||
792 | | | 0 | e3 | e2 | e1 | e4 | X | X | X | | ||
793 | | ----------------------------------------- | ||
794 | | | ||
795 | | And are moved into their proper places in FP_SCR1. If digit e4 | ||
796 | | is non-zero, OPERR is signaled. In all cases, all 4 digits are | ||
797 | | written as specified in the 881/882 manual for packed decimal. | ||
798 | | | ||
799 | | Register usage: | ||
800 | | Input/Output | ||
801 | | d0: x/LEN call to binstr - final is 0 | ||
802 | | d1: x/scratch (0);shift count for final exponent packing | ||
803 | | d2: x/ms 32-bits of exp fraction/scratch | ||
804 | | d3: x/ls 32-bits of exp fraction | ||
805 | | d4: LEN/Unchanged | ||
806 | | d5: ICTR:LAMBDA/LAMBDA:ICTR | ||
807 | | d6: ILOG | ||
808 | | d7: k-factor/Unchanged | ||
809 | | a0: ptr to result string/ptr to L_SCR1(a6) | ||
810 | | a1: ptr to PTENxx array/Unchanged | ||
811 | | a2: ptr to FP_SCR2(a6)/Unchanged | ||
812 | | fp0: abs(YINT) adjusted/float(ILOG) | ||
813 | | fp1: 10^ISCALE/Unchanged | ||
814 | | fp2: 10^LEN/Unchanged | ||
815 | | F_SCR1:Work area for final result/BCD result | ||
816 | | F_SCR2:Y with original exponent/ILOG/10^4 | ||
817 | | L_SCR1:original USER_FPCR/Exponent digits on return from binstr | ||
818 | | L_SCR2:first word of X packed/Unchanged | ||
819 | |||
820 | A15_st: | ||
821 | tstb BINDEC_FLG(%a6) |check for denorm | ||
822 | beqs not_denorm |flag clear, take the non-denorm path | ||
823 | ftstx %fp0 |test for zero | ||
824 | fbeq den_zero |if zero, use k-factor or 4933 | ||
825 | fmovel %d6,%fp0 |float ILOG | ||
826 | fabsx %fp0 |get abs of ILOG | ||
827 | bras convrt |go convert it | ||
828 | den_zero: | ||
829 | tstl %d7 |check sign of the k-factor | ||
830 | blts use_ilog |if negative, use ILOG | ||
831 | fmoves F4933,%fp0 |force exponent to 4933 | ||
832 | bras convrt |do it | ||
833 | use_ilog: | ||
834 | fmovel %d6,%fp0 |float ILOG | ||
835 | fabsx %fp0 |get abs of ILOG | ||
836 | bras convrt |go convert it | ||
837 | not_denorm: | ||
838 | ftstx %fp0 |test for zero | ||
839 | fbne not_zero |if not zero, use ILOG; else force exponent | ||
840 | fmoves FONE,%fp0 |force exponent to 1 | ||
841 | bras convrt |do it | ||
842 | not_zero: | ||
843 | fmovel %d6,%fp0 |float ILOG | ||
844 | fabsx %fp0 |get abs of ILOG | ||
845 | convrt: | ||
846 | fdivx 24(%a1),%fp0 |compute ILOG/10^4 (table entry at offset 24) | ||
847 | fmovex %fp0,FP_SCR2(%a6) |store fp0 in memory | ||
848 | movel 4(%a2),%d2 |move lword 2 to d2 | ||
849 | movel 8(%a2),%d3 |move lword 3 to d3 | ||
850 | movew (%a2),%d0 |move exp to d0 | ||
851 | beqs x_loop_fin |if zero, skip the shift | ||
852 | subiw #0x3ffd,%d0 |subtract off bias less 2 | ||
853 | negw %d0 |make exp positive | ||
854 | x_loop: | ||
855 | lsrl #1,%d2 |shift d2:d3 right | ||
856 | roxrl #1,%d3 |the number of places | ||
857 | dbf %d0,x_loop |given in d0 (dbf runs d0+1 passes) | ||
858 | x_loop_fin: | ||
859 | clrl %d1 |put zero in d1 for addx | ||
860 | addil #0x00000080,%d3 |inc at bit 7 | ||
861 | addxl %d1,%d2 |continue inc (carry into d2) | ||
862 | andil #0xffffff80,%d3 |strip off lsb not used by 882 | ||
863 | movel #4,%d0 |put 4 in d0 for binstr call | ||
864 | leal L_SCR1(%a6),%a0 |a0 is ptr to L_SCR1 for exp digits | ||
865 | bsr binstr |call binstr to convert exp | ||
866 | movel L_SCR1(%a6),%d0 |load L_SCR1 lword to d0 | ||
867 | movel #12,%d1 |use d1 for shift count | ||
868 | lsrl %d1,%d0 |shift d0 right by 12 | ||
869 | bfins %d0,FP_SCR1(%a6){#4:#12} |put e3:e2:e1 in FP_SCR1 | ||
870 | lsrl %d1,%d0 |shift d0 right by 12 | ||
871 | bfins %d0,FP_SCR1(%a6){#16:#4} |put e4 in FP_SCR1 | ||
872 | tstb %d0 |check if e4 is zero | ||
873 | beqs A16_st |if zero, skip rest | ||
874 | orl #opaop_mask,USER_FPSR(%a6) |set OPERR & AIOP in USER_FPSR | ||
875 | |||
876 | |||
877 | | A16. Write sign bits to final string. | ||
878 | | Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG). | ||
879 | | | ||
880 | | Register usage: | ||
881 | | Input/Output | ||
882 | | d0: x/scratch - final is x | ||
883 | | d2: x/x | ||
884 | | d3: x/x | ||
885 | | d4: LEN/Unchanged | ||
886 | | d5: ICTR:LAMBDA/LAMBDA:ICTR | ||
887 | | d6: ILOG/ILOG adjusted | ||
888 | | d7: k-factor/Unchanged | ||
889 | | a0: ptr to L_SCR1(a6)/Unchanged | ||
890 | | a1: ptr to PTENxx array/Unchanged | ||
891 | | a2: ptr to FP_SCR2(a6)/Unchanged | ||
892 | | fp0: float(ILOG)/Unchanged | ||
893 | | fp1: 10^ISCALE/Unchanged | ||
894 | | fp2: 10^LEN/Unchanged | ||
895 | | F_SCR1:BCD result with correct signs | ||
896 | | F_SCR2:ILOG/10^4 | ||
897 | | L_SCR1:Exponent digits on return from binstr | ||
898 | | L_SCR2:first word of X packed/Unchanged | ||
899 | |||
900 | A16_st: | ||
901 | clrl %d0 |clr d0 for collection of signs | ||
902 | andib #0x0f,FP_SCR1(%a6) |clear first nibble of FP_SCR1 | ||
903 | tstl L_SCR2(%a6) |check sign of original mantissa | ||
904 | bges mant_p |if pos, don't set SM | ||
905 | moveql #2,%d0 |set bit 1 in d0 for SM | ||
906 | mant_p: | ||
907 | tstl %d6 |check sign of ILOG | ||
908 | bges wr_sgn |if pos, don't set SE | ||
909 | addql #1,%d0 |set bit 0 in d0 for SE | ||
910 | wr_sgn: | ||
911 | bfins %d0,FP_SCR1(%a6){#0:#2} |insert SM and SE into FP_SCR1 | ||
912 | |||
913 | | Clean up, restore all registers used, and return to the caller. | ||
914 | |||
915 | fmovel #0,%FPSR |clear possible inex2/ainex bits | ||
916 | fmovemx (%a7)+,%fp0-%fp2 |restore fp0-fp2 from the stack | ||
917 | moveml (%a7)+,%d2-%d7/%a2 |restore d2-d7 and a2 from the stack | ||
918 | rts | ||
919 | |||
920 | |end | ||
diff --git a/arch/m68k/fpsp040/binstr.S b/arch/m68k/fpsp040/binstr.S new file mode 100644 index 000000000000..d53555c0a2b6 --- /dev/null +++ b/arch/m68k/fpsp040/binstr.S | |||
@@ -0,0 +1,140 @@ | |||
1 | | | ||
2 | | binstr.sa 3.3 12/19/90 | ||
3 | | | ||
4 | | | ||
5 | | Description: Converts a 64-bit binary integer to bcd. | ||
6 | | | ||
7 | | Input: 64-bit binary integer in d2:d3, desired length (LEN) in | ||
8 | | d0, and a pointer to start in memory for bcd characters | ||
9 | | in a0. (This pointer must point to byte 4 of the first | ||
10 | | lword of the packed decimal memory string.) | ||
11 | | | ||
12 | | Output: LEN bcd digits representing the 64-bit integer. | ||
13 | | | ||
14 | | Algorithm: | ||
15 | | The 64-bit binary is assumed to have a decimal point before | ||
16 | | bit 63. The fraction is multiplied by 10 using a mul by 2 | ||
17 | | shift and a mul by 8 shift. The bits shifted out of the | ||
18 | | msb form a decimal digit. This process is iterated until | ||
19 | | LEN digits are formed. | ||
20 | | | ||
21 | | A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the | ||
22 | | digit formed will be assumed the least significant. This is | ||
23 | | to force the first byte formed to have a 0 in the upper 4 bits. | ||
24 | | | ||
25 | | A2. Beginning of the loop: | ||
26 | | Copy the fraction in d2:d3 to d4:d5. | ||
27 | | | ||
28 | | A3. Multiply the fraction in d2:d3 by 8 using bit-field | ||
29 | | extracts and shifts. The three msbs from d2 will go into | ||
30 | | d1. | ||
31 | | | ||
32 | | A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb | ||
33 | | will be collected by the carry. | ||
34 | | | ||
35 | | A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5 | ||
36 | | into d2:d3. D1 will contain the bcd digit formed. | ||
37 | | | ||
38 | | A6. Test d7. If zero, the digit formed is the ms digit. If non- | ||
39 | | zero, it is the ls digit. The ms digit is held in the upper | ||
40 | | word of d7. When the ls digit is formed, the two are combined | ||
41 | | and the byte is written from d7 to memory. | ||
42 | | | ||
43 | | A7. Decrement d0 (LEN counter) and repeat the loop until zero. | ||
44 | | | ||
45 | | Implementation Notes: | ||
46 | | | ||
47 | | The registers are used as follows: | ||
48 | | | ||
49 | | d0: LEN counter | ||
50 | | d1: temp used to form the digit | ||
51 | | d2: upper 32-bits of fraction for mul by 8 | ||
52 | | d3: lower 32-bits of fraction for mul by 8 | ||
53 | | d4: upper 32-bits of fraction for mul by 2 | ||
54 | | d5: lower 32-bits of fraction for mul by 2 | ||
55 | | d6: temp for bit-field extracts | ||
56 | | d7: byte digit formation word;digit count {0,1} | ||
57 | | a0: pointer into memory for packed bcd string formation | ||
58 | | | ||
59 | |||
60 | | Copyright (C) Motorola, Inc. 1990 | ||
61 | | All Rights Reserved | ||
62 | | | ||
63 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
64 | | The copyright notice above does not evidence any | ||
65 | | actual or intended publication of such source code. | ||
66 | |||
67 | |BINSTR idnt 2,1 | Motorola 040 Floating Point Software Package | ||
68 | |||
69 | |section 8 | ||
70 | |||
71 | #include "fpsp.h" | ||
72 | |||
73 | .global binstr | ||
74 | binstr: | ||
75 | moveml %d0-%d7,-(%a7) |save d0-d7; a0 is not saved and is advanced | ||
76 | | | ||
77 | | A1: Init d7 | ||
78 | | | ||
79 | moveql #1,%d7 |init d7 for second digit (upper word is 0) | ||
80 | subql #1,%d0 |for dbf d0 would have LEN+1 passes | ||
81 | | | ||
82 | | A2. Copy d2:d3 to d4:d5. Start loop. | ||
83 | | | ||
84 | loop: | ||
85 | movel %d2,%d4 |copy the fraction before muls | ||
86 | movel %d3,%d5 |to d4:d5 | ||
87 | | | ||
88 | | A3. Multiply d2:d3 by 8; extract msbs into d1. | ||
89 | | | ||
90 | bfextu %d2{#0:#3},%d1 |copy 3 msbs of d2 into d1 | ||
91 | asll #3,%d2 |shift d2 left by 3 places | ||
92 | bfextu %d3{#0:#3},%d6 |copy 3 msbs of d3 into d6 | ||
93 | asll #3,%d3 |shift d3 left by 3 places | ||
94 | orl %d6,%d2 |or in msbs from d3 into d2 | ||
95 | | | ||
96 | | A4. Multiply d4:d5 by 2; add carry out to d1. | ||
97 | | | ||
98 | asll #1,%d5 |mul d5 by 2 | ||
99 | roxll #1,%d4 |mul d4 by 2, shifting in the extend bit | ||
100 | swap %d6 |put 0 in d6 lower word | ||
101 | addxw %d6,%d1 |add in extend from mul by 2 | ||
102 | | | ||
103 | | A5. Add mul by 8 to mul by 2. D1 contains the digit formed. | ||
104 | | | ||
105 | addl %d5,%d3 |add lower 32 bits | ||
106 | nop |ERRATA ; FIX #13 (Rev. 1.2 6/6/90) | ||
107 | addxl %d4,%d2 |add with extend upper 32 bits | ||
108 | nop |ERRATA ; FIX #13 (Rev. 1.2 6/6/90) | ||
109 | addxw %d6,%d1 |add in extend from add to d1 | ||
110 | swap %d6 |with d6 = 0; put 0 in upper word | ||
111 | | | ||
112 | | A6. Test d7 and branch. | ||
113 | | | ||
114 | tstw %d7 |if zero, store digit & to loop | ||
115 | beqs first_d |if non-zero, form byte & write | ||
116 | sec_d: | ||
117 | swap %d7 |bring first digit to word d7b | ||
118 | aslw #4,%d7 |first digit in upper 4 bits d7b | ||
119 | addw %d1,%d7 |add in ls digit to d7b | ||
120 | moveb %d7,(%a0)+ |store d7b byte in memory | ||
121 | swap %d7 |put digit-count flag back in word d7a | ||
122 | clrw %d7 |set d7a to signal no digits done | ||
123 | dbf %d0,loop |do loop some more! | ||
124 | bras end_bstr |finished, so exit | ||
125 | first_d: | ||
126 | swap %d7 |put digit word in d7b | ||
127 | movew %d1,%d7 |put new digit in d7b | ||
128 | swap %d7 |put digit-count flag back in word d7a | ||
129 | addqw #1,%d7 |set d7a to signal first digit done | ||
130 | dbf %d0,loop |do loop some more! | ||
131 | swap %d7 |put last digit in string | ||
132 | lslw #4,%d7 |move it to upper 4 bits | ||
133 | moveb %d7,(%a0)+ |store it in memory string | ||
134 | | | ||
135 | | Clean up and return; the bcd digits have been written to memory. | ||
136 | | | ||
137 | end_bstr: | ||
138 | moveml (%a7)+,%d0-%d7 |restore d0-d7 | ||
139 | rts | ||
140 | |end | ||
diff --git a/arch/m68k/fpsp040/bugfix.S b/arch/m68k/fpsp040/bugfix.S new file mode 100644 index 000000000000..942c4f6f4fd1 --- /dev/null +++ b/arch/m68k/fpsp040/bugfix.S | |||
@@ -0,0 +1,496 @@ | |||
1 | | | ||
2 | | bugfix.sa 3.2 1/31/91 | ||
3 | | | ||
4 | | | ||
5 | | This file contains workarounds for bugs in the 040 | ||
6 | | relating to the Floating-Point Software Package (FPSP) | ||
7 | | | ||
8 | | Fixes for bugs: 1238 | ||
9 | | | ||
10 | | Bug: 1238 | ||
11 | | | ||
12 | | | ||
13 | | /* The following dirty_bit clear should be left in | ||
14 | | * the handler permanently to improve throughput. | ||
15 | | * The dirty_bits are located at bits [23:16] in | ||
16 | | * longword $08 in the busy frame $4x60. Bit 16 | ||
17 | | * corresponds to FP0, bit 17 corresponds to FP1, | ||
18 | | * and so on. | ||
19 | | */ | ||
20 | | if (E3_exception_just_serviced) { | ||
21 | | dirty_bit[cmdreg3b[9:7]] = 0; | ||
22 | | } | ||
23 | | | ||
24 | | if (fsave_format_version != $40) {goto NOFIX} | ||
25 | | | ||
26 | | if !(E3_exception_just_serviced) {goto NOFIX} | ||
27 | | if (cupc == 0000000) {goto NOFIX} | ||
28 | | if ((cmdreg1b[15:13] != 000) && | ||
29 | | (cmdreg1b[15:10] != 010001)) {goto NOFIX} | ||
30 | | if (((cmdreg1b[15:13] != 000) || ((cmdreg1b[12:10] != cmdreg2b[9:7]) && | ||
31 | | (cmdreg1b[12:10] != cmdreg3b[9:7])) ) && | ||
32 | | ((cmdreg1b[ 9: 7] != cmdreg2b[9:7]) && | ||
33 | | (cmdreg1b[ 9: 7] != cmdreg3b[9:7])) ) {goto NOFIX} | ||
34 | | | ||
35 | | /* Note: for 6d43b or 8d43b, you may want to add the following code | ||
36 | | * to get better coverage. (If you do not insert this code, the part | ||
37 | | * won't lock up; it will simply get the wrong answer.) | ||
38 | | * Do NOT insert this code for 10d43b or later parts. | ||
39 | | * | ||
40 | | * if (fpiarcu == integer stack return address) { | ||
41 | | * cupc = 0000000; | ||
42 | | * goto NOFIX; | ||
43 | | * } | ||
44 | | */ | ||
45 | | | ||
46 | | if (cmdreg1b[15:13] != 000) {goto FIX_OPCLASS2} | ||
47 | | FIX_OPCLASS0: | ||
48 | | if (((cmdreg1b[12:10] == cmdreg2b[9:7]) || | ||
49 | | (cmdreg1b[ 9: 7] == cmdreg2b[9:7])) && | ||
50 | | (cmdreg1b[12:10] != cmdreg3b[9:7]) && | ||
51 | | (cmdreg1b[ 9: 7] != cmdreg3b[9:7])) { /* xu conflict only */ | ||
52 | | /* We execute the following code if there is an | ||
53 | | xu conflict and NOT an nu conflict */ | ||
54 | | | ||
55 | | /* first save some values on the fsave frame */ | ||
56 | | stag_temp = STAG[fsave_frame]; | ||
57 | | cmdreg1b_temp = CMDREG1B[fsave_frame]; | ||
58 | | dtag_temp = DTAG[fsave_frame]; | ||
59 | | ete15_temp = ETE15[fsave_frame]; | ||
60 | | | ||
61 | | CUPC[fsave_frame] = 0000000; | ||
62 | | FRESTORE | ||
63 | | FSAVE | ||
64 | | | ||
65 | | /* If the xu instruction is exceptional, we punt. | ||
66 | | * Otherwise, we would have to include OVFL/UNFL handler | ||
67 | | * code here to get the correct answer. | ||
68 | | */ | ||
69 | | if (fsave_frame_format == $4060) {goto KILL_PROCESS} | ||
70 | | | ||
71 | | fsave_frame = /* build a long frame of all zeros */ | ||
72 | | fsave_frame_format = $4060; /* label it as long frame */ | ||
73 | | | ||
74 | | /* load it with the temps we saved */ | ||
75 | | STAG[fsave_frame] = stag_temp; | ||
76 | | CMDREG1B[fsave_frame] = cmdreg1b_temp; | ||
77 | | DTAG[fsave_frame] = dtag_temp; | ||
78 | | ETE15[fsave_frame] = ete15_temp; | ||
79 | | | ||
80 | | /* Make sure that the cmdreg3b dest reg is not going to | ||
81 | | * be destroyed by a FMOVEM at the end of all this code. | ||
82 | | * If it is, you should move the current value of the reg | ||
83 | | * onto the stack so that the reg will be loaded with that value. | ||
84 | | */ | ||
85 | | | ||
86 | | /* All done. Proceed with the code below */ | ||
87 | | } | ||
88 | | | ||
89 | | etemp = FP_reg_[cmdreg1b[12:10]]; | ||
90 | | ete15 = ~ete14; | ||
91 | | cmdreg1b[15:10] = 010010; | ||
92 | | clear(bug_flag_procIDxxxx); | ||
93 | | FRESTORE and return; | ||
94 | | | ||
95 | | | ||
96 | | FIX_OPCLASS2: | ||
97 | | if ((cmdreg1b[9:7] == cmdreg2b[9:7]) && | ||
98 | | (cmdreg1b[9:7] != cmdreg3b[9:7])) { /* xu conflict only */ | ||
99 | | /* We execute the following code if there is an | ||
100 | | xu conflict and NOT an nu conflict */ | ||
101 | | | ||
102 | | /* first save some values on the fsave frame */ | ||
103 | | stag_temp = STAG[fsave_frame]; | ||
104 | | cmdreg1b_temp = CMDREG1B[fsave_frame]; | ||
105 | | dtag_temp = DTAG[fsave_frame]; | ||
106 | | ete15_temp = ETE15[fsave_frame]; | ||
107 | | etemp_temp = ETEMP[fsave_frame]; | ||
108 | | | ||
109 | | CUPC[fsave_frame] = 0000000; | ||
110 | | FRESTORE | ||
111 | | FSAVE | ||
112 | | | ||
113 | | | ||
114 | | /* If the xu instruction is exceptional, we punt. | ||
115 | | * Otherwise, we would have to include OVFL/UNFL handler | ||
116 | | * code here to get the correct answer. | ||
117 | | */ | ||
118 | | if (fsave_frame_format == $4060) {goto KILL_PROCESS} | ||
119 | | | ||
120 | | fsave_frame = /* build a long frame of all zeros */ | ||
121 | | fsave_frame_format = $4060; /* label it as long frame */ | ||
122 | | | ||
123 | | /* load it with the temps we saved */ | ||
124 | | STAG[fsave_frame] = stag_temp; | ||
125 | | CMDREG1B[fsave_frame] = cmdreg1b_temp; | ||
126 | | DTAG[fsave_frame] = dtag_temp; | ||
127 | | ETE15[fsave_frame] = ete15_temp; | ||
128 | | ETEMP[fsave_frame] = etemp_temp; | ||
129 | | | ||
130 | | /* Make sure that the cmdreg3b dest reg is not going to | ||
131 | | * be destroyed by a FMOVEM at the end of all this code. | ||
132 | | * If it is, you should move the current value of the reg | ||
133 | | * onto the stack so that the reg will be loaded with that value. | ||
134 | | */ | ||
135 | | | ||
136 | | /* All done. Proceed with the code below */ | ||
137 | | } | ||
138 | | | ||
139 | | if (etemp_exponent == min_sgl) etemp_exponent = min_dbl; | ||
140 | | if (etemp_exponent == max_sgl) etemp_exponent = max_dbl; | ||
141 | | cmdreg1b[15:10] = 010101; | ||
142 | | clear(bug_flag_procIDxxxx); | ||
143 | | FRESTORE and return; | ||
144 | | | ||
145 | | | ||
146 | | NOFIX: | ||
147 | | clear(bug_flag_procIDxxxx); | ||
148 | | FRESTORE and return; | ||
149 | | | ||
150 | |||
151 | |||
152 | | Copyright (C) Motorola, Inc. 1990 | ||
153 | | All Rights Reserved | ||
154 | | | ||
155 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
156 | | The copyright notice above does not evidence any | ||
157 | | actual or intended publication of such source code. | ||
158 | |||
159 | |BUGFIX idnt 2,1 | Motorola 040 Floating Point Software Package | ||
160 | |||
161 | |section 8 | ||
162 | |||
163 | #include "fpsp.h" | ||
164 | |||
165 | |xref fpsp_fmt_error | ||
166 | |||
167 | .global b1238_fix | ||
168 | b1238_fix: | ||
169 | | | ||
170 | | This code is entered only on completion of the handling of an | ||
171 | | nu-generated ovfl, unfl, or inex exception. If the version | ||
172 | | number of the fsave is not $40, this handler is not necessary. | ||
173 | | Simply branch to fix_done and exit normally. | ||
174 | | | ||
175 | cmpib #VER_40,4(%a7) |fsave frame starts above the bsr return address | ||
176 | bne fix_done |not version $40, nothing to fix | ||
177 | | | ||
178 | | Test for cu_savepc (ignoring bit 0) equal to zero. If so, this | ||
179 | | is not a bug #1238 case. | ||
180 | | | ||
181 | moveb CU_SAVEPC(%a6),%d0 |get cu savepc byte | ||
182 | andib #0xFE,%d0 |ignore bit 0 | ||
183 | beq fix_done |if zero, this is not bug #1238 | ||
184 | |||
185 | | | ||
186 | | Test the register conflict aspect. If opclass0, check for | ||
187 | | cu src or cu dest equal to nu dest. If so, go to op0. Else | ||
188 | | check for cu dest or cu src equal to xu dest. If so, go to | ||
189 | | op0_xu. Else, exit, it is not the bug case. Opclass 2 is | ||
190 | | tested separately at op2sgl. | ||
191 | | | ||
192 | | Check for opclass 0. If not, go and check for opclass 2 and sgl. | ||
193 | | | ||
194 | movew CMDREG1B(%a6),%d0 |get cu command word | ||
195 | andiw #0xE000,%d0 |strip all but opclass | ||
196 | bne op2sgl |not opclass 0, check op2 | ||
197 | | | ||
198 | | Check for cu and nu register conflict. If one exists, this takes | ||
199 | | priority over a cu and xu conflict. | ||
200 | | | ||
201 | bfextu CMDREG1B(%a6){#3:#3},%d0 |get 1st src | ||
202 | bfextu CMDREG3B(%a6){#6:#3},%d1 |get 3rd dest | ||
203 | cmpb %d0,%d1 |cmp 1st src with 3rd dest | ||
204 | beqs op0 |if equal, continue bugfix | ||
205 | | | ||
206 | | Check for cu dest equal to nu dest. If so, go and fix the | ||
207 | | bug condition. Otherwise, go on to check for an xu conflict. | ||
208 | | | ||
209 | bfextu CMDREG1B(%a6){#6:#3},%d0 |get 1st dest | ||
210 | cmpb %d0,%d1 |cmp 1st dest with 3rd dest | ||
211 | beqs op0 |if equal, continue bugfix | ||
212 | | | ||
213 | | Check for cu and xu register conflict. | ||
214 | | | ||
215 | bfextu CMDREG2B(%a6){#6:#3},%d1 |get 2nd dest | ||
216 | cmpb %d0,%d1 |cmp 1st dest with 2nd dest | ||
217 | beqs op0_xu |if equal, continue bugfix | ||
218 | bfextu CMDREG1B(%a6){#3:#3},%d0 |get 1st src | ||
219 | cmpb %d0,%d1 |cmp 1st src with 2nd dest | ||
220 | beq op0_xu |if equal, continue bugfix | ||
221 | bne fix_done |if the reg checks fail, exit | ||
222 | | | ||
223 | | We have the opclass 0 situation. | ||
224 | | | ||
225 | op0: | ||
226 | bfextu CMDREG1B(%a6){#3:#3},%d0 |get source register no | ||
227 | movel #7,%d1 |d1 = 7 - source register no | ||
228 | subl %d0,%d1 | ||
229 | clrl %d0 |build a one-bit register mask in d0 | ||
230 | bsetl %d1,%d0 |set bit (7 - reg no) | ||
231 | fmovemx %d0,ETEMP(%a6) |load source to ETEMP | ||
232 | |||
233 | moveb #0x12,%d0 |0x12 = 010010 for cmdreg1b[15:10] | ||
234 | bfins %d0,CMDREG1B(%a6){#0:#6} |opclass 2, extended | ||
235 | | | ||
236 | | Set ETEMP exponent bit 15 as the opposite of ete14 | ||
237 | | | ||
238 | btst #6,ETEMP_EX(%a6) |check etemp exponent bit 14 | ||
239 | beq setete15 |bit 14 clear, so set ete15 | ||
240 | bclr #etemp15_bit,STAG(%a6) |bit 14 set, so clear ete15 | ||
241 | bra finish | ||
242 | setete15: | ||
243 | bset #etemp15_bit,STAG(%a6) | ||
244 | bra finish | ||
245 | |||
246 | | | ||
247 | | We have the case in which a conflict exists between the cu src or | ||
248 | | dest and the dest of the xu. We must clear the instruction in | ||
249 | | the cu and restore the state, allowing the instruction in the | ||
250 | | xu to complete. Remember, the instruction in the nu | ||
251 | | was exceptional, and was completed by the appropriate handler. | ||
252 | | If the result of the xu instruction is not exceptional, we can | ||
253 | | restore the instruction from the cu to the frame and continue | ||
254 | | processing the original exception. If the result is also | ||
255 | | exceptional, we choose to kill the process. | ||
256 | | | ||
257 | | Items saved from the stack: | ||
258 | | | ||
259 | | $3c stag - L_SCR1 | ||
260 | | $40 cmdreg1b - L_SCR2 | ||
261 | | $44 dtag - L_SCR3 | ||
262 | | | ||
263 | | The cu savepc is set to zero, and the frame is restored to the | ||
264 | | fpu. | ||
265 | | | ||
266 | op0_xu: | ||
267 | movel STAG(%a6),L_SCR1(%a6) |save stag lword | ||
268 | movel CMDREG1B(%a6),L_SCR2(%a6) |save cmdreg1b lword | ||
269 | movel DTAG(%a6),L_SCR3(%a6) |save dtag lword | ||
270 | andil #0xe0000000,L_SCR3(%a6) |keep only the top 3 bits of the saved dtag | ||
271 | moveb #0,CU_SAVEPC(%a6) |clear cu savepc before the frestore | ||
272 | movel (%a7)+,%d1 |save return address from bsr | ||
273 | frestore (%a7)+ |restore the frame to the fpu | ||
274 | fsave -(%a7) |and save the resulting state again | ||
275 | | | ||
276 | | Check if the instruction which just completed was exceptional. | ||
277 | | | ||
278 | cmpw #0x4060,(%a7) |is the new frame format word $4060? | ||
279 | beq op0_xb |yes, frame is busy; return address is still in d1 | ||
280 | | | ||
281 | | It is necessary to isolate the result of the instruction in the | ||
282 | | xu if it is to fp0 - fp3 and write that value to the USER_FPn | ||
283 | | locations on the stack. The correct destination register is in | ||
284 | | cmdreg2b. | ||
285 | | | ||
286 | bfextu CMDREG2B(%a6){#6:#3},%d0 |get dest register no | ||
287 | cmpil #3,%d0 | ||
288 | bgts op0_xi |dest is above fp3, nothing to copy | ||
289 | beqs op0_fp3 | ||
290 | cmpil #1,%d0 | ||
291 | blts op0_fp0 | ||
292 | beqs op0_fp1 | ||
293 | op0_fp2: | ||
294 | fmovemx %fp2-%fp2,USER_FP2(%a6) | ||
295 | bras op0_xi | ||
296 | op0_fp1: | ||
297 | fmovemx %fp1-%fp1,USER_FP1(%a6) | ||
298 | bras op0_xi | ||
299 | op0_fp0: | ||
300 | fmovemx %fp0-%fp0,USER_FP0(%a6) | ||
301 | bras op0_xi | ||
302 | op0_fp3: | ||
303 | fmovemx %fp3-%fp3,USER_FP3(%a6) | ||
304 | | | ||
305 | | The frame returned is idle. We must build a busy frame to hold | ||
306 | | the cu state information and setup etemp. | ||
307 | | | ||
308 | op0_xi: | ||
309 | movel #22,%d0 |clear 23 lwords | ||
310 | clrl (%a7) |clear the lword at the top of the stack | ||
311 | op0_loop: | ||
312 | clrl -(%a7) |push a zero lword | ||
313 | dbf %d0,op0_loop |dbf runs d0+1 = 23 passes | ||
314 | movel #0x40600000,-(%a7) |push frame header with format word $4060 | ||
315 | movel L_SCR1(%a6),STAG(%a6) |put saved stag back | ||
316 | movel L_SCR2(%a6),CMDREG1B(%a6) |put saved cmdreg1b back | ||
317 | movel L_SCR3(%a6),DTAG(%a6) |put saved (masked) dtag back | ||
318 | moveb #0x6,CU_SAVEPC(%a6) | ||
319 | movel %d1,-(%a7) |push the bsr return address back | ||
320 | bfextu CMDREG1B(%a6){#3:#3},%d0 |get source register no | ||
321 | movel #7,%d1 |d1 = 7 - source register no | ||
322 | subl %d0,%d1 | ||
323 | clrl %d0 |build a one-bit register mask in d0 | ||
324 | bsetl %d1,%d0 |set bit (7 - reg no) | ||
325 | fmovemx %d0,ETEMP(%a6) |load source to ETEMP | ||
326 | |||
327 | moveb #0x12,%d0 |0x12 = 010010 for cmdreg1b[15:10] | ||
328 | bfins %d0,CMDREG1B(%a6){#0:#6} |opclass 2, extended | ||
329 | | | ||
330 | | Set ETEMP exponent bit 15 as the opposite of ete14 | ||
331 | | | ||
332 | btst #6,ETEMP_EX(%a6) |check etemp exponent bit 14 | ||
333 | beq op0_sete15 |bit 14 clear, so set ete15 | ||
334 | bclr #etemp15_bit,STAG(%a6) |bit 14 set, so clear ete15 | ||
335 | bra finish | ||
336 | op0_sete15: | ||
337 | bset #etemp15_bit,STAG(%a6) | ||
338 | bra finish | ||
339 | |||
340 | | | ||
341 | | The frame returned is busy. It is not possible to reconstruct | ||
342 | | the code sequence to allow completion. We will jump to | ||
343 | | fpsp_fmt_error and allow the kernel to kill the process. | ||
344 | | | ||
345 | op0_xb: | ||
346 | jmp fpsp_fmt_error | ||
347 | |||
348 | | | ||
349 | | Check for opclass 2 and single size. If not both, exit. | ||
350 | | | ||
351 | op2sgl: | ||
352 | movew CMDREG1B(%a6),%d0 |get cu command word | ||
353 | andiw #0xFC00,%d0 |strip all but opclass and size | ||
354 | cmpiw #0x4400,%d0 |test for opclass 2 and size=sgl | ||
355 | bne fix_done |if not, it is not bug 1238 | ||
356 | | | ||
357 | | Check for cu dest equal to nu dest or equal to xu dest, with | ||
358 | | a cu and nu conflict taking priority over an xu conflict. If | ||
359 | | either, go and fix the bug condition. Otherwise, exit. | ||
360 | | | ||
361 | bfextu CMDREG1B(%a6){#6:#3},%d0 |get 1st dest | ||
362 | bfextu CMDREG3B(%a6){#6:#3},%d1 |get 3rd dest | ||
363 | cmpb %d0,%d1 |cmp 1st dest with 3rd dest | ||
364 | beq op2_com |if equal, continue bugfix | ||
365 | bfextu CMDREG2B(%a6){#6:#3},%d1 |get 2nd dest | ||
366 | cmpb %d0,%d1 |cmp 1st dest with 2nd dest | ||
367 | bne fix_done |if the reg checks fail, exit; else fall into op2_xu | ||
368 | | | ||
369 | | We have the case in which a conflict exists between the cu src or | ||
370 | | dest and the dest of the xu. We must clear the instruction in | ||
371 | | the cu and restore the state, allowing the instruction in the | ||
372 | | xu to complete. Remember, the instruction in the nu | ||
373 | | was exceptional, and was completed by the appropriate handler. | ||
374 | | If the result of the xu instruction is not exceptional, we can | ||
375 | | restore the instruction from the cu to the frame and continue | ||
376 | | processing the original exception. If the result is also | ||
377 | | exceptional, we choose to kill the process. | ||
378 | | | ||
379 | | Items saved from the stack: | ||
380 | | | ||
381 | | $3c stag - L_SCR1 | ||
382 | | $40 cmdreg1b - L_SCR2 | ||
383 | | $44 dtag - L_SCR3 | ||
384 | | etemp - FP_SCR2 | ||
385 | | | ||
386 | | The cu savepc is set to zero, and the frame is restored to the | ||
387 | | fpu. | ||
388 | | | ||
389 | op2_xu: | ||
390 | movel STAG(%a6),L_SCR1(%a6) |save stag lword | ||
391 | movel CMDREG1B(%a6),L_SCR2(%a6) |save cmdreg1b lword | ||
392 | movel DTAG(%a6),L_SCR3(%a6) |save dtag lword | ||
393 | andil #0xe0000000,L_SCR3(%a6) |keep only the top 3 bits of the saved dtag | ||
394 | moveb #0,CU_SAVEPC(%a6) |clear cu savepc before the frestore | ||
395 | movel ETEMP(%a6),FP_SCR2(%a6) |save etemp (3 lwords) in FP_SCR2 | ||
396 | movel ETEMP_HI(%a6),FP_SCR2+4(%a6) | ||
397 | movel ETEMP_LO(%a6),FP_SCR2+8(%a6) | ||
398 | movel (%a7)+,%d1 |save return address from bsr | ||
399 | frestore (%a7)+ |restore the frame to the fpu | ||
400 | fsave -(%a7) |and save the resulting state again | ||
401 | | | ||
402 | | Check if the instruction which just completed was exceptional. | ||
403 | | | ||
404 | cmpw #0x4060,(%a7) |is the new frame format word $4060? | ||
405 | beq op2_xb |yes, frame is busy; return address is still in d1 | ||
406 | | | ||
407 | | It is necessary to isolate the result of the instruction in the | ||
408 | | xu if it is to fp0 - fp3 and write that value to the USER_FPn | ||
409 | | locations on the stack. The correct destination register is in | ||
410 | | cmdreg2b. | ||
411 | | | ||
412 | bfextu CMDREG2B(%a6){#6:#3},%d0 |get dest register no | ||
413 | cmpil #3,%d0 | ||
414 | bgts op2_xi |dest is above fp3, nothing to copy | ||
415 | beqs op2_fp3 | ||
416 | cmpil #1,%d0 | ||
417 | blts op2_fp0 | ||
418 | beqs op2_fp1 | ||
419 | op2_fp2: | ||
420 | fmovemx %fp2-%fp2,USER_FP2(%a6) | ||
421 | bras op2_xi | ||
422 | op2_fp1: | ||
423 | fmovemx %fp1-%fp1,USER_FP1(%a6) | ||
424 | bras op2_xi | ||
425 | op2_fp0: | ||
426 | fmovemx %fp0-%fp0,USER_FP0(%a6) | ||
427 | bras op2_xi | ||
428 | op2_fp3: | ||
429 | fmovemx %fp3-%fp3,USER_FP3(%a6) | ||
430 | | | ||
431 | | The frame returned is idle. We must build a busy frame to hold | ||
432 | | the cu state information and fix up etemp. | ||
433 | | | ||
434 | op2_xi: | ||
435 | movel #22,%d0 |clear 23 lwords | ||
436 | clrl (%a7) |clear the lword at the top of the stack | ||
437 | op2_loop: | ||
438 | clrl -(%a7) |push a zero lword | ||
439 | dbf %d0,op2_loop |dbf runs d0+1 = 23 passes | ||
440 | movel #0x40600000,-(%a7) |push frame header with format word $4060 | ||
441 | movel L_SCR1(%a6),STAG(%a6) |put saved stag back | ||
442 | movel L_SCR2(%a6),CMDREG1B(%a6) |put saved cmdreg1b back | ||
443 | movel L_SCR3(%a6),DTAG(%a6) |put saved (masked) dtag back | ||
444 | moveb #0x6,CU_SAVEPC(%a6) | ||
445 | movel FP_SCR2(%a6),ETEMP(%a6) |put saved etemp (3 lwords) back | ||
446 | movel FP_SCR2+4(%a6),ETEMP_HI(%a6) | ||
447 | movel FP_SCR2+8(%a6),ETEMP_LO(%a6) | ||
448 | movel %d1,-(%a7) |push the bsr return address back | ||
449 | bra op2_com |continue with the common opclass 2 fix | ||
450 | |||
451 | | | ||
452 | | We have the opclass 2 single source situation. | ||
453 | | | ||
454 | op2_com: | ||
455 | moveb #0x15,%d0 |0x15 = 010101 for cmdreg1b[15:10] | ||
456 | bfins %d0,CMDREG1B(%a6){#0:#6} |opclass 2, double | ||
457 | |||
458 | cmpw #0x407F,ETEMP_EX(%a6) |single +max | ||
459 | bnes case2 | ||
460 | movew #0x43FF,ETEMP_EX(%a6) |to double +max | ||
461 | bra finish | ||
462 | case2: | ||
463 | cmpw #0xC07F,ETEMP_EX(%a6) |single -max | ||
464 | bnes case3 | ||
465 | movew #0xC3FF,ETEMP_EX(%a6) |to double -max | ||
466 | bra finish | ||
467 | case3: | ||
468 | cmpw #0x3F80,ETEMP_EX(%a6) |single +min | ||
469 | bnes case4 | ||
470 | movew #0x3C00,ETEMP_EX(%a6) |to double +min | ||
471 | bra finish | ||
472 | case4: | ||
473 | cmpw #0xBF80,ETEMP_EX(%a6) |single -min | ||
474 | bne fix_done |none of the four cases; exponent left as is | ||
475 | movew #0xBC00,ETEMP_EX(%a6) |to double -min | ||
476 | bra finish | ||
477 | | | ||
478 | | The frame returned is busy. It is not possible to reconstruct | ||
479 | | the code sequence to allow completion. fpsp_fmt_error causes | ||
480 | | an fline illegal instruction to be executed. | ||
481 | | | ||
482 | | You should replace the jump to fpsp_fmt_error with a jump | ||
483 | | to the entry point used to kill a process. | ||
484 | | | ||
485 | op2_xb: | ||
486 | jmp fpsp_fmt_error | ||
487 | |||
488 | | | ||
489 | | Enter here if the case is not one of the situations affected by | ||
490 | | bug #1238, or if the fix is completed, and exit. | ||
491 | | | ||
492 | finish: | ||
493 | fix_done: | ||
494 | rts | ||
495 | |||
496 | |end | ||
diff --git a/arch/m68k/fpsp040/decbin.S b/arch/m68k/fpsp040/decbin.S new file mode 100644 index 000000000000..2160609e328d --- /dev/null +++ b/arch/m68k/fpsp040/decbin.S | |||
@@ -0,0 +1,506 @@ | |||
1 | | | ||
2 | | decbin.sa 3.3 12/19/90 | ||
3 | | | ||
4 | | Description: Converts normalized packed bcd value pointed to by | ||
5 | | register A6 to extended-precision value in FP0. | ||
6 | | | ||
7 | | Input: Normalized packed bcd value in ETEMP(a6). | ||
8 | | | ||
9 | | Output: Exact floating-point representation of the packed bcd value. | ||
10 | | | ||
11 | | Saves and Modifies: D2-D5 | ||
12 | | | ||
13 | | Speed: The program decbin takes ??? cycles to execute. | ||
14 | | | ||
15 | | Object Size: | ||
16 | | | ||
17 | | External Reference(s): None. | ||
18 | | | ||
19 | | Algorithm: | ||
20 | | Expected is a normal bcd (i.e. non-exceptional; all inf, zero, | ||
21 | | and NaN operands are dispatched without entering this routine) | ||
22 | | value in 68881/882 format at location ETEMP(A6). | ||
23 | | | ||
24 | | A1. Convert the bcd exponent to binary by successive adds and muls. | ||
25 | | Set the sign according to SE. Subtract 16 to compensate | ||
26 | | for the mantissa which is to be interpreted as 17 integer | ||
27 | | digits, rather than 1 integer and 16 fraction digits. | ||
28 | | Note: this operation can never overflow. | ||
29 | | | ||
30 | | A2. Convert the bcd mantissa to binary by successive | ||
31 | | adds and muls in FP0. Set the sign according to SM. | ||
32 | | The mantissa digits will be converted with the decimal point | ||
33 | | assumed following the least-significant digit. | ||
34 | | Note: this operation can never overflow. | ||
35 | | | ||
36 | | A3. Count the number of leading/trailing zeros in the | ||
37 | | bcd string. If SE is positive, count the leading zeros; | ||
38 | | if negative, count the trailing zeros. Set the adjusted | ||
39 | | exponent equal to the exponent from A1 and the zero count | ||
40 | | added if SE = 1 and subtracted if SE = 0. Scale the | ||
41 | | mantissa the equivalent of forcing in the bcd value: | ||
42 | | | ||
43 | | SE = 0 a non-zero digit in the integer position | ||
44 | | SE = 1 a non-zero digit in Mant0, lsd of the fraction | ||
45 | | | ||
46 | | this will ensure that any value, regardless of its | ||
47 | | representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted | ||
48 | | consistently. | ||
49 | | | ||
50 | | A4. Calculate the factor 10^exp in FP1 using a table of | ||
51 | | 10^(2^n) values. To reduce the error in forming factors | ||
52 | | greater than 10^27, a directed rounding scheme is used with | ||
53 | | tables rounded to RN, RM, and RP, according to the table | ||
54 | | in the comments of the pwrten section. | ||
55 | | | ||
56 | | A5. Form the final binary number by scaling the mantissa by | ||
57 | | the exponent factor. This is done by multiplying the | ||
58 | | mantissa in FP0 by the factor in FP1 if the adjusted | ||
59 | | exponent sign is positive, and dividing FP0 by FP1 if | ||
60 | | it is negative. | ||
61 | | | ||
62 | | Clean up and return. Check if the final mul or div resulted | ||
63 | | in an inex2 exception. If so, set inex1 in the fpsr and | ||
64 | | check if the inex1 exception is enabled. If so, set d7 upper | ||
65 | | word to $0100. This will signal unimp.sa that an enabled inex1 | ||
66 | | exception occurred. Unimp will fix the stack. | ||
67 | | | ||
68 | |||
69 | | Copyright (C) Motorola, Inc. 1990 | ||
70 | | All Rights Reserved | ||
71 | | | ||
72 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
73 | | The copyright notice above does not evidence any | ||
74 | | actual or intended publication of such source code. | ||
75 | |||
76 | |DECBIN idnt 2,1 | Motorola 040 Floating Point Software Package | ||
77 | |||
78 | |section 8 | ||
79 | |||
80 | #include "fpsp.h" | ||
81 | |||
82 | | | ||
83 | | PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded | ||
84 | | to nearest, minus, and plus, respectively. The tables include | ||
85 | | 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding | ||
86 | | is required until the power is greater than 27, however, all | ||
87 | | tables include the first 5 for ease of indexing. | ||
88 | | | ||
89 | |xref PTENRN | ||
90 | |xref PTENRM | ||
91 | |xref PTENRP | ||
92 | |||
93 | RTABLE: .byte 0,0,0,0 |row = user rnd mode, col = {SM,SE}; 0=RN 2=RM 3=RP. User RN: always RN | ||
94 | .byte 2,3,2,3 |user RZ: RM,RP,RM,RP | ||
95 | .byte 2,3,3,2 |user RM: RM,RP,RP,RM | ||
96 | .byte 3,2,2,3 |user RP: RP,RM,RM,RP | ||
97 | |||
98 | .global decbin | ||
99 | .global calc_e | ||
100 | .global pwrten | ||
101 | .global calc_m | ||
102 | .global norm | ||
103 | .global ap_st_z | ||
104 | .global ap_st_n | ||
105 | | | ||
106 | .set FNIBS,7 |dbf count: 8 mantissa digits per longword | ||
107 | .set FSTRT,0 |bit offset of the first digit in a mantissa longword | ||
108 | | | ||
109 | .set ESTRT,4 |bit offset of the first exponent digit in bcd word 1 | ||
110 | .set EDIGITS,2 |dbf count: 3 exponent digits | ||
111 | | | ||
112 | | Constants in single precision | ||
113 | FZERO: .long 0x00000000 |0.0 | ||
114 | FONE: .long 0x3F800000 |1.0 | ||
115 | FTEN: .long 0x41200000 |10.0 | ||
116 | |||
117 | .set TEN,10 |integer radix used by the exponent loop in calc_e | ||
118 | |||
119 | | | ||
120 | decbin: | ||
121 | | fmovel #0,FPCR ;clr real fpcr | ||
122 | moveml %d2-%d5,-(%a7) |save d2-d5; restored just before the final rts | ||
123 | | | ||
124 | | Calculate exponent: | ||
125 | | 1. Copy bcd value in memory for use as a working copy. | ||
126 | | 2. Calculate absolute value of exponent in d1 by mul and add. | ||
127 | | 3. Correct for exponent sign. | ||
128 | | 4. Subtract 16 to compensate for interpreting the mant as all integer digits. | ||
129 | | (i.e., all digits assumed left of the decimal point.) | ||
130 | | | ||
131 | | Register usage: | ||
132 | | | ||
133 | | calc_e: | ||
134 | | (*) d0: temp digit storage | ||
135 | | (*) d1: accumulator for binary exponent | ||
136 | | (*) d2: digit count | ||
137 | | (*) d3: offset pointer | ||
138 | | ( ) d4: first word of bcd | ||
139 | | ( ) a0: pointer to working bcd value | ||
140 | | ( ) a6: pointer to original bcd value | ||
141 | | (*) FP_SCR1: working copy of original bcd value | ||
142 | | (*) L_SCR1: copy of original exponent word | ||
143 | | | ||
144 | calc_e: | ||
145 | movel #EDIGITS,%d2 |dbf count for the 3 exponent digits | ||
146 | moveql #ESTRT,%d3 |bit offset of the first exponent digit in d4 | ||
147 | leal FP_SCR1(%a6),%a0 |load tmp bcd storage address | ||
148 | movel ETEMP(%a6),(%a0) |save input bcd value | ||
149 | movel ETEMP_HI(%a6),4(%a0) |save words 2 and 3 | ||
150 | movel ETEMP_LO(%a6),8(%a0) |and work with these | ||
151 | movel (%a0),%d4 |get first word of bcd | ||
152 | clrl %d1 |zero d1 for accumulator | ||
153 | e_gd: | ||
154 | mulul #TEN,%d1 |mul partial product by one digit place | ||
155 | bfextu %d4{%d3:#4},%d0 |get the digit and zero extend into d0 | ||
156 | addl %d0,%d1 |d1 = d1 + d0 | ||
157 | addqb #4,%d3 |advance d3 to the next digit | ||
158 | dbf %d2,e_gd |loop until all 3 digits have been used | ||
159 | btst #30,%d4 |get SE | ||
160 | beqs e_pos |don't negate if pos | ||
161 | negl %d1 |negate before subtracting | ||
162 | e_pos: | ||
163 | subl #16,%d1 |sub to compensate for shift of mant | ||
164 | bges e_save |if still pos, do not neg | ||
165 | negl %d1 |now negative, make pos and set SE | ||
166 | orl #0x40000000,%d4 |set SE in d4, | ||
167 | orl #0x40000000,(%a0) |and in working bcd | ||
168 | e_save: | ||
169 | movel %d1,L_SCR1(%a6) |save abs(adjusted exp) in memory; its sign is SE | ||
170 | | | ||
171 | | | ||
172 | | Calculate mantissa: | ||
173 | | 1. Calculate absolute value of mantissa in fp0 by mul and add. | ||
174 | | 2. Correct for mantissa sign. | ||
175 | | (i.e., all digits assumed left of the decimal point.) | ||
176 | | | ||
177 | | Register usage: | ||
178 | | | ||
179 | | calc_m: | ||
180 | | (*) d0: temp digit storage | ||
181 | | (*) d1: lword counter | ||
182 | | (*) d2: digit count | ||
183 | | (*) d3: offset pointer | ||
184 | | ( ) d4: words 2 and 3 of bcd | ||
185 | | ( ) a0: pointer to working bcd value | ||
186 | | ( ) a6: pointer to original bcd value | ||
187 | | (*) fp0: mantissa accumulator | ||
188 | | ( ) FP_SCR1: working copy of original bcd value | ||
189 | | ( ) L_SCR1: copy of original exponent word | ||
190 | | | ||
191 | calc_m: | ||
192 | moveql #1,%d1 |lword counter, init to 1 (second longword) | ||
193 | fmoves FZERO,%fp0 |accumulator | ||
194 | | | ||
195 | | | ||
196 | | Since the packed number has a long word between the first & second parts, | ||
197 | | get the integer digit then skip down & get the rest of the | ||
198 | | mantissa. We will unroll the loop once. | ||
199 | | | ||
200 | bfextu (%a0){#28:#4},%d0 |integer part is ls digit in long word | ||
201 | faddb %d0,%fp0 |add digit to sum in fp0 | ||
202 | | | ||
203 | | | ||
204 | | Get the rest of the mantissa. | ||
205 | | | ||
206 | loadlw: | ||
207 | movel (%a0,%d1.L*4),%d4 |load mantissa longword into d4 | ||
208 | moveql #FSTRT,%d3 |bit offset of the first digit in d4 | ||
209 | moveql #FNIBS,%d2 |dbf count: 8 digits in this longword | ||
210 | md2b: | ||
211 | fmuls FTEN,%fp0 |fp0 = fp0 * 10 | ||
212 | bfextu %d4{%d3:#4},%d0 |get the digit and zero extend | ||
213 | faddb %d0,%fp0 |fp0 = fp0 + digit | ||
214 | | | ||
215 | | | ||
216 | | If all the digits (8) in that long word have been converted (d2=0), | ||
217 | | then inc d1 (=2) to point to the next long word and reset d3 to 0 | ||
218 | | to initialize the digit offset, and set d2 to 7 for the digit count; | ||
219 | | else continue with this long word. | ||
220 | | | ||
221 | addqb #4,%d3 |advance d3 to the next digit | ||
222 | dbf %d2,md2b |check for last digit in this lw | ||
223 | nextlw: | ||
224 | addql #1,%d1 |inc lw pointer in mantissa | ||
225 | cmpl #2,%d1 |test for last lw | ||
226 | ble loadlw |if d1 <= 2, go convert that longword too | ||
227 | |||
228 | | | ||
229 | | Check the sign of the mant and make the value in fp0 the same sign. | ||
230 | | | ||
231 | m_sign: | ||
232 | btst #31,(%a0) |test SM, the sign of the mantissa | ||
233 | beq ap_st_z |if clear, go to append/strip zeros | ||
234 | fnegx %fp0 |if set, negate fp0, then fall through to ap_st_z | ||
235 | |||
236 | | | ||
237 | | Append/strip zeros: | ||
238 | | | ||
239 | | For adjusted exponents which have an absolute value greater than 27*, | ||
240 | | this routine calculates the amount needed to normalize the mantissa | ||
241 | | for the adjusted exponent. That number is subtracted from the exp | ||
242 | | if the exp was positive, and added if it was negative. The purpose | ||
243 | | of this is to reduce the value of the exponent and the possibility | ||
244 | | of error in calculation of pwrten. | ||
245 | | | ||
246 | | 1. Branch on the sign of the adjusted exponent. | ||
247 | | 2p.(positive exp) | ||
248 | | 2. Check M16 and the digits in lwords 2 and 3 in descending order. | ||
249 | | 3. Add one for each zero encountered until a non-zero digit. | ||
250 | | 4. Subtract the count from the exp. | ||
251 | | 5. Check if the exp has crossed zero in #3 above; make the exp abs | ||
252 | | and set SE. | ||
253 | | 6. Multiply the mantissa by 10**count. | ||
254 | | 2n.(negative exp) | ||
255 | | 2. Check the digits in lwords 3 and 2 in descending order. | ||
256 | | 3. Add one for each zero encountered until a non-zero digit. | ||
257 | | 4. Add the count to the exp. | ||
258 | | 5. Check if the exp has crossed zero in #3 above; clear SE. | ||
259 | | 6. Divide the mantissa by 10**count. | ||
260 | | | ||
261 | | *Why 27? If the adjusted exponent is within -28 < expA < 28, then | ||
262 | | any adjustment due to append/strip zeros will drive the resultant | ||
263 | | exponent towards zero. Since all pwrten constants with a power | ||
264 | | of 27 or less are exact, there is no need to use this routine to | ||
265 | | attempt to lessen the resultant exponent. | ||
266 | | | ||
267 | | Register usage: | ||
268 | | | ||
269 | | ap_st_z: | ||
270 | | (*) d0: temp digit storage | ||
271 | | (*) d1: zero count | ||
272 | | (*) d2: digit count | ||
273 | | (*) d3: offset pointer | ||
274 | | ( ) d4: first word of bcd | ||
275 | | (*) d5: lword counter | ||
276 | | ( ) a0: pointer to working bcd value | ||
277 | | ( ) FP_SCR1: working copy of original bcd value | ||
278 | | ( ) L_SCR1: copy of original exponent word | ||
279 | | | ||
280 | | | ||
281 | | First check the absolute value of the exponent to see if this | ||
282 | | routine is necessary. If so, then check the sign of the exponent | ||
283 | | and do append (+) or strip (-) zeros accordingly. | ||
284 | | This section handles a positive adjusted exponent. | ||
285 | | | ||
286 | ap_st_z: | ||
287 | movel L_SCR1(%a6),%d1 |load expA for range test | ||
288 | cmpl #27,%d1 |test is with 27 | ||
289 | ble pwrten |if abs(expA) <28, skip ap/st zeros | ||
290 | btst #30,(%a0) |check sign of exp | ||
291 | bne ap_st_n |if neg, go to neg side | ||
292 | clrl %d1 |zero count reg | ||
293 | movel (%a0),%d4 |load lword 1 to d4 | ||
294 | bfextu %d4{#28:#4},%d0 |get M16 in d0 | ||
295 | bnes ap_p_fx |if M16 is non-zero, go fix exp | ||
296 | addql #1,%d1 |inc zero count | ||
297 | moveql #1,%d5 |init lword counter | ||
298 | movel (%a0,%d5.L*4),%d4 |get lword 2 to d4 | ||
299 | bnes ap_p_cl |if lw 2 is non-zero, go scan its digits | ||
300 | addql #8,%d1 |lw 2 is all zeros: skip it and inc count by 8 | ||
301 | addql #1,%d5 |inc lword counter | ||
302 | movel (%a0,%d5.L*4),%d4 |get lword 3 to d4 | ||
303 | ap_p_cl: | ||
304 | clrl %d3 |init offset reg | ||
305 | moveql #7,%d2 |init digit counter | ||
306 | ap_p_gd: | ||
307 | bfextu %d4{%d3:#4},%d0 |get digit | ||
308 | bnes ap_p_fx |if non-zero, go to fix exp | ||
309 | addql #4,%d3 |point to next digit | ||
310 | addql #1,%d1 |inc zero count | ||
311 | dbf %d2,ap_p_gd |get next digit | ||
312 | ap_p_fx: | ||
313 | movel %d1,%d0 |copy zero count to d0 | ||
314 | movel L_SCR1(%a6),%d1 |get adjusted exp from memory | ||
315 | subl %d0,%d1 |subtract count from exp | ||
316 | bges ap_p_fm |if still pos, go fix mantissa | ||
317 | negl %d1 |now its neg; get abs | ||
318 | movel (%a0),%d4 |load lword 1 to d4 | ||
319 | orl #0x40000000,%d4 | and set SE in d4 | ||
320 | orl #0x40000000,(%a0) | and in memory | ||
321 | | | ||
322 | | Calculate the mantissa multiplier to compensate for the stripping of | ||
323 | | zeros from the mantissa. | ||
324 | | | ||
325 | ap_p_fm: | ||
326 | movel #PTENRN,%a1 |get address of power-of-ten table | ||
327 | clrl %d3 |init table index | ||
328 | fmoves FONE,%fp1 |init fp1 to 1 | ||
329 | moveql #3,%d2 |init d2 (note: d2 is not read by the loop below) | ||
330 | ap_p_el: | ||
331 | asrl #1,%d0 |shift lsb of zero count into carry | ||
332 | bccs ap_p_en |if bit is 0, skip the mul | ||
333 | fmulx (%a1,%d3),%fp1 |mul by 10**(d3_bit_no) | ||
334 | ap_p_en: | ||
335 | addl #12,%d3 |advance d3 by 12 bytes to the next PTENRN entry | ||
336 | tstl %d0 |check if d0 is zero | ||
337 | bnes ap_p_el |if not, get next bit | ||
338 | fmulx %fp1,%fp0 |mul mantissa by 10**(zero count) | ||
339 | bra pwrten |go calc pwrten | ||
340 | | | ||
341 | | This section handles a negative adjusted exponent. | ||
342 | | | ||
343 | ap_st_n: | ||
344 | clrl %d1 |clr zero count | ||
345 | moveql #2,%d5 |set up d5 to point to lword 3 | ||
346 | movel (%a0,%d5.L*4),%d4 |get lword 3 | ||
347 | bnes ap_n_cl |if not zero, check digits | ||
348 | subl #1,%d5 |dec d5 to point to lword 2 | ||
349 | addql #8,%d1 |lw 3 is all zeros: inc count by 8 | ||
350 | movel (%a0,%d5.L*4),%d4 |get lword 2 | ||
351 | ap_n_cl: | ||
352 | movel #28,%d3 |point to last digit | ||
353 | moveql #7,%d2 |init digit counter | ||
354 | ap_n_gd: | ||
355 | bfextu %d4{%d3:#4},%d0 |get digit | ||
356 | bnes ap_n_fx |if non-zero, go to exp fix | ||
357 | subql #4,%d3 |point to previous digit | ||
358 | addql #1,%d1 |inc zero count | ||
359 | dbf %d2,ap_n_gd |get next digit | ||
360 | ap_n_fx: | ||
361 | movel %d1,%d0 |copy zero count to d0 | ||
362 | movel L_SCR1(%a6),%d1 |get abs(adjusted exp) from memory | ||
363 | subl %d0,%d1 |subtract count from abs(exp) | ||
364 | bgts ap_n_fm |if still > 0 (exp still neg), go fix mantissa | ||
365 | negl %d1 |take abs of exp and clr SE | ||
366 | movel (%a0),%d4 |load lword 1 to d4 | ||
367 | andl #0xbfffffff,%d4 | and clr SE in d4 | ||
368 | andl #0xbfffffff,(%a0) | and in memory | ||
369 | | | ||
370 | | Calculate the mantissa multiplier to compensate for the appending of | ||
371 | | zeros to the mantissa. | ||
372 | | | ||
373 | ap_n_fm: | ||
374 | movel #PTENRN,%a1 |get address of power-of-ten table | ||
375 | clrl %d3 |init table index | ||
376 | fmoves FONE,%fp1 |init fp1 to 1 | ||
377 | moveql #3,%d2 |init d2 (note: d2 is not read by the loop below) | ||
378 | ap_n_el: | ||
379 | asrl #1,%d0 |shift lsb of zero count into carry | ||
380 | bccs ap_n_en |if bit is 0, skip the mul | ||
381 | fmulx (%a1,%d3),%fp1 |mul by 10**(d3_bit_no) | ||
382 | ap_n_en: | ||
383 | addl #12,%d3 |advance d3 by 12 bytes to the next PTENRN entry | ||
384 | tstl %d0 |check if d0 is zero | ||
385 | bnes ap_n_el |if not, get next bit | ||
386 | fdivx %fp1,%fp0 |div mantissa by 10**(zero count); fall into pwrten | ||
387 | | | ||
388 | | | ||
389 | | Calculate power-of-ten factor from adjusted and shifted exponent. | ||
390 | | | ||
391 | | Register usage: | ||
392 | | | ||
393 | | pwrten: | ||
394 | | (*) d0: temp | ||
395 | | ( ) d1: exponent | ||
396 | | (*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp | ||
397 | | (*) d3: FPCR work copy | ||
398 | | ( ) d4: first word of bcd | ||
399 | | (*) a1: RTABLE pointer | ||
400 | | calc_p: | ||
401 | | (*) d0: temp | ||
402 | | ( ) d1: exponent | ||
403 | | (*) d3: PWRTxx table index | ||
404 | | ( ) a0: pointer to working copy of bcd | ||
405 | | (*) a1: PWRTxx pointer | ||
406 | | (*) fp1: power-of-ten accumulator | ||
407 | | | ||
408 | | Pwrten calculates the exponent factor in the selected rounding mode | ||
409 | | according to the following table: | ||
410 | | | ||
411 | | Sign of Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode | ||
412 | | | ||
413 | | ANY ANY RN RN | ||
414 | | | ||
415 | | + + RP RP | ||
416 | | - + RP RM | ||
417 | | + - RP RM | ||
418 | | - - RP RP | ||
419 | | | ||
420 | | + + RM RM | ||
421 | | - + RM RP | ||
422 | | + - RM RP | ||
423 | | - - RM RM | ||
424 | | | ||
425 | | + + RZ RM | ||
426 | | - + RZ RM | ||
427 | | + - RZ RP | ||
428 | | - - RZ RP | ||
429 | | | ||
430 | | | ||
431 | pwrten: | ||
432 | movel USER_FPCR(%a6),%d3 |get user's FPCR | ||
433 | bfextu %d3{#26:#2},%d2 |isolate rounding mode bits | ||
434 | movel (%a0),%d4 |reload 1st bcd word to d4 | ||
435 | asll #2,%d2 |format d2 to be | ||
436 | bfextu %d4{#0:#2},%d0 | {FPCR[6],FPCR[5],SM,SE} | ||
437 | addl %d0,%d2 |in d2 as index into RTABLE | ||
438 | leal RTABLE,%a1 |load rtable base | ||
439 | moveb (%a1,%d2),%d0 |load new rounding bits from table | ||
440 | clrl %d3 |clear d3 to force no exc and extended | ||
441 | bfins %d0,%d3{#26:#2} |stuff new rounding bits in FPCR | ||
442 | fmovel %d3,%FPCR |write new FPCR | ||
443 | asrl #1,%d0 |select PTENxx table: bit 0 set means RP | ||
444 | bccs not_rp |bit 0 clear: RM or RN | ||
445 | leal PTENRP,%a1 |it is RP | ||
446 | bras calc_p |go to init section | ||
447 | not_rp: | ||
448 | asrl #1,%d0 |keep checking: bit 1 set means RM | ||
449 | bccs not_rm |bit 1 clear as well: RN | ||
450 | leal PTENRM,%a1 |it is RM | ||
451 | bras calc_p |go to init section | ||
452 | not_rm: | ||
453 | leal PTENRN,%a1 |it is RN | ||
454 | calc_p: | ||
455 | movel %d1,%d0 |copy exp to d0;use d0 | ||
456 | bpls no_neg |if exp is negative, | ||
457 | negl %d0 |invert it | ||
458 | orl #0x40000000,(%a0) |and set SE bit | ||
459 | no_neg: | ||
460 | clrl %d3 |table index | ||
461 | fmoves FONE,%fp1 |init fp1 to 1 | ||
462 | e_loop: | ||
463 | asrl #1,%d0 |shift next bit into carry | ||
464 | bccs e_next |if zero, skip the mul | ||
465 | fmulx (%a1,%d3),%fp1 |mul by 10**(d3_bit_no) | ||
466 | e_next: | ||
467 | addl #12,%d3 |advance d3 by 12 bytes to the next table entry | ||
468 | tstl %d0 |check if d0 is zero | ||
469 | bnes e_loop |not zero, continue shifting | ||
470 | | | ||
471 | | | ||
472 | | Check the sign of the adjusted exp and make the value in fp0 the | ||
473 | | same sign. If the exp was pos then multiply fp1*fp0; | ||
474 | | else divide fp0/fp1. | ||
475 | | | ||
476 | | Register Usage: | ||
477 | | norm: | ||
478 | | ( ) a0: pointer to working bcd value | ||
479 | | (*) fp0: mantissa accumulator | ||
480 | | ( ) fp1: scaling factor - 10**(abs(exp)) | ||
481 | | | ||
482 | norm: | ||
483 | btst #30,(%a0) |test the sign of the exponent | ||
484 | beqs mul |if clear, go to multiply | ||
485 | div: | ||
486 | fdivx %fp1,%fp0 |exp is negative, so divide mant by 10**abs(exp) | ||
487 | bras end_dec | ||
488 | mul: | ||
489 | fmulx %fp1,%fp0 |exp is positive, so multiply mant by 10**abs(exp) | ||
490 | | | ||
491 | | | ||
492 | | Clean up and return with result in fp0. | ||
493 | | | ||
494 | | If the final mul/div in decbin incurred an inex exception, | ||
495 | | it will be inex2, but will be reported as inex1 by get_op. | ||
496 | | | ||
497 | end_dec: | ||
498 | fmovel %FPSR,%d0 |get status register | ||
499 | bclrl #inex2_bit+8,%d0 |test for inex2 and clear it | ||
500 | fmovel %d0,%FPSR |return status reg w/o inex2 | ||
501 | beqs no_exc |skip if inex2 was not set (Z from the bclr above) | ||
502 | orl #inx1a_mask,USER_FPSR(%a6) |set inex1/ainex | ||
503 | no_exc: | ||
504 | moveml (%a7)+,%d2-%d5 |restore d2-d5 saved at entry | ||
505 | rts |return with result in fp0 | ||
506 | |end | ||
diff --git a/arch/m68k/fpsp040/do_func.S b/arch/m68k/fpsp040/do_func.S new file mode 100644 index 000000000000..81f6a9856dce --- /dev/null +++ b/arch/m68k/fpsp040/do_func.S | |||
@@ -0,0 +1,559 @@ | |||
1 | | | ||
2 | | do_func.sa 3.4 2/18/91 | ||
3 | | | ||
4 | | Do_func performs the unimplemented operation. The operation | ||
5 | | to be performed is determined from the lower 7 bits of the | ||
6 | | extension word (except in the case of fmovecr and fsincos). | ||
7 | | The opcode and tag bits form an index into a jump table in | ||
8 | | tbldo.sa. Cases of zero, infinity and NaN are handled in | ||
9 | | do_func by forcing the default result. Normalized and | ||
10 | | denormalized (there are no unnormalized numbers at this | ||
11 | | point) are passed onto the emulation code. | ||
12 | | | ||
13 | | CMDREG1B and STAG are extracted from the fsave frame | ||
14 | | and combined to form the table index. The function called | ||
15 | | will start with a0 pointing to the ETEMP operand. Dyadic | ||
16 | | functions can find FPTEMP at -12(a0). | ||
17 | | | ||
18 | | Called functions return their result in fp0. Sincos returns | ||
19 | | sin(x) in fp0 and cos(x) in fp1. | ||
20 | | | ||
21 | |||
22 | | Copyright (C) Motorola, Inc. 1990 | ||
23 | | All Rights Reserved | ||
24 | | | ||
25 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
26 | | The copyright notice above does not evidence any | ||
27 | | actual or intended publication of such source code. | ||
28 | |||
29 | DO_FUNC: |idnt 2,1 | Motorola 040 Floating Point Software Package | ||
30 | |||
31 | |section 8 | ||
32 | |||
33 | #include "fpsp.h" | ||
34 | |||
35 | |xref t_dz2 | ||
36 | |xref t_operr | ||
37 | |xref t_inx2 | ||
38 | |xref t_resdnrm | ||
39 | |xref dst_nan | ||
40 | |xref src_nan | ||
41 | |xref nrm_set | ||
42 | |xref sto_cos | ||
43 | |||
44 | |xref tblpre | ||
45 | |xref slognp1,slogn,slog10,slog2 | ||
46 | |xref slognd,slog10d,slog2d | ||
47 | |xref smod,srem | ||
48 | |xref sscale | ||
49 | |xref smovcr | ||
50 | |||
51 | PONE: .long 0x3fff0000,0x80000000,0x00000000 |+1 | ||
52 | MONE: .long 0xbfff0000,0x80000000,0x00000000 |-1 | ||
53 | PZERO: .long 0x00000000,0x00000000,0x00000000 |+0 | ||
54 | MZERO: .long 0x80000000,0x00000000,0x00000000 |-0 | ||
55 | PINF: .long 0x7fff0000,0x00000000,0x00000000 |+inf | ||
56 | MINF: .long 0xffff0000,0x00000000,0x00000000 |-inf | ||
57 | QNAN: .long 0x7fff0000,0xffffffff,0xffffffff |non-signaling nan | ||
58 | PPIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235 |+PI/2 | ||
59 | MPIBY2: .long 0xbFFF0000,0xC90FDAA2,0x2168C235 |-PI/2 | ||
60 | |||
61 | .global do_func | ||
62 | do_func: | ||
63 | clrb CU_ONLY(%a6) |clear the CU_ONLY byte | ||
64 | | | ||
65 | | Check for fmovecr. It does not follow the format of fp gen | ||
66 | | unimplemented instructions. The test is on the upper 6 bits; | ||
67 | | if they are $17, the inst is fmovecr. Call entry smovcr | ||
68 | | directly. | ||
69 | | | ||
70 | bfextu CMDREG1B(%a6){#0:#6},%d0 |get opclass and src fields | ||
71 | cmpil #0x17,%d0 |if op class and size fields are $17, | ||
72 | | ;it is FMOVECR; if not, continue | ||
73 | bnes not_fmovecr | ||
74 | jmp smovcr |fmovecr; jmp directly to emulation | ||
75 | |||
76 | not_fmovecr: | ||
77 | movew CMDREG1B(%a6),%d0 |get the command word | ||
78 | andl #0x7F,%d0 |keep only the lower 7 bits (the operation) | ||
79 | cmpil #0x38,%d0 |if the extension is >= $38, | ||
80 | bge serror |it is illegal | ||
81 | bfextu STAG(%a6){#0:#3},%d1 |d1 = upper 3 bits of STAG | ||
82 | lsll #3,%d0 |make room for STAG | ||
83 | addl %d1,%d0 |combine for final index into table | ||
84 | leal tblpre,%a1 |start of monster jump table | ||
85 | movel (%a1,%d0.w*4),%a1 |real target address | ||
86 | leal ETEMP(%a6),%a0 |a0 is pointer to src op | ||
87 | movel USER_FPCR(%a6),%d1 |d1 = user's FPCR | ||
88 | andl #0xFF,%d1 | discard all but rounding mode/prec | ||
89 | fmovel #0,%fpcr |clear the real FPCR before entering the handler | ||
90 | jmp (%a1) |jump (not call) to the selected handler | ||
91 | | | ||
92 | | ERROR | ||
93 | | | ||
94 | .global serror | ||
95 | serror: | ||
96 | st STORE_FLG(%a6) |set the STORE_FLG byte to all ones | ||
97 | rts | ||
98 | | | ||
99 | | These routines load forced values into fp0. They are called | ||
100 | | by index into tbldo. | ||
101 | | | ||
102 | | Load a signed zero to fp0 and set inex2/ainex | ||
103 | | | ||
104 | .global snzrinx | ||
105 | snzrinx: | ||
106 | btstb #sign_bit,LOCAL_EX(%a0) |get sign of source operand | ||
107 | bnes ld_mzinx |if negative, branch | ||
108 | bsr ld_pzero |bsr so we can return and set inx | ||
109 | bra t_inx2 |now, set the inx for the next inst | ||
110 | ld_mzinx: | ||
111 | bsr ld_mzero |if neg, load neg zero, return here | ||
112 | bra t_inx2 |now, set the inx for the next inst | ||
113 | | | ||
114 | | Load a signed zero to fp0; do not set inex2/ainex | ||
115 | | | ||
116 | .global szero | ||
117 | szero: | ||
118 | btstb #sign_bit,LOCAL_EX(%a0) |get sign of source operand | ||
119 | bne ld_mzero |if neg, load neg zero | ||
120 | bra ld_pzero |load positive zero | ||
121 | | | ||
122 | | Load a signed infinity to fp0; do not set inex2/ainex | ||
123 | | | ||
124 | .global sinf | ||
125 | sinf: | ||
126 | btstb #sign_bit,LOCAL_EX(%a0) |get sign of source operand | ||
127 | bne ld_minf |if negative branch | ||
128 | bra ld_pinf | ||
129 | | | ||
130 | | Load a signed one to fp0; do not set inex2/ainex | ||
131 | | | ||
132 | .global sone | ||
133 | sone: | ||
134 | btstb #sign_bit,LOCAL_EX(%a0) |check sign of source | ||
135 | bne ld_mone | ||
136 | bra ld_pone | ||
137 | | | ||
138 | | Load a signed pi/2 to fp0; do not set inex2/ainex | ||
139 | | | ||
140 | .global spi_2 | ||
141 | spi_2: | ||
142 | btstb #sign_bit,LOCAL_EX(%a0) |check sign of source | ||
143 | bne ld_mpi2 | ||
144 | bra ld_ppi2 | ||
145 | | | ||
146 | | Load +inf for a plus operand, +0 for a minus operand | ||
147 | | | ||
148 | .global szr_inf | ||
149 | szr_inf: | ||
150 | btstb #sign_bit,LOCAL_EX(%a0) |check sign of source | ||
151 | bne ld_pzero |negative operand: load +0 | ||
152 | bra ld_pinf |positive operand: load +inf | ||
153 | | | ||
154 | | Result is +inf for a plus operand, an operr for a minus operand | ||
155 | | [Used by slogn, slognp1, slog10, and slog2] | ||
156 | | | ||
157 | .global sopr_inf | ||
158 | sopr_inf: | ||
159 | btstb #sign_bit,LOCAL_EX(%a0) |check sign of source | ||
160 | bne t_operr |negative operand: operr | ||
161 | bra ld_pinf |positive operand: load +inf | ||
162 | | | ||
163 | | FLOGNP1 | ||
164 | | | ||
165 | .global sslognp1 | ||
166 | sslognp1: | ||
167 | fmovemx (%a0),%fp0-%fp0 |load source operand into fp0 | ||
168 | fcmpb #-1,%fp0 |compare operand with -1 | ||
169 | fbgt slognp1 |operand > -1: branch to slognp1 | ||
170 | fbeq t_dz2 |if = -1, divide by zero exception | ||
171 | fmovel #0,%FPSR |clr N flag | ||
172 | bra t_operr |take care of operands < -1 | ||
173 | | | ||
174 | | FETOXM1 | ||
175 | | | ||
176 | .global setoxm1i | ||
177 | setoxm1i: | ||
178 | btstb #sign_bit,LOCAL_EX(%a0) |check sign of source | ||
179 | bne ld_mone |negative operand: load -1 | ||
180 | bra ld_pinf |positive operand: load +inf | ||
181 | | | ||
182 | | FLOGN | ||
183 | | | ||
184 | | Test for 1.0 as an input argument, returning +zero. Also check | ||
185 | | the sign and return operr if negative. | ||
186 | | | ||
187 | .global sslogn | ||
188 | sslogn: | ||
189 | btstb #sign_bit,LOCAL_EX(%a0) |check sign of source | ||
190 | bne t_operr |take care of operands < 0 | ||
191 | cmpiw #0x3fff,LOCAL_EX(%a0) |test for 1.0 input | ||
192 | bne slogn |exponent word is not that of 1.0 | ||
193 | cmpil #0x80000000,LOCAL_HI(%a0) |upper mantissa of 1.0? | ||
194 | bne slogn |no: not 1.0 | ||
195 | tstl LOCAL_LO(%a0) |lower mantissa zero? | ||
196 | bne slogn |no: not 1.0 | ||
197 | fmovex PZERO,%fp0 |input is exactly 1.0: return +0 | ||
198 | rts | ||
199 | |||
200 | .global sslognd | ||
201 | sslognd: | ||
202 | btstb #sign_bit,LOCAL_EX(%a0) |check sign of source | ||
203 | beq slognd |sign clear: branch to slognd | ||
204 | bra t_operr |take care of operands < 0 | ||
205 | |||
206 | | | ||
207 | | FLOG10 | ||
208 | | | ||
209 | .global sslog10 | ||
210 | sslog10: | ||
211 | btstb #sign_bit,LOCAL_EX(%a0) |check sign of source | ||
212 | bne t_operr |take care of operands < 0 | ||
213 | cmpiw #0x3fff,LOCAL_EX(%a0) |test for 1.0 input | ||
214 | bne slog10 |exponent word is not that of 1.0 | ||
215 | cmpil #0x80000000,LOCAL_HI(%a0) |upper mantissa of 1.0? | ||
216 | bne slog10 |no: not 1.0 | ||
217 | tstl LOCAL_LO(%a0) |lower mantissa zero? | ||
218 | bne slog10 |no: not 1.0 | ||
219 | fmovex PZERO,%fp0 |input is exactly 1.0: return +0 | ||
220 | rts | ||
221 | |||
222 | .global sslog10d | ||
223 | sslog10d: | ||
224 | btstb #sign_bit,LOCAL_EX(%a0) |check sign of source | ||
225 | beq slog10d |sign clear: branch to slog10d | ||
226 | bra t_operr |take care of operands < 0 | ||
227 | |||
228 | | | ||
229 | | FLOG2 | ||
230 | | | ||
231 | .global sslog2 | ||
232 | sslog2: | ||
233 | btstb #sign_bit,LOCAL_EX(%a0) |check sign of source | ||
234 | bne t_operr |take care of operands < 0 | ||
235 | cmpiw #0x3fff,LOCAL_EX(%a0) |test for 1.0 input | ||
236 | bne slog2 |exponent word is not that of 1.0 | ||
237 | cmpil #0x80000000,LOCAL_HI(%a0) |upper mantissa of 1.0? | ||
238 | bne slog2 |no: not 1.0 | ||
239 | tstl LOCAL_LO(%a0) |lower mantissa zero? | ||
240 | bne slog2 |no: not 1.0 | ||
241 | fmovex PZERO,%fp0 |input is exactly 1.0: return +0 | ||
242 | rts | ||
243 | |||
244 | .global sslog2d | ||
245 | sslog2d: | ||
246 | btstb #sign_bit,LOCAL_EX(%a0) |check sign of source | ||
247 | beq slog2d |sign clear: branch to slog2d | ||
248 | bra t_operr |take care of operands < 0 | ||
249 | |||
250 | | | ||
251 | | FMOD | ||
252 | | | ||
253 | pmodt: | ||
254 | | ;$21 fmod | ||
255 | | ;dtag,stag | ||
256 | .long smod | 00,00 norm,norm = normal | ||
257 | .long smod_oper | 00,01 norm,zero = nan with operr | ||
258 | .long smod_fpn | 00,10 norm,inf = fpn | ||
259 | .long smod_snan | 00,11 norm,nan = nan | ||
260 | .long smod_zro | 01,00 zero,norm = +-zero | ||
261 | .long smod_oper | 01,01 zero,zero = nan with operr | ||
262 | .long smod_zro | 01,10 zero,inf = +-zero | ||
263 | .long smod_snan | 01,11 zero,nan = nan | ||
264 | .long smod_oper | 10,00 inf,norm = nan with operr | ||
265 | .long smod_oper | 10,01 inf,zero = nan with operr | ||
266 | .long smod_oper | 10,10 inf,inf = nan with operr | ||
267 | .long smod_snan | 10,11 inf,nan = nan | ||
268 | .long smod_dnan | 11,00 nan,norm = nan | ||
269 | .long smod_dnan | 11,01 nan,zero = nan | ||
270 | .long smod_dnan | 11,10 nan,inf = nan | ||
271 | .long smod_dnan | 11,11 nan,nan = nan | ||
272 | |||
273 | .global pmod | ||
274 | pmod: | ||
275 | clrb FPSR_QBYTE(%a6) | clear quotient field | ||
276 | bfextu STAG(%a6){#0:#3},%d0 |stag = d0 | ||
277 | bfextu DTAG(%a6){#0:#3},%d1 |dtag = d1 | ||
278 | |||
279 | | | ||
280 | | Alias extended denorms to norms for the jump table. | ||
281 | | | ||
282 | bclrl #2,%d0 |drop bit 2 of stag, leaving a 2-bit tag | ||
283 | bclrl #2,%d1 |same for dtag | ||
284 | |||
285 | lslb #2,%d1 |move dtag up to bits 3:2 | ||
286 | orb %d0,%d1 |d1{3:2} = dtag, d1{1:0} = stag | ||
287 | | ;Tag values: | ||
288 | | ;00 = norm or denorm | ||
289 | | ;01 = zero | ||
290 | | ;10 = inf | ||
291 | | ;11 = nan | ||
292 | lea pmodt,%a1 |base of the fmod dispatch table | ||
293 | movel (%a1,%d1.w*4),%a1 |fetch handler address for this dtag,stag pair | ||
294 | jmp (%a1) |go to the handler | ||
295 | |||
296 | smod_snan: | ||
297 | bra src_nan |pmodt: source is a nan, destination is not | ||
298 | smod_dnan: | ||
299 | bra dst_nan |pmodt: destination is a nan | ||
300 | smod_oper: | ||
301 | bra t_operr |pmodt: nan with operr cases | ||
302 | smod_zro: | ||
303 | moveb ETEMP(%a6),%d1 |get sign of src op | ||
304 | moveb FPTEMP(%a6),%d0 |get sign of dst op | ||
305 | eorb %d0,%d1 |get exor of sign bits | ||
306 | btstl #7,%d1 |test for sign | ||
307 | beqs smod_zsn |if clr, do not set sign bit | ||
308 | bsetb #q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit | ||
309 | smod_zsn: | ||
310 | btstl #7,%d0 |test if dst is + or - | ||
311 | beq ld_pzero |if pos then load +0 | ||
312 | bra ld_mzero |else neg load -0 | ||
313 | |||
314 | smod_fpn: | ||
315 | moveb ETEMP(%a6),%d1 |get sign of src op | ||
316 | moveb FPTEMP(%a6),%d0 |get sign of dst op | ||
317 | eorb %d0,%d1 |get exor of sign bits | ||
318 | btstl #7,%d1 |test for sign | ||
319 | beqs smod_fsn |if clr, do not set sign bit | ||
320 | bsetb #q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit | ||
321 | smod_fsn: | ||
322 | tstb DTAG(%a6) |filter out denormal destination case | ||
323 | bpls smod_nrm |DTAG bit 7 clear: not a denorm | ||
324 | leal FPTEMP(%a6),%a0 |a0<- addr(FPTEMP) | ||
325 | bra t_resdnrm |force UNFL(but exact) result | ||
326 | smod_nrm: | ||
327 | fmovel USER_FPCR(%a6),%fpcr |use user's rmode and precision | ||
328 | fmovex FPTEMP(%a6),%fp0 |return dest to fp0 | ||
329 | rts | ||
330 | |||
331 | | | ||
332 | | FREM | ||
333 | | | ||
334 | premt: | ||
335 | | ;$25 frem | ||
336 | | ;entry index = dtag (bits 3:2), stag (bits 1:0), as built in prem | ||
337 | .long srem | 00,00 norm,norm = normal | ||
338 | .long srem_oper | 00,01 norm,zero = nan with operr | ||
339 | .long srem_fpn | 00,10 norm,inf = fpn | ||
340 | .long srem_snan | 00,11 norm,nan = nan | ||
341 | .long srem_zro | 01,00 zero,norm = +-zero | ||
342 | .long srem_oper | 01,01 zero,zero = nan with operr | ||
343 | .long srem_zro | 01,10 zero,inf = +-zero | ||
344 | .long srem_snan | 01,11 zero,nan = nan | ||
345 | .long srem_oper | 10,00 inf,norm = nan with operr | ||
346 | .long srem_oper | 10,01 inf,zero = nan with operr | ||
347 | .long srem_oper | 10,10 inf,inf = nan with operr | ||
348 | .long srem_snan | 10,11 inf,nan = nan | ||
349 | .long srem_dnan | 11,00 nan,norm = nan | ||
350 | .long srem_dnan | 11,01 nan,zero = nan | ||
351 | .long srem_dnan | 11,10 nan,inf = nan | ||
352 | .long srem_dnan | 11,11 nan,nan = nan | ||
353 | |||
354 | .global prem | ||
355 | prem: | ||
356 | clrb FPSR_QBYTE(%a6) |clear quotient field | ||
357 | bfextu STAG(%a6){#0:#3},%d0 |d0 = top 3 bits of STAG (source tag) | ||
358 | bfextu DTAG(%a6){#0:#3},%d1 |d1 = top 3 bits of DTAG (dest tag) | ||
359 | | | ||
360 | | Alias extended denorms to norms for the jump table. | ||
361 | | | ||
362 | bclr #2,%d0 |drop the denorm bit (bit 2) of stag | ||
363 | bclr #2,%d1 |drop the denorm bit (bit 2) of dtag | ||
364 | |||
365 | lslb #2,%d1 |move dtag up to bits 3:2 | ||
366 | orb %d0,%d1 |d1{3:2} = dtag, d1{1:0} = stag | ||
367 | | ;Tag values: | ||
368 | | ;00 = norm or denorm | ||
369 | | ;01 = zero | ||
370 | | ;10 = inf | ||
371 | | ;11 = nan | ||
372 | lea premt,%a1 |a1 = start of the premt jump table | ||
373 | movel (%a1,%d1.w*4),%a1 |a1 = table entry selected by dtag,stag | ||
374 | jmp (%a1) |go to the selected routine | ||
375 | |||
376 | srem_snan: | ||
377 | bra src_nan |source is a nan: continue in src_nan | ||
378 | srem_dnan: | ||
379 | bra dst_nan |dest is a nan: continue in dst_nan | ||
380 | srem_oper: | ||
381 | bra t_operr |nan-with-operr cases: continue in t_operr | ||
382 | srem_zro: | ||
383 | moveb ETEMP(%a6),%d1 |d1 = first byte of src op (sign in bit 7) | ||
384 | moveb FPTEMP(%a6),%d0 |d0 = first byte of dst op (sign in bit 7) | ||
385 | eorb %d0,%d1 |d1 bit 7 = exor of the two sign bits | ||
386 | btstl #7,%d1 |test whether the signs differ | ||
387 | beqs srem_zsn |if clr, do not set sign bit | ||
388 | bsetb #q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit | ||
389 | srem_zsn: | ||
390 | btstl #7,%d0 |test if dst op is + or - | ||
391 | beq ld_pzero |if pos then load +0 | ||
392 | bra ld_mzero |else neg load -0 | ||
393 | |||
394 | srem_fpn: | ||
395 | moveb ETEMP(%a6),%d1 |d1 = first byte of src op (sign in bit 7) | ||
396 | moveb FPTEMP(%a6),%d0 |d0 = first byte of dst op (sign in bit 7) | ||
397 | eorb %d0,%d1 |d1 bit 7 = exor of the two sign bits | ||
398 | btstl #7,%d1 |test whether the signs differ | ||
399 | beqs srem_fsn |if clr, do not set sign bit | ||
400 | bsetb #q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit | ||
401 | srem_fsn: | ||
402 | tstb DTAG(%a6) |filter out denormal destination case | ||
403 | bpls srem_nrm |DTAG bit 7 clear: dest is not a denorm | ||
404 | leal FPTEMP(%a6),%a0 |a0<- addr(FPTEMP) | ||
405 | bra t_resdnrm |force UNFL(but exact) result | ||
406 | srem_nrm: | ||
407 | fmovel USER_FPCR(%a6),%fpcr |use user's rmode and precision | ||
408 | fmovex FPTEMP(%a6),%fp0 |return dest to fp0 | ||
409 | rts | ||
410 | | | ||
411 | | FSCALE | ||
412 | | | ||
413 | pscalet: | ||
414 | | ;$26 fscale | ||
415 | | ;entry index = dtag (bits 3:2), stag (bits 1:0), as built in pscale | ||
416 | .long sscale | 00,00 norm,norm = result | ||
417 | .long sscale | 00,01 norm,zero = fpn | ||
418 | .long scl_opr | 00,10 norm,inf = nan with operr | ||
419 | .long scl_snan | 00,11 norm,nan = nan | ||
420 | .long scl_zro | 01,00 zero,norm = +-zero | ||
421 | .long scl_zro | 01,01 zero,zero = +-zero | ||
422 | .long scl_opr | 01,10 zero,inf = nan with operr | ||
423 | .long scl_snan | 01,11 zero,nan = nan | ||
424 | .long scl_inf | 10,00 inf,norm = +-inf | ||
425 | .long scl_inf | 10,01 inf,zero = +-inf | ||
426 | .long scl_opr | 10,10 inf,inf = nan with operr | ||
427 | .long scl_snan | 10,11 inf,nan = nan | ||
428 | .long scl_dnan | 11,00 nan,norm = nan | ||
429 | .long scl_dnan | 11,01 nan,zero = nan | ||
430 | .long scl_dnan | 11,10 nan,inf = nan | ||
431 | .long scl_dnan | 11,11 nan,nan = nan | ||
432 | |||
433 | .global pscale | ||
434 | pscale: | ||
435 | bfextu STAG(%a6){#0:#3},%d0 |stag in d0 | ||
436 | bfextu DTAG(%a6){#0:#3},%d1 |dtag in d1 | ||
437 | bclrl #2,%d0 |alias denorm into norm | ||
438 | bclrl #2,%d1 |alias denorm into norm | ||
439 | lslb #2,%d1 |move dtag up to bits 3:2 | ||
440 | orb %d0,%d1 |d1{3:2} = dtag, d1{1:0} = stag | ||
441 | | ;dtag values stag values: | ||
442 | | ;000 = norm 00 = norm | ||
443 | | ;001 = zero 01 = zero | ||
444 | | ;010 = inf 10 = inf | ||
445 | | ;011 = nan 11 = nan | ||
446 | | ;100 = dnrm (already aliased to norm by the bclrl above) | ||
447 | | | ||
448 | | | ||
449 | leal pscalet,%a1 |load start of jump table | ||
450 | movel (%a1,%d1.w*4),%a1 |load a1 with label depending on tag | ||
451 | jmp (%a1) |go to the routine | ||
452 | |||
453 | scl_opr: | ||
454 | bra t_operr |nan-with-operr cases: continue in t_operr | ||
455 | |||
456 | scl_dnan: | ||
457 | bra dst_nan |dest is a nan: continue in dst_nan | ||
458 | |||
459 | scl_zro: | ||
460 | btstb #sign_bit,FPTEMP_EX(%a6) |test if dest is + or - | ||
461 | beq ld_pzero |if pos then load +0 | ||
462 | bra ld_mzero |if neg then load -0 | ||
463 | scl_inf: | ||
464 | btstb #sign_bit,FPTEMP_EX(%a6) |test if dest is + or - | ||
465 | beq ld_pinf |if pos then load +inf | ||
466 | bra ld_minf |else neg load -inf | ||
467 | scl_snan: | ||
468 | bra src_nan |source is a nan: continue in src_nan | ||
469 | | | ||
470 | | FSINCOS | ||
471 | | | ||
472 | .global ssincosz | ||
473 | ssincosz: | ||
474 | btstb #sign_bit,ETEMP(%a6) |test sign of the source operand | ||
475 | beqs sincosp |sign clear: use +0 | ||
476 | fmovex MZERO,%fp0 |fp0 = -0 (presumably the sine result) | ||
477 | bras sincoscom | ||
478 | sincosp: | ||
479 | fmovex PZERO,%fp0 |fp0 = +0 (presumably the sine result) | ||
480 | sincoscom: | ||
481 | fmovemx PONE,%fp1-%fp1 |fp1 = +1; do not allow FPSR to be affected | ||
482 | bra sto_cos |store cosine result | ||
483 | |||
484 | .global ssincosi | ||
485 | ssincosi: | ||
486 | fmovex QNAN,%fp1 |load NAN as the cosine result | ||
487 | bsr sto_cos |store cosine result | ||
488 | fmovex QNAN,%fp0 |load NAN into fp0 | ||
489 | bra t_operr |continue in t_operr | ||
490 | |||
491 | .global ssincosnan | ||
492 | ssincosnan: | ||
493 | movel ETEMP_EX(%a6),FP_SCR1(%a6) |copy the source operand | ||
494 | movel ETEMP_HI(%a6),FP_SCR1+4(%a6) |from etemp into FP_SCR1 | ||
495 | movel ETEMP_LO(%a6),FP_SCR1+8(%a6) | ||
496 | bsetb #signan_bit,FP_SCR1+4(%a6) |set signan_bit in the copy's first mantissa byte | ||
497 | fmovemx FP_SCR1(%a6),%fp1-%fp1 |fp1 = the modified copy | ||
498 | bsr sto_cos |store it as the cosine result | ||
499 | bra src_nan |continue in src_nan | ||
500 | | | ||
501 | | This code forces default values for the zero, inf, and nan cases | ||
502 | | in the transcendentals code. The CC bits must be set in the | ||
503 | | stacked FPSR to be correctly reported. | ||
504 | | | ||
505 | |**Returns +PI/2 | ||
506 | .global ld_ppi2 | ||
507 | ld_ppi2: | ||
508 | fmovex PPIBY2,%fp0 |load +pi/2 | ||
509 | bra t_inx2 |set inex2 exc | ||
510 | |||
511 | |**Returns -PI/2 | ||
512 | .global ld_mpi2 | ||
513 | ld_mpi2: | ||
514 | fmovex MPIBY2,%fp0 |load -pi/2 | ||
515 | orl #neg_mask,USER_FPSR(%a6) |set N bit | ||
516 | bra t_inx2 |set inex2 exc | ||
517 | |||
518 | |**Returns +inf | ||
519 | .global ld_pinf | ||
520 | ld_pinf: | ||
521 | fmovex PINF,%fp0 |load +inf | ||
522 | orl #inf_mask,USER_FPSR(%a6) |set I bit | ||
523 | rts | ||
524 | |||
525 | |**Returns -inf | ||
526 | .global ld_minf | ||
527 | ld_minf: | ||
528 | fmovex MINF,%fp0 |load -inf | ||
529 | orl #neg_mask+inf_mask,USER_FPSR(%a6) |set N and I bits | ||
530 | rts | ||
531 | |||
532 | |**Returns +1 | ||
533 | .global ld_pone | ||
534 | ld_pone: | ||
535 | fmovex PONE,%fp0 |load +1 | ||
536 | rts |no condition code bit is set for +1 | ||
537 | |||
538 | |**Returns -1 | ||
539 | .global ld_mone | ||
540 | ld_mone: | ||
541 | fmovex MONE,%fp0 |load -1 | ||
542 | orl #neg_mask,USER_FPSR(%a6) |set N bit | ||
543 | rts | ||
544 | |||
545 | |**Returns +0 | ||
546 | .global ld_pzero | ||
547 | ld_pzero: | ||
548 | fmovex PZERO,%fp0 |load +0 | ||
549 | orl #z_mask,USER_FPSR(%a6) |set Z bit | ||
550 | rts | ||
551 | |||
552 | |**Returns -0 | ||
553 | .global ld_mzero | ||
554 | ld_mzero: | ||
555 | fmovex MZERO,%fp0 |load -0 | ||
556 | orl #neg_mask+z_mask,USER_FPSR(%a6) |set N and Z bits | ||
557 | rts | ||
558 | |||
559 | |end | ||
diff --git a/arch/m68k/fpsp040/fpsp.h b/arch/m68k/fpsp040/fpsp.h new file mode 100644 index 000000000000..984a4eb8010a --- /dev/null +++ b/arch/m68k/fpsp040/fpsp.h | |||
@@ -0,0 +1,348 @@ | |||
1 | | | ||
2 | | fpsp.h 3.3 3.3 | ||
3 | | | ||
4 | |||
5 | | Copyright (C) Motorola, Inc. 1990 | ||
6 | | All Rights Reserved | ||
7 | | | ||
8 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
9 | | The copyright notice above does not evidence any | ||
10 | | actual or intended publication of such source code. | ||
11 | |||
12 | | fpsp.h --- stack frame offsets during FPSP exception handling | ||
13 | | | ||
14 | | These equates are used to access the exception frame, the fsave | ||
15 | | frame and any local variables needed by the FPSP package. | ||
16 | | | ||
17 | | All FPSP handlers begin by executing: | ||
18 | | | ||
19 | | link a6,#-LOCAL_SIZE | ||
20 | | fsave -(a7) | ||
21 | | movem.l d0-d1/a0-a1,USER_DA(a6) | ||
22 | | fmovem.x fp0-fp3,USER_FP0(a6) | ||
23 | | fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6) | ||
24 | | | ||
25 | | After initialization, the stack looks like this: | ||
26 | | | ||
27 | | A7 ---> +-------------------------------+ | ||
28 | | | | | ||
29 | | | FPU fsave area | | ||
30 | | | | | ||
31 | | +-------------------------------+ | ||
32 | | | | | ||
33 | | | FPSP Local Variables | | ||
34 | | | including | | ||
35 | | | saved registers | | ||
36 | | | | | ||
37 | | +-------------------------------+ | ||
38 | | A6 ---> | Saved A6 | | ||
39 | | +-------------------------------+ | ||
40 | | | | | ||
41 | | | Exception Frame | | ||
42 | | | | | ||
43 | | | | | ||
44 | | | ||
45 | | Positive offsets from A6 refer to the exception frame. Negative | ||
46 | | offsets refer to the Local Variable area and the fsave area. | ||
47 | | The fsave frame is also accessible from the top via A7. | ||
48 | | | ||
49 | | On exit, the handlers execute: | ||
50 | | | ||
51 | | movem.l USER_DA(a6),d0-d1/a0-a1 | ||
52 | | fmovem.x USER_FP0(a6),fp0-fp3 | ||
53 | | fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar | ||
54 | | frestore (a7)+ | ||
55 | | unlk a6 | ||
56 | | | ||
57 | | and then either "bra fpsp_done" if the exception was completely | ||
58 | | handled by the package, or "bra real_xxxx" which is an external | ||
59 | | label to a routine that will process a real exception of the | ||
60 | | type that was generated. Some handlers may omit the "frestore" | ||
61 | | if the FPU state after the exception is idle. | ||
62 | | | ||
63 | | Sometimes the exception handler will transform the fsave area | ||
64 | | because it needs to report an exception back to the user. This | ||
65 | | can happen if the package is entered for an unimplemented float | ||
66 | | instruction that generates (say) an underflow. Alternatively, | ||
67 | | a second fsave frame can be pushed onto the stack and the | ||
68 | | handler exit code will reload the new frame and discard the old. | ||
69 | | | ||
70 | | The registers d0, d1, a0, a1 and fp0-fp3 are always saved and | ||
71 | | restored from the "local variable" area and can be used as | ||
72 | | temporaries. If a routine needs to change any | ||
73 | | of these registers, it should modify the saved copy and let | ||
74 | | the handler exit code restore the value. | ||
75 | | | ||
76 | |---------------------------------------------------------------------- | ||
77 | | | ||
78 | | Local Variables on the stack | ||
79 | | | ||
80 | .set LOCAL_SIZE,192 | bytes needed for local variables | ||
81 | .set LV,-LOCAL_SIZE | convenient base value | ||
82 | | | ||
83 | .set USER_DA,LV+0 | save space for D0-D1,A0-A1 | ||
84 | .set USER_D0,LV+0 | saved user D0 | ||
85 | .set USER_D1,LV+4 | saved user D1 | ||
86 | .set USER_A0,LV+8 | saved user A0 | ||
87 | .set USER_A1,LV+12 | saved user A1 | ||
88 | .set USER_FP0,LV+16 | saved user FP0 | ||
89 | .set USER_FP1,LV+28 | saved user FP1 | ||
90 | .set USER_FP2,LV+40 | saved user FP2 | ||
91 | .set USER_FP3,LV+52 | saved user FP3 | ||
92 | .set USER_FPCR,LV+64 | saved user FPCR | ||
93 | .set FPCR_ENABLE,USER_FPCR+2 | FPCR exception enable | ||
94 | .set FPCR_MODE,USER_FPCR+3 | FPCR rounding mode control | ||
95 | .set USER_FPSR,LV+68 | saved user FPSR | ||
96 | .set FPSR_CC,USER_FPSR+0 | FPSR condition code | ||
97 | .set FPSR_QBYTE,USER_FPSR+1 | FPSR quotient | ||
98 | .set FPSR_EXCEPT,USER_FPSR+2 | FPSR exception | ||
99 | .set FPSR_AEXCEPT,USER_FPSR+3 | FPSR accrued exception | ||
100 | .set USER_FPIAR,LV+72 | saved user FPIAR | ||
101 | .set FP_SCR1,LV+76 | room for a temporary float value | ||
102 | .set FP_SCR2,LV+92 | room for a temporary float value | ||
103 | .set L_SCR1,LV+108 | room for a temporary long value | ||
104 | .set L_SCR2,LV+112 | room for a temporary long value | ||
105 | .set STORE_FLG,LV+116 | ||
106 | .set BINDEC_FLG,LV+117 | used in bindec | ||
107 | .set DNRM_FLG,LV+118 | used in res_func | ||
108 | .set RES_FLG,LV+119 | used in res_func | ||
109 | .set DY_MO_FLG,LV+120 | dyadic/monadic flag | ||
110 | .set UFLG_TMP,LV+121 | temporary for uflag errata | ||
111 | .set CU_ONLY,LV+122 | cu-only flag | ||
112 | .set VER_TMP,LV+123 | temp holding for version number | ||
113 | .set L_SCR3,LV+124 | room for a temporary long value | ||
114 | .set FP_SCR3,LV+128 | room for a temporary float value | ||
115 | .set FP_SCR4,LV+144 | room for a temporary float value | ||
116 | .set FP_SCR5,LV+160 | room for a temporary float value | ||
117 | .set FP_SCR6,LV+176 | presumably room for a temporary float value, like FP_SCR3-5 | ||
118 | | | ||
119 | |NEXT equ LV+192 ;need to increase LOCAL_SIZE | ||
120 | | | ||
121 | |-------------------------------------------------------------------------- | ||
122 | | | ||
123 | | fsave offsets and bit definitions | ||
124 | | | ||
125 | | Offsets are defined from the end of an fsave because the last 10 | ||
126 | | words of a busy frame are the same as the unimplemented frame. | ||
127 | | | ||
128 | .set CU_SAVEPC,LV-92 | micro-pc for CU (1 byte) | ||
129 | .set FPR_DIRTY_BITS,LV-91 | fpr dirty bits | ||
130 | | | ||
131 | .set WBTEMP,LV-76 | write back temp (12 bytes) | ||
132 | .set WBTEMP_EX,WBTEMP | wbtemp sign and exponent (2 bytes) | ||
133 | .set WBTEMP_HI,WBTEMP+4 | wbtemp mantissa [63:32] (4 bytes) | ||
134 | .set WBTEMP_LO,WBTEMP+8 | wbtemp mantissa [31:00] (4 bytes) | ||
135 | | | ||
136 | .set WBTEMP_SGN,WBTEMP+2 | used to store sign | ||
137 | | | ||
138 | .set FPSR_SHADOW,LV-64 | fpsr shadow reg | ||
139 | | | ||
140 | .set FPIARCU,LV-60 | Instr. addr. reg. for CU (4 bytes) | ||
141 | | | ||
142 | .set CMDREG2B,LV-52 | cmd reg for machine 2 | ||
143 | .set CMDREG3B,LV-48 | cmd reg for E3 exceptions (2 bytes) | ||
144 | | | ||
145 | .set NMNEXC,LV-44 | NMNEXC (unsup,snan bits only) | ||
146 | .set nmn_unsup_bit,1 | | ||
147 | .set nmn_snan_bit,0 | | ||
148 | | | ||
149 | .set NMCEXC,LV-43 | NMNEXC & NMCEXC | ||
150 | .set nmn_operr_bit,7 | ||
151 | .set nmn_ovfl_bit,6 | ||
152 | .set nmn_unfl_bit,5 | ||
153 | .set nmc_unsup_bit,4 | ||
154 | .set nmc_snan_bit,3 | ||
155 | .set nmc_operr_bit,2 | ||
156 | .set nmc_ovfl_bit,1 | ||
157 | .set nmc_unfl_bit,0 | ||
158 | | | ||
159 | .set STAG,LV-40 | source tag (1 byte) | ||
160 | .set WBTEMP_GRS,LV-40 | alias wbtemp guard, round, sticky | ||
161 | .set guard_bit,1 | guard bit is bit number 1 | ||
162 | .set round_bit,0 | round bit is bit number 0 | ||
163 | .set stag_mask,0xE0 | upper 3 bits are source tag type | ||
164 | .set denorm_bit,7 | bit determines if denorm or unnorm | ||
165 | .set etemp15_bit,4 | etemp exponent bit #15 | ||
166 | .set wbtemp66_bit,2 | wbtemp mantissa bit #66 | ||
167 | .set wbtemp1_bit,1 | wbtemp mantissa bit #1 | ||
168 | .set wbtemp0_bit,0 | wbtemp mantissa bit #0 | ||
169 | | | ||
170 | .set STICKY,LV-39 | holds sticky bit | ||
171 | .set sticky_bit,7 | ||
172 | | | ||
173 | .set CMDREG1B,LV-36 | cmd reg for E1 exceptions (2 bytes) | ||
174 | .set kfact_bit,12 | distinguishes static/dynamic k-factor | ||
175 | | ;on packed move outs. NOTE: this | ||
176 | | ;equate only works when CMDREG1B is in | ||
177 | | ;a register. | ||
178 | | | ||
179 | .set CMDWORD,LV-35 | command word in cmd1b | ||
180 | .set direction_bit,5 | bit 0 in opclass | ||
181 | .set size_bit2,12 | bit 2 in size field | ||
182 | | | ||
183 | .set DTAG,LV-32 | dest tag (1 byte) | ||
184 | .set dtag_mask,0xE0 | upper 3 bits are dest type tag | ||
185 | .set fptemp15_bit,4 | fptemp exponent bit #15 | ||
186 | | | ||
187 | .set WB_BYTE,LV-31 | holds WBTE15 bit (1 byte) | ||
188 | .set wbtemp15_bit,4 | wbtemp exponent bit #15 | ||
189 | | | ||
190 | .set E_BYTE,LV-28 | holds E1 and E3 bits (1 byte) | ||
191 | .set E1,2 | which bit is E1 flag | ||
192 | .set E3,1 | which bit is E3 flag | ||
193 | .set SFLAG,0 | which bit is S flag | ||
194 | | | ||
195 | .set T_BYTE,LV-27 | holds T and U bits (1 byte) | ||
196 | .set XFLAG,7 | which bit is X flag | ||
197 | .set UFLAG,5 | which bit is U flag | ||
198 | .set TFLAG,4 | which bit is T flag | ||
199 | | | ||
200 | .set FPTEMP,LV-24 | fptemp (12 bytes) | ||
201 | .set FPTEMP_EX,FPTEMP | fptemp sign and exponent (2 bytes) | ||
202 | .set FPTEMP_HI,FPTEMP+4 | fptemp mantissa [63:32] (4 bytes) | ||
203 | .set FPTEMP_LO,FPTEMP+8 | fptemp mantissa [31:00] (4 bytes) | ||
204 | | | ||
205 | .set FPTEMP_SGN,FPTEMP+2 | used to store sign | ||
206 | | | ||
207 | .set ETEMP,LV-12 | etemp (12 bytes) | ||
208 | .set ETEMP_EX,ETEMP | etemp sign and exponent (2 bytes) | ||
209 | .set ETEMP_HI,ETEMP+4 | etemp mantissa [63:32] (4 bytes) | ||
210 | .set ETEMP_LO,ETEMP+8 | etemp mantissa [31:00] (4 bytes) | ||
211 | | | ||
212 | .set ETEMP_SGN,ETEMP+2 | used to store sign | ||
213 | | | ||
214 | .set EXC_SR,4 | exception frame status register | ||
215 | .set EXC_PC,6 | exception frame program counter | ||
216 | .set EXC_VEC,10 | exception frame vector (format+vector#) | ||
217 | .set EXC_EA,12 | exception frame effective address | ||
218 | | | ||
219 | |-------------------------------------------------------------------------- | ||
220 | | | ||
221 | | FPSR/FPCR bits | ||
222 | | | ||
223 | .set neg_bit,3 | negative result | ||
224 | .set z_bit,2 | zero result | ||
225 | .set inf_bit,1 | infinity result | ||
226 | .set nan_bit,0 | not-a-number result | ||
227 | | | ||
228 | .set q_sn_bit,7 | sign bit of quotient byte | ||
229 | | | ||
230 | .set bsun_bit,7 | branch on unordered | ||
231 | .set snan_bit,6 | signalling nan | ||
232 | .set operr_bit,5 | operand error | ||
233 | .set ovfl_bit,4 | overflow | ||
234 | .set unfl_bit,3 | underflow | ||
235 | .set dz_bit,2 | divide by zero | ||
236 | .set inex2_bit,1 | inexact result 2 | ||
237 | .set inex1_bit,0 | inexact result 1 | ||
238 | | | ||
239 | .set aiop_bit,7 | accrued illegal operation | ||
240 | .set aovfl_bit,6 | accrued overflow | ||
241 | .set aunfl_bit,5 | accrued underflow | ||
242 | .set adz_bit,4 | accrued divide by zero | ||
243 | .set ainex_bit,3 | accrued inexact | ||
244 | | | ||
245 | | FPSR individual bit masks | ||
246 | | | ||
247 | .set neg_mask,0x08000000 | ||
248 | .set z_mask,0x04000000 | ||
249 | .set inf_mask,0x02000000 | ||
250 | .set nan_mask,0x01000000 | ||
251 | | | ||
252 | .set bsun_mask,0x00008000 | | ||
253 | .set snan_mask,0x00004000 | ||
254 | .set operr_mask,0x00002000 | ||
255 | .set ovfl_mask,0x00001000 | ||
256 | .set unfl_mask,0x00000800 | ||
257 | .set dz_mask,0x00000400 | ||
258 | .set inex2_mask,0x00000200 | ||
259 | .set inex1_mask,0x00000100 | ||
260 | | | ||
261 | .set aiop_mask,0x00000080 | accrued illegal operation | ||
262 | .set aovfl_mask,0x00000040 | accrued overflow | ||
263 | .set aunfl_mask,0x00000020 | accrued underflow | ||
264 | .set adz_mask,0x00000010 | accrued divide by zero | ||
265 | .set ainex_mask,0x00000008 | accrued inexact | ||
266 | | | ||
267 | | FPSR combinations used in the FPSP | ||
268 | | | ||
269 | .set dzinf_mask,inf_mask+dz_mask+adz_mask | ||
270 | .set opnan_mask,nan_mask+operr_mask+aiop_mask | ||
271 | .set nzi_mask,0x01ffffff | clears N, Z, and I | ||
272 | .set unfinx_mask,unfl_mask+inex2_mask+aunfl_mask+ainex_mask | ||
273 | .set unf2inx_mask,unfl_mask+inex2_mask+ainex_mask | ||
274 | .set ovfinx_mask,ovfl_mask+inex2_mask+aovfl_mask+ainex_mask | ||
275 | .set inx1a_mask,inex1_mask+ainex_mask | ||
276 | .set inx2a_mask,inex2_mask+ainex_mask | ||
277 | .set snaniop_mask,nan_mask+snan_mask+aiop_mask | ||
278 | .set naniop_mask,nan_mask+aiop_mask | ||
279 | .set neginf_mask,neg_mask+inf_mask | ||
280 | .set infaiop_mask,inf_mask+aiop_mask | ||
281 | .set negz_mask,neg_mask+z_mask | ||
282 | .set opaop_mask,operr_mask+aiop_mask | ||
283 | .set unfl_inx_mask,unfl_mask+aunfl_mask+ainex_mask | ||
284 | .set ovfl_inx_mask,ovfl_mask+aovfl_mask+ainex_mask | ||
285 | | | ||
286 | |-------------------------------------------------------------------------- | ||
287 | | | ||
288 | | FPCR rounding modes | ||
289 | | | ||
290 | .set x_mode,0x00 | round to extended | ||
291 | .set s_mode,0x40 | round to single | ||
292 | .set d_mode,0x80 | round to double | ||
293 | | | ||
294 | .set rn_mode,0x00 | round nearest | ||
295 | .set rz_mode,0x10 | round to zero | ||
296 | .set rm_mode,0x20 | round to minus infinity | ||
297 | .set rp_mode,0x30 | round to plus infinity | ||
298 | | | ||
299 | |-------------------------------------------------------------------------- | ||
300 | | | ||
301 | | Miscellaneous equates | ||
302 | | | ||
303 | .set signan_bit,6 | signalling nan bit in mantissa | ||
304 | .set sign_bit,7 | sign bit within the first byte of an operand | ||
305 | | | ||
306 | .set rnd_stky_bit,29 | round/sticky bit of mantissa | ||
307 | | this can only be used if in a data register | ||
308 | .set sx_mask,0x01800000 | set s and x bits in word $48 | ||
309 | | | ||
310 | .set LOCAL_EX,0 | ||
311 | .set LOCAL_SGN,2 | ||
312 | .set LOCAL_HI,4 | ||
313 | .set LOCAL_LO,8 | ||
314 | .set LOCAL_GRS,12 | valid ONLY for FP_SCR1, FP_SCR2 | ||
315 | | | ||
316 | | | ||
317 | .set norm_tag,0x00 | tag bits in {7:5} position | ||
318 | .set zero_tag,0x20 | ||
319 | .set inf_tag,0x40 | ||
320 | .set nan_tag,0x60 | ||
321 | .set dnrm_tag,0x80 | ||
322 | | | ||
323 | | fsave sizes and formats | ||
324 | | | ||
325 | .set VER_4,0x40 | fpsp compatible version numbers | ||
326 | | are in the $40s {$40-$4f} | ||
327 | .set VER_40,0x40 | original version number | ||
328 | .set VER_41,0x41 | revision version number | ||
329 | | | ||
330 | .set BUSY_SIZE,100 | size of busy frame | ||
331 | .set BUSY_FRAME,LV-BUSY_SIZE | start of busy frame | ||
332 | | | ||
333 | .set UNIMP_40_SIZE,44 | size of orig unimp frame | ||
334 | .set UNIMP_41_SIZE,52 | size of rev unimp frame | ||
335 | | | ||
336 | .set IDLE_SIZE,4 | size of idle frame | ||
337 | .set IDLE_FRAME,LV-IDLE_SIZE | start of idle frame | ||
338 | | | ||
339 | | exception vectors | ||
340 | | | ||
341 | .set TRACE_VEC,0x2024 | trace trap | ||
342 | .set FLINE_VEC,0x002C | real F-line | ||
343 | .set UNIMP_VEC,0x202C | unimplemented | ||
344 | .set INEX_VEC,0x00C4 | presumably the inexact-result vector - TODO confirm | ||
345 | | | ||
346 | .set dbl_thresh,0x3C01 | $3FFF-1022; presumably smallest normalized double exponent | ||
347 | .set sgl_thresh,0x3F81 | $3FFF-126; presumably smallest normalized single exponent | ||
348 | | | ||
diff --git a/arch/m68k/fpsp040/gen_except.S b/arch/m68k/fpsp040/gen_except.S new file mode 100644 index 000000000000..401d06f39f73 --- /dev/null +++ b/arch/m68k/fpsp040/gen_except.S | |||
@@ -0,0 +1,468 @@ | |||
1 | | | ||
2 | | gen_except.sa 3.7 1/16/92 | ||
3 | | | ||
4 | | gen_except --- FPSP routine to detect reportable exceptions | ||
5 | | | ||
6 | | This routine compares the exception enable byte of the | ||
7 | | user_fpcr on the stack with the exception status byte | ||
8 | | of the user_fpsr. | ||
9 | | | ||
10 | | Any routine which may report an exception must load | ||
11 | | the stack frame in memory with the exceptional operand(s). | ||
12 | | | ||
13 | | Priority for exceptions is: | ||
14 | | | ||
15 | | Highest: bsun | ||
16 | | snan | ||
17 | | operr | ||
18 | | ovfl | ||
19 | | unfl | ||
20 | | dz | ||
21 | | inex2 | ||
22 | | Lowest: inex1 | ||
23 | | | ||
24 | | Note: The IEEE standard specifies that inex2 is to be | ||
25 | | reported if ovfl occurs and the ovfl enable bit is not | ||
26 | | set but the inex2 enable bit is. | ||
27 | | | ||
28 | | | ||
29 | | Copyright (C) Motorola, Inc. 1990 | ||
30 | | All Rights Reserved | ||
31 | | | ||
32 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
33 | | The copyright notice above does not evidence any | ||
34 | | actual or intended publication of such source code. | ||
35 | |||
36 | GEN_EXCEPT: |idnt 2,1 | Motorola 040 Floating Point Software Package | ||
37 | |||
38 | |section 8 | ||
39 | |||
40 | #include "fpsp.h" | ||
41 | |||
42 | |xref real_trace | ||
43 | |xref fpsp_done | ||
44 | |xref fpsp_fmt_error | ||
45 | |||
46 | exc_tbl: | ||
47 | .long bsun_exc |0: bsun (exception byte bit 7) | ||
48 | .long commonE1 |1: snan (bit 6) | ||
49 | .long commonE1 |2: operr (bit 5) | ||
50 | .long ovfl_unfl |3: ovfl (bit 4) | ||
51 | .long ovfl_unfl |4: unfl (bit 3) | ||
52 | .long commonE1 |5: dz (bit 2) | ||
53 | .long commonE3 |6: inex2 (bit 1) | ||
54 | .long commonE3 |7: inex1 (bit 0) | ||
55 | .long no_match |8: no enabled exception bit is set | ||
56 | |||
57 | .global gen_except | ||
58 | gen_except: | ||
59 | cmpib #IDLE_SIZE-4,1(%a7) |test for idle frame | ||
60 | beq do_check |go handle idle frame | ||
61 | cmpib #UNIMP_40_SIZE-4,1(%a7) |test for orig unimp frame | ||
62 | beqs unimp_x |go handle unimp frame | ||
63 | cmpib #UNIMP_41_SIZE-4,1(%a7) |test for rev unimp frame | ||
64 | beqs unimp_x |go handle unimp frame | ||
65 | cmpib #BUSY_SIZE-4,1(%a7) |if size <> $60, fmt error | ||
66 | bnel fpsp_fmt_error |none of the known frame sizes | ||
67 | leal BUSY_SIZE+LOCAL_SIZE(%a7),%a1 |init a1 so fpsp.h | ||
68 | | ;equates will work | ||
69 | | Fix up the new busy frame with entries from the unimp frame | ||
70 | | | ||
71 | movel ETEMP_EX(%a6),ETEMP_EX(%a1) |copy etemp from unimp | ||
72 | movel ETEMP_HI(%a6),ETEMP_HI(%a1) |frame to busy frame | ||
73 | movel ETEMP_LO(%a6),ETEMP_LO(%a1) | ||
74 | movel CMDREG1B(%a6),CMDREG1B(%a1) |set inst in frame to unimp | ||
75 | movel CMDREG1B(%a6),%d0 |fix cmd1b to make it | ||
76 | andl #0x03c30000,%d0 |work for cmd3b | ||
77 | bfextu CMDREG1B(%a6){#13:#1},%d1 |extract bit 2 | ||
78 | lsll #5,%d1 |shift it to bit 5 | ||
79 | swap %d1 |then into the upper word (bit 21) | ||
80 | orl %d1,%d0 |put it in the right place | ||
81 | bfextu CMDREG1B(%a6){#10:#3},%d1 |extract bit 3,4,5 | ||
82 | lsll #2,%d1 |shift them to bits 4:2 | ||
83 | swap %d1 |then into the upper word (bits 20:18) | ||
84 | orl %d1,%d0 |put them in the right place | ||
85 | movel %d0,CMDREG3B(%a1) |in the busy frame | ||
86 | | | ||
87 | | Or in the FPSR from the emulation with the USER_FPSR on the stack. | ||
88 | | | ||
89 | fmovel %FPSR,%d0 |d0 = current FPSR | ||
90 | orl %d0,USER_FPSR(%a6) |merge it into the saved USER_FPSR | ||
91 | movel USER_FPSR(%a6),FPSR_SHADOW(%a1) |set exc bits | ||
92 | orl #sx_mask,E_BYTE(%a1) |or in sx_mask (s and x bits) | ||
93 | bra do_clean |continue in do_clean | ||
94 | |||
95 | | | ||
96 | | Frame is an unimp frame possibly resulting from an fmove <ea>,fp0 | ||
97 | | that caused an exception | ||
98 | | | ||
99 | | a1 is modified to point into the new frame allowing fpsp equates | ||
100 | | to be valid. | ||
101 | | | ||
102 | unimp_x: | ||
103 | cmpib #UNIMP_40_SIZE-4,1(%a7) |test for orig unimp frame | ||
104 | bnes test_rev |not orig: try the rev size | ||
105 | leal UNIMP_40_SIZE+LOCAL_SIZE(%a7),%a1 |a1 = frame base for an orig unimp frame | ||
106 | bras unimp_con | ||
107 | test_rev: | ||
108 | cmpib #UNIMP_41_SIZE-4,1(%a7) |test for rev unimp frame | ||
109 | bnel fpsp_fmt_error |if not $28 or $30 | ||
110 | leal UNIMP_41_SIZE+LOCAL_SIZE(%a7),%a1 |a1 = frame base for a rev unimp frame | ||
111 | |||
112 | unimp_con: | ||
113 | | | ||
114 | | Fix up the new unimp frame with entries from the old unimp frame | ||
115 | | | ||
116 | movel CMDREG1B(%a6),CMDREG1B(%a1) |set inst in frame to unimp | ||
117 | | | ||
118 | | Or in the FPSR from the emulation with the USER_FPSR on the stack. | ||
119 | | | ||
120 | fmovel %FPSR,%d0 |d0 = current FPSR | ||
121 | orl %d0,USER_FPSR(%a6) |merge it into the saved USER_FPSR | ||
122 | bra do_clean |continue in do_clean | ||
123 | |||
124 | | | ||
125 | | Frame is idle, so check for exceptions reported through | ||
126 | | USER_FPSR and set the unimp frame accordingly. | ||
127 | | A7 must be advanced past the idle fsave frame so that | ||
128 | | it points back at the unimp frame. | ||
129 | | | ||
130 | |||
131 | do_check: | ||
132 | addl #4,%a7 |point A7 back to unimp frame | ||
133 | | | ||
134 | | Or in the FPSR from the emulation with the USER_FPSR on the stack. | ||
135 | | | ||
136 | fmovel %FPSR,%d0 |d0 = current FPSR | ||
137 | orl %d0,USER_FPSR(%a6) |merge it into the saved USER_FPSR | ||
138 | | | ||
139 | | On a busy frame, we must clear the nmnexc bits. | ||
140 | | | ||
141 | cmpib #BUSY_SIZE-4,1(%a7) |check frame type | ||
142 | bnes check_fr |not busy: check the other frame sizes | ||
143 | clrw NMNEXC(%a6) |clr nmnexc & nmcexc | ||
144 | btstb #5,CMDREG1B(%a6) |test for fmove out | ||
145 | bnes frame_com |bit set: skip the shadow/E-byte update | ||
146 | movel USER_FPSR(%a6),FPSR_SHADOW(%a6) |set exc bits | ||
147 | orl #sx_mask,E_BYTE(%a6) |or in sx_mask (s and x bits) | ||
148 | bras frame_com | ||
149 | check_fr: | ||
150 | cmpb #UNIMP_40_SIZE-4,1(%a7) |test for orig unimp frame | ||
151 | beqs frame_com |orig unimp: nothing to clear | ||
152 | clrw NMNEXC(%a6) |otherwise clr nmnexc & nmcexc | ||
153 | frame_com: | ||
154 | moveb FPCR_ENABLE(%a6),%d0 |get fpcr enable byte | ||
155 | andb FPSR_EXCEPT(%a6),%d0 |and in the fpsr exc byte | ||
156 | bfffo %d0{#24:#8},%d1 |find first set bit in the low byte of d0 | ||
157 | leal exc_tbl,%a0 |load jmp table address | ||
158 | subib #24,%d1 |normalize bit offset to 0-8 | ||
159 | movel (%a0,%d1.w*4),%a0 |load routine address | ||
160 | | ;based on first enabled exc | ||
161 | jmp (%a0) |jump to routine | ||
162 | | | ||
163 | | Bsun is not possible in unimp or unsupp | ||
164 | | | ||
165 | bsun_exc: | ||
166 | bra do_clean |no frame changes are made for bsun | ||
167 | | | ||
168 | | The typical work to be done to the unimp frame to report an | ||
169 | | exception is to set the E1/E3 byte and clr the U flag. | ||
170 | | commonE1 does this for E1 exceptions, which are snan, | ||
171 | | operr, and dz. commonE3 does this for E3 exceptions, which | ||
172 | | are inex2 and inex1, and also clears the E1 exception bit | ||
173 | | left over from the unimp exception. | ||
174 | | | ||
175 | commonE1: | ||
176 | bsetb #E1,E_BYTE(%a6) |set E1 flag | ||
177 | bra commonE |go clr U flag, then clean and exit | ||
178 | |||
179 | commonE3: | ||
180 | tstb UFLG_TMP(%a6) |test flag for unsup/unimp state | ||
181 | bnes unsE3 |non-zero: unsupp case | ||
182 | uniE3: | ||
183 | bsetb #E3,E_BYTE(%a6) |set E3 flag | ||
184 | bclrb #E1,E_BYTE(%a6) |clr E1 from unimp | ||
185 | bra commonE |go clr U flag, then clean and exit | ||
186 | |||
187 | unsE3: | ||
188 | tstb RES_FLG(%a6) |test RES_FLG | ||
189 | bnes unsE3_0 |non-zero: do not set E3 | ||
190 | unsE3_1: | ||
191 | bsetb #E3,E_BYTE(%a6) |set E3 flag | ||
192 | unsE3_0: | ||
193 | bclrb #E1,E_BYTE(%a6) |clr E1 flag | ||
194 | movel CMDREG1B(%a6),%d0 |fix cmd1b to make it | ||
195 | andl #0x03c30000,%d0 |work for cmd3b | ||
196 | bfextu CMDREG1B(%a6){#13:#1},%d1 |extract bit 2 | ||
197 | lsll #5,%d1 |shift it to bit 5 | ||
198 | swap %d1 |then into the upper word (bit 21) | ||
199 | orl %d1,%d0 |put it in the right place | ||
200 | bfextu CMDREG1B(%a6){#10:#3},%d1 |extract bit 3,4,5 | ||
201 | lsll #2,%d1 |shift them to bits 4:2 | ||
202 | swap %d1 |then into the upper word (bits 20:18) | ||
203 | orl %d1,%d0 |put them in the right place | ||
204 | movel %d0,CMDREG3B(%a6) |in the busy frame | ||
205 | |||
206 | commonE: | ||
207 | bclrb #UFLAG,T_BYTE(%a6) |clr U flag from unimp | ||
208 | bra do_clean |go clean and exit | ||
209 | | | ||
210 | | No bits in the enable byte match existing exceptions. Check for | ||
211 | | the case of the ovfl exc without the ovfl enabled, but with | ||
212 | | inex2 enabled. | ||
213 | | | ||
214 | no_match: | ||
215 | btstb #inex2_bit,FPCR_ENABLE(%a6) |check for ovfl/inex2 case | ||
216 | beqs no_exc |if clear, exit | ||
217 | btstb #ovfl_bit,FPSR_EXCEPT(%a6) |now check ovfl | ||
218 | beqs no_exc |if clear, exit | ||
219 | bras ovfl_unfl |go to ovfl_unfl to determine if | ||
220 | | ;it is an unsupp or unimp exc | ||
221 | |||
222 | | No exceptions are to be reported. If the instruction was | ||
223 | | unimplemented, no FPU restore is necessary. If it was | ||
224 | | unsupported, we must perform the restore. | ||
225 | no_exc: | ||
226 | tstb UFLG_TMP(%a6) |test flag for unsupp/unimp state | ||
227 | beqs uni_no_exc |zero: unimp case | ||
228 | uns_no_exc: | ||
229 | tstb RES_FLG(%a6) |check if frestore is needed | ||
230 | bne do_clean |if set, exit via do_clean; if clear, no frestore needed | ||
231 | uni_no_exc: | ||
232 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |reload saved d0-d1/a0-a1 | ||
233 | fmovemx USER_FP0(%a6),%fp0-%fp3 |reload saved fp0-fp3 | ||
234 | fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar |reload saved fpcr/fpsr/fpiar | ||
235 | unlk %a6 |release the local variable frame | ||
236 | bra finish_up |continue in finish_up | ||
237 | | | ||
238 | | Unsupported Data Type Handler: | ||
239 | | Ovfl: | ||
240 | | An fmoveout that results in an overflow is reported this way. | ||
241 | | Unfl: | ||
242 | | An fmoveout that results in an underflow is reported this way. | ||
243 | | | ||
244 | | Unimplemented Instruction Handler: | ||
245 | | Ovfl: | ||
246 | | Only scosh, setox, ssinh, stwotox, and scale can set overflow in | ||
247 | | this manner. | ||
248 | | Unfl: | ||
249 | | Stwotox, setox, and scale can set underflow in this manner. | ||
250 | | Any of the other Library Routines such that f(x)=x in which | ||
251 | | x is an extended denorm can report an underflow exception. | ||
252 | | It is the responsibility of the exception-causing routine | ||
253 | | to make sure that WBTEMP is correct. | ||
254 | | | ||
255 | | The exceptional operand is in FP_SCR1. | ||
256 | | | ||
257 | ovfl_unfl: | ||
258 | tstb UFLG_TMP(%a6) |test flag for unsupp/unimp state | ||
259 | beqs ofuf_con | ||
260 | | | ||
261 | | The caller was from an unsupported data type trap. Test if the | ||
262 | | caller set CU_ONLY. If so, the exceptional operand is expected in | ||
263 | | FPTEMP, rather than WBTEMP. | ||
264 | | | ||
265 | tstb CU_ONLY(%a6) |test if inst is cu-only | ||
266 | beq unsE3 | ||
267 | | move.w #$fe,CU_SAVEPC(%a6) | ||
268 | clrb CU_SAVEPC(%a6) | ||
269 | bsetb #E1,E_BYTE(%a6) |set E1 exception flag | ||
270 | movew ETEMP_EX(%a6),FPTEMP_EX(%a6) | ||
271 | movel ETEMP_HI(%a6),FPTEMP_HI(%a6) | ||
272 | movel ETEMP_LO(%a6),FPTEMP_LO(%a6) | ||
273 | bsetb #fptemp15_bit,DTAG(%a6) |set fpte15 | ||
274 | bclrb #UFLAG,T_BYTE(%a6) |clr U flag from unimp | ||
275 | bra do_clean |go clean and exit | ||
276 | |||
277 | ofuf_con: | ||
278 | moveb (%a7),VER_TMP(%a6) |save version number | ||
279 | cmpib #BUSY_SIZE-4,1(%a7) |check for busy frame | ||
280 | beqs busy_fr |if unimp, grow to busy | ||
281 | cmpib #VER_40,(%a7) |test for orig unimp frame | ||
282 | bnes try_41 |if not, test for rev frame | ||
283 | moveql #13,%d0 |need to zero 14 lwords | ||
284 | bras ofuf_fin | ||
285 | try_41: | ||
286 | cmpib #VER_41,(%a7) |test for rev unimp frame | ||
287 | bnel fpsp_fmt_error |if neither, exit with error | ||
288 | moveql #11,%d0 |need to zero 12 lwords | ||
289 | |||
290 | ofuf_fin: | ||
291 | clrl (%a7) | ||
292 | loop1: | ||
293 | clrl -(%a7) |clear and dec a7 | ||
294 | dbra %d0,loop1 | ||
295 | moveb VER_TMP(%a6),(%a7) | ||
296 | moveb #BUSY_SIZE-4,1(%a7) |write busy fmt word. | ||
297 | busy_fr: | ||
298 | movel FP_SCR1(%a6),WBTEMP_EX(%a6) |write | ||
299 | movel FP_SCR1+4(%a6),WBTEMP_HI(%a6) |exceptional op to | ||
300 | movel FP_SCR1+8(%a6),WBTEMP_LO(%a6) |wbtemp | ||
301 | bsetb #E3,E_BYTE(%a6) |set E3 flag | ||
302 | bclrb #E1,E_BYTE(%a6) |make sure E1 is clear | ||
303 | bclrb #UFLAG,T_BYTE(%a6) |clr U flag | ||
304 | movel USER_FPSR(%a6),FPSR_SHADOW(%a6) | ||
305 | orl #sx_mask,E_BYTE(%a6) | ||
306 | movel CMDREG1B(%a6),%d0 |fix cmd1b to make it | ||
307 | andl #0x03c30000,%d0 |work for cmd3b | ||
308 | bfextu CMDREG1B(%a6){#13:#1},%d1 |extract bit 2 | ||
309 | lsll #5,%d1 | ||
310 | swap %d1 | ||
311 | orl %d1,%d0 |put it in the right place | ||
312 | bfextu CMDREG1B(%a6){#10:#3},%d1 |extract bit 3,4,5 | ||
313 | lsll #2,%d1 | ||
314 | swap %d1 | ||
315 | orl %d1,%d0 |put them in the right place | ||
316 | movel %d0,CMDREG3B(%a6) |in the busy frame | ||
317 | |||
318 | | | ||
319 | | Check if the frame to be restored is busy or unimp. | ||
320 | |** NOTE *** Bug fix for errata (0d43b #3) | ||
321 | | If the frame is unimp, we must create a busy frame to | ||
322 | | fix the bug with the nmcexc bits in cases in which they | ||
323 | | are set by a previous instruction and not cleared by | ||
324 | | the save. The frame will be unimp only if the final | ||
325 | | instruction in an emulation routine caused the exception | ||
326 | | by doing an fmove <ea>,fp0. The exception operand, in | ||
327 | | internal format, is in fptemp. | ||
328 | | | ||
329 | do_clean: | ||
330 | cmpib #UNIMP_40_SIZE-4,1(%a7) | ||
331 | bnes do_con | ||
332 | moveql #13,%d0 |in orig, need to zero 14 lwords | ||
333 | bras do_build | ||
334 | do_con: | ||
335 | cmpib #UNIMP_41_SIZE-4,1(%a7) | ||
336 | bnes do_restore |frame must be busy | ||
337 | moveql #11,%d0 |in rev, need to zero 12 lwords | ||
338 | |||
339 | do_build: | ||
340 | moveb (%a7),VER_TMP(%a6) | ||
341 | clrl (%a7) | ||
342 | loop2: | ||
343 | clrl -(%a7) |clear and dec a7 | ||
344 | dbra %d0,loop2 | ||
345 | | | ||
346 | | Use a1 as pointer into new frame. a6 is not correct if an unimp or | ||
347 | | busy frame was created as the result of an exception on the final | ||
348 | | instruction of an emulation routine. | ||
349 | | | ||
350 | | We need to set the nmcexc bits if the exception is E1. Otherwise, | ||
351 | | the exc taken will be inex2. | ||
352 | | | ||
353 | leal BUSY_SIZE+LOCAL_SIZE(%a7),%a1 |init a1 for new frame | ||
354 | moveb VER_TMP(%a6),(%a7) |write busy fmt word | ||
355 | moveb #BUSY_SIZE-4,1(%a7) | ||
356 | movel FP_SCR1(%a6),WBTEMP_EX(%a1) |write | ||
357 | movel FP_SCR1+4(%a6),WBTEMP_HI(%a1) |exceptional op to | ||
358 | movel FP_SCR1+8(%a6),WBTEMP_LO(%a1) |wbtemp | ||
359 | | btst.b #E1,E_BYTE(%a1) | ||
360 | | beq.b do_restore | ||
361 | bfextu USER_FPSR(%a6){#17:#4},%d0 |get snan/operr/ovfl/unfl bits | ||
362 | bfins %d0,NMCEXC(%a1){#4:#4} |and insert them in nmcexc | ||
363 | movel USER_FPSR(%a6),FPSR_SHADOW(%a1) |set exc bits | ||
364 | orl #sx_mask,E_BYTE(%a1) | ||
365 | |||
366 | do_restore: | ||
367 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 | ||
368 | fmovemx USER_FP0(%a6),%fp0-%fp3 | ||
369 | fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar | ||
370 | frestore (%a7)+ | ||
371 | tstb RES_FLG(%a6) |RES_FLG indicates a "continuation" frame | ||
372 | beq cont | ||
373 | bsr bug1384 | ||
374 | cont: | ||
375 | unlk %a6 | ||
376 | | | ||
377 | | If trace mode enabled, then go to trace handler. This handler | ||
378 | | cannot have any fp instructions. If there are fp inst's and an | ||
379 | | exception has been restored into the machine then the exception | ||
380 | | will occur upon execution of the fp inst. This is not desirable | ||
381 | | in the kernel (supervisor mode). See MC68040 manual Section 9.3.8. | ||
382 | | | ||
383 | finish_up: | ||
384 | btstb #7,(%a7) |test T1 in SR | ||
385 | bnes g_trace | ||
386 | btstb #6,(%a7) |test T0 in SR | ||
387 | bnes g_trace | ||
388 | bral fpsp_done | ||
389 | | | ||
390 | | Change integer stack to look like trace stack | ||
391 | | The address of the instruction that caused the | ||
392 | | exception is already in the integer stack (is | ||
393 | | the same as the saved fpiar) | ||
394 | | | ||
395 | | If the current frame is already a 6-word stack then all | ||
396 | | that needs to be done is to change the vector# to TRACE. | ||
397 | | If the frame is only a 4-word stack (meaning we got here | ||
398 | | on an Unsupported data type exception), then we need to grow | ||
399 | | the stack an extra 2 words and get the FPIAR from the FPU. | ||
400 | | | ||
401 | g_trace: | ||
402 | bftst EXC_VEC-4(%sp){#0:#4} |test top 4 bits of the format/vector word | ||
403 | bne g_easy |nonzero: only the vector# needs changing | ||
404 |||
405 | subw #4,%sp | make room | ||
406 | movel 4(%sp),(%sp) |slide first longword of old frame down 4 bytes | ||
407 | movel 8(%sp),4(%sp) |slide second longword of old frame down 4 bytes | ||
408 | subw #BUSY_SIZE,%sp |reserve BUSY_SIZE bytes for the fsave frame | ||
409 | fsave (%sp) | ||
410 | fmovel %fpiar,BUSY_SIZE+EXC_EA-4(%sp) |write FPIAR into the grown stack frame | ||
411 | frestore (%sp) |put the saved FPU state back | ||
412 | addw #BUSY_SIZE,%sp |release the fsave area | ||
413 |||
414 | g_easy: | ||
415 | movew #TRACE_VEC,EXC_VEC-4(%a7) |change the vector# to TRACE | ||
416 | bral real_trace | ||
417 | | | ||
418 | | This is a work-around for hardware bug 1384. | ||
419 | | | ||
420 | bug1384: | ||
421 | link %a5,#0 |a5 marks the stack position before the fsave | ||
422 | fsave -(%sp) | ||
423 | cmpib #0x41,(%sp) | check for correct frame | ||
424 | beq frame_41 | ||
425 | bgt nofix | if more advanced mask, do nada | ||
426 |||
427 | frame_40: | ||
428 | tstb 1(%sp) | check to see if idle | ||
429 | bne notidle | ||
430 | idle40: | ||
431 | clrl (%sp) | get rid of old fsave frame | ||
432 | movel %d1,USER_D1(%a6) | save d1 | ||
433 | movew #8,%d1 | place unimp frame instead | ||
434 | loop40: clrl -(%sp) | ||
435 | dbra %d1,loop40 | loop body runs 9 times (d1 = 8..0) | ||
436 | movel USER_D1(%a6),%d1 | restore d1 | ||
437 | movel #0x40280000,-(%sp) | frame header: byte 0 = $40, byte 1 = $28 | ||
438 | frestore (%sp)+ | ||
439 | unlk %a5 | ||
440 | rts | ||
441 |||
442 | frame_41: | ||
443 | tstb 1(%sp) | check to see if idle | ||
444 | bne notidle | ||
445 | idle41: | ||
446 | clrl (%sp) | get rid of old fsave frame | ||
447 | movel %d1,USER_D1(%a6) | save d1 | ||
448 | movew #10,%d1 | place unimp frame instead | ||
449 | loop41: clrl -(%sp) | ||
450 | dbra %d1,loop41 | loop body runs 11 times (d1 = 10..0) | ||
451 | movel USER_D1(%a6),%d1 | restore d1 | ||
452 | movel #0x41300000,-(%sp) | frame header: byte 0 = $41, byte 1 = $30 | ||
453 | frestore (%sp)+ | ||
454 | unlk %a5 | ||
455 | rts | ||
456 |||
457 | notidle: | ||
458 | bclrb #etemp15_bit,-40(%a5) | clear etemp15 bit at a5-40 (presumably inside the saved frame - confirm offset) | ||
459 | frestore (%sp)+ | ||
460 | unlk %a5 | ||
461 | rts | ||
462 |||
463 | nofix: | ||
464 | frestore (%sp)+ | restore the frame unchanged | ||
465 | unlk %a5 | ||
466 | rts | ||
467 | |||
468 | |end | ||
diff --git a/arch/m68k/fpsp040/get_op.S b/arch/m68k/fpsp040/get_op.S new file mode 100644 index 000000000000..c7c2f3727425 --- /dev/null +++ b/arch/m68k/fpsp040/get_op.S | |||
@@ -0,0 +1,676 @@ | |||
1 | | | ||
2 | | get_op.sa 3.6 5/19/92 | ||
3 | | | ||
4 | | get_op.sa 3.5 4/26/91 | ||
5 | | | ||
6 | | Description: This routine is called by the unsupported format/data | ||
7 | | type exception handler ('unsupp' - vector 55) and the unimplemented | ||
8 | | instruction exception handler ('unimp' - vector 11). 'get_op' | ||
9 | | determines the opclass (0, 2, or 3) and branches to the | ||
10 | | opclass handler routine. See 68881/2 User's Manual table 4-11 | ||
11 | | for a description of the opclasses. | ||
12 | | | ||
13 | | For UNSUPPORTED data/format (exception vector 55) and for | ||
14 | | UNIMPLEMENTED instructions (exception vector 11) the following | ||
15 | | applies: | ||
16 | | | ||
17 | | - For unnormalized numbers (opclass 0, 2, or 3) the | ||
18 | | number(s) is normalized and the operand type tag is updated. | ||
19 | | | ||
20 | | - For a packed number (opclass 2) the number is unpacked and the | ||
21 | | operand type tag is updated. | ||
22 | | | ||
23 | | - For denormalized numbers (opclass 0 or 2) the number(s) is not | ||
24 | | changed but passed to the next module. The next module for | ||
25 | | unimp is do_func, the next module for unsupp is res_func. | ||
26 | | | ||
27 | | For UNSUPPORTED data/format (exception vector 55) only the | ||
28 | | following applies: | ||
29 | | | ||
30 | | - If there is a move out with a packed number (opclass 3) the | ||
31 | | number is packed and written to user memory. For the other | ||
32 | | opclasses the number(s) are written back to the fsave stack | ||
33 | | and the instruction is then restored back into the '040. The | ||
34 | | '040 is then able to complete the instruction. | ||
35 | | | ||
36 | | For example: | ||
37 | | fadd.x fpm,fpn where the fpm contains an unnormalized number. | ||
38 | | The '040 takes an unsupported data trap and gets to this | ||
39 | | routine. The number is normalized, put back on the stack and | ||
40 | | then an frestore is done to restore the instruction back into | ||
41 | | the '040. The '040 then re-executes the fadd.x fpm,fpn with | ||
42 | | a normalized number in the source and the instruction is | ||
43 | | successful. | ||
44 | | | ||
45 | | Next consider if in the process of normalizing the un- | ||
46 | | normalized number it becomes a denormalized number. The | ||
47 | | routine which converts the unnorm to a norm (called mk_norm) | ||
48 | | detects this and tags the number as a denorm. The routine | ||
49 | | res_func sees the denorm tag and converts the denorm to a | ||
50 | | norm. The instruction is then restored back into the '040 | ||
51 | | which re_executes the instruction. | ||
52 | | | ||
53 | | | ||
54 | | Copyright (C) Motorola, Inc. 1990 | ||
55 | | All Rights Reserved | ||
56 | | | ||
57 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
58 | | The copyright notice above does not evidence any | ||
59 | | actual or intended publication of such source code. | ||
60 | |||
61 | GET_OP: |idnt 2,1 | Motorola 040 Floating Point Software Package | ||
62 | |||
63 | |section 8 | ||
64 | |||
65 | #include "fpsp.h" | ||
66 | |||
67 | .global PIRN,PIRZRM,PIRP | ||
68 | .global SMALRN,SMALRZRM,SMALRP | ||
69 | .global BIGRN,BIGRZRM,BIGRP | ||
70 | |||
71 | PIRN: | ||
72 | .long 0x40000000,0xc90fdaa2,0x2168c235 |pi | ||
73 | PIRZRM: | ||
74 | .long 0x40000000,0xc90fdaa2,0x2168c234 |pi | ||
75 | PIRP: | ||
76 | .long 0x40000000,0xc90fdaa2,0x2168c235 |pi | ||
77 | |||
78 | |round to nearest | ||
79 | SMALRN: | ||
80 | .long 0x3ffd0000,0x9a209a84,0xfbcff798 |log10(2) | ||
81 | .long 0x40000000,0xadf85458,0xa2bb4a9a |e | ||
82 | .long 0x3fff0000,0xb8aa3b29,0x5c17f0bc |log2(e) | ||
83 | .long 0x3ffd0000,0xde5bd8a9,0x37287195 |log10(e) | ||
84 | .long 0x00000000,0x00000000,0x00000000 |0.0 | ||
85 | | round to zero;round to negative infinity | ||
86 | SMALRZRM: | ||
87 | .long 0x3ffd0000,0x9a209a84,0xfbcff798 |log10(2) | ||
88 | .long 0x40000000,0xadf85458,0xa2bb4a9a |e | ||
89 | .long 0x3fff0000,0xb8aa3b29,0x5c17f0bb |log2(e) | ||
90 | .long 0x3ffd0000,0xde5bd8a9,0x37287195 |log10(e) | ||
91 | .long 0x00000000,0x00000000,0x00000000 |0.0 | ||
92 | | round to positive infinity | ||
93 | SMALRP: | ||
94 | .long 0x3ffd0000,0x9a209a84,0xfbcff799 |log10(2) | ||
95 | .long 0x40000000,0xadf85458,0xa2bb4a9b |e | ||
96 | .long 0x3fff0000,0xb8aa3b29,0x5c17f0bc |log2(e) | ||
97 | .long 0x3ffd0000,0xde5bd8a9,0x37287195 |log10(e) | ||
98 | .long 0x00000000,0x00000000,0x00000000 |0.0 | ||
99 | |||
100 | |round to nearest | ||
101 | BIGRN: | ||
102 | .long 0x3ffe0000,0xb17217f7,0xd1cf79ac |ln(2) | ||
103 | .long 0x40000000,0x935d8ddd,0xaaa8ac17 |ln(10) | ||
104 | .long 0x3fff0000,0x80000000,0x00000000 |10 ^ 0 | ||
105 | |||
106 | .global PTENRN | ||
107 | PTENRN: | ||
108 | .long 0x40020000,0xA0000000,0x00000000 |10 ^ 1 | ||
109 | .long 0x40050000,0xC8000000,0x00000000 |10 ^ 2 | ||
110 | .long 0x400C0000,0x9C400000,0x00000000 |10 ^ 4 | ||
111 | .long 0x40190000,0xBEBC2000,0x00000000 |10 ^ 8 | ||
112 | .long 0x40340000,0x8E1BC9BF,0x04000000 |10 ^ 16 | ||
113 | .long 0x40690000,0x9DC5ADA8,0x2B70B59E |10 ^ 32 | ||
114 | .long 0x40D30000,0xC2781F49,0xFFCFA6D5 |10 ^ 64 | ||
115 | .long 0x41A80000,0x93BA47C9,0x80E98CE0 |10 ^ 128 | ||
116 | .long 0x43510000,0xAA7EEBFB,0x9DF9DE8E |10 ^ 256 | ||
117 | .long 0x46A30000,0xE319A0AE,0xA60E91C7 |10 ^ 512 | ||
118 | .long 0x4D480000,0xC9767586,0x81750C17 |10 ^ 1024 | ||
119 | .long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 |10 ^ 2048 | ||
120 | .long 0x75250000,0xC4605202,0x8A20979B |10 ^ 4096 | ||
121 | |round to zero;round to negative infinity | ||
122 | BIGRZRM: | ||
123 | .long 0x3ffe0000,0xb17217f7,0xd1cf79ab |ln(2) | ||
124 | .long 0x40000000,0x935d8ddd,0xaaa8ac16 |ln(10) | ||
125 | .long 0x3fff0000,0x80000000,0x00000000 |10 ^ 0 | ||
126 | |||
127 | .global PTENRM | ||
128 | PTENRM: | ||
129 | .long 0x40020000,0xA0000000,0x00000000 |10 ^ 1 | ||
130 | .long 0x40050000,0xC8000000,0x00000000 |10 ^ 2 | ||
131 | .long 0x400C0000,0x9C400000,0x00000000 |10 ^ 4 | ||
132 | .long 0x40190000,0xBEBC2000,0x00000000 |10 ^ 8 | ||
133 | .long 0x40340000,0x8E1BC9BF,0x04000000 |10 ^ 16 | ||
134 | .long 0x40690000,0x9DC5ADA8,0x2B70B59D |10 ^ 32 | ||
135 | .long 0x40D30000,0xC2781F49,0xFFCFA6D5 |10 ^ 64 | ||
136 | .long 0x41A80000,0x93BA47C9,0x80E98CDF |10 ^ 128 | ||
137 | .long 0x43510000,0xAA7EEBFB,0x9DF9DE8D |10 ^ 256 | ||
138 | .long 0x46A30000,0xE319A0AE,0xA60E91C6 |10 ^ 512 | ||
139 | .long 0x4D480000,0xC9767586,0x81750C17 |10 ^ 1024 | ||
140 | .long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 |10 ^ 2048 | ||
141 | .long 0x75250000,0xC4605202,0x8A20979A |10 ^ 4096 | ||
142 | |round to positive infinity | ||
143 | BIGRP: | ||
144 | .long 0x3ffe0000,0xb17217f7,0xd1cf79ac |ln(2) | ||
145 | .long 0x40000000,0x935d8ddd,0xaaa8ac17 |ln(10) | ||
146 | .long 0x3fff0000,0x80000000,0x00000000 |10 ^ 0 | ||
147 | |||
148 | .global PTENRP | ||
149 | PTENRP: | ||
150 | .long 0x40020000,0xA0000000,0x00000000 |10 ^ 1 | ||
151 | .long 0x40050000,0xC8000000,0x00000000 |10 ^ 2 | ||
152 | .long 0x400C0000,0x9C400000,0x00000000 |10 ^ 4 | ||
153 | .long 0x40190000,0xBEBC2000,0x00000000 |10 ^ 8 | ||
154 | .long 0x40340000,0x8E1BC9BF,0x04000000 |10 ^ 16 | ||
155 | .long 0x40690000,0x9DC5ADA8,0x2B70B59E |10 ^ 32 | ||
156 | .long 0x40D30000,0xC2781F49,0xFFCFA6D6 |10 ^ 64 | ||
157 | .long 0x41A80000,0x93BA47C9,0x80E98CE0 |10 ^ 128 | ||
158 | .long 0x43510000,0xAA7EEBFB,0x9DF9DE8E |10 ^ 256 | ||
159 | .long 0x46A30000,0xE319A0AE,0xA60E91C7 |10 ^ 512 | ||
160 | .long 0x4D480000,0xC9767586,0x81750C18 |10 ^ 1024 | ||
161 | .long 0x5A920000,0x9E8B3B5D,0xC53D5DE6 |10 ^ 2048 | ||
162 | .long 0x75250000,0xC4605202,0x8A20979B |10 ^ 4096 | ||
163 | |||
164 | |xref nrm_zero | ||
165 | |xref decbin | ||
166 | |xref round | ||
167 | |||
168 | .global get_op | ||
169 | .global uns_getop | ||
170 | .global uni_getop | ||
171 | get_op: | ||
172 | clrb DY_MO_FLG(%a6) | ||
173 | tstb UFLG_TMP(%a6) |test flag for unsupp/unimp state | ||
174 | beq uni_getop | ||
175 | |||
176 | uns_getop: | ||
177 | btstb #direction_bit,CMDREG1B(%a6) | ||
178 | bne opclass3 |branch if a fmove out (any kind) | ||
179 | btstb #6,CMDREG1B(%a6) | ||
180 | beqs uns_notpacked | ||
181 | |||
182 | bfextu CMDREG1B(%a6){#3:#3},%d0 | ||
183 | cmpb #3,%d0 | ||
184 | beq pack_source |check for a packed src op, branch if so | ||
185 | uns_notpacked: | ||
186 | bsr chk_dy_mo |set the dyadic/monadic flag | ||
187 | tstb DY_MO_FLG(%a6) | ||
188 | beqs src_op_ck |if monadic, go check src op | ||
189 | | ;else, check dst op (fall through) | ||
190 | |||
191 | btstb #7,DTAG(%a6) | ||
192 | beqs src_op_ck |if dst op is norm, check src op | ||
193 | bras dst_ex_dnrm |else, handle destination unnorm/dnrm | ||
194 | |||
195 | uni_getop: | ||
196 | bfextu CMDREG1B(%a6){#0:#6},%d0 |get opclass and src fields | ||
197 | cmpil #0x17,%d0 |if op class and size fields are $17, | ||
198 | | ;it is FMOVECR; if not, continue | ||
199 | | | ||
200 | | If the instruction is fmovecr, exit get_op. It is handled | ||
201 | | in do_func and smovecr.sa. | ||
202 | | | ||
203 | bne not_fmovecr |handle fmovecr as an unimplemented inst | ||
204 | rts | ||
205 | |||
206 | not_fmovecr: | ||
207 | btstb #E1,E_BYTE(%a6) |if set, there is a packed operand | ||
208 | bne pack_source |check for packed src op, branch if so | ||
209 | |||
210 | | The following lines are coded to optimize for normalized operands | ||
211 | moveb STAG(%a6),%d0 | ||
212 | orb DTAG(%a6),%d0 |check if either of STAG/DTAG msb set | ||
213 | bmis dest_op_ck |if so, some op needs to be fixed | ||
214 | rts | ||
215 | |||
216 | dest_op_ck: | ||
217 | btstb #7,DTAG(%a6) |check for unsupported data types in | ||
218 | beqs src_op_ck |the destination, if not, check src op | ||
219 | bsr chk_dy_mo |set dyadic/monadic flag | ||
220 | tstb DY_MO_FLG(%a6) | | ||
221 | beqs src_op_ck |if monadic, check src op | ||
222 | | | ||
223 | | At this point, destination has an extended denorm or unnorm. | ||
224 | | | ||
225 | dst_ex_dnrm: | ||
226 | movew FPTEMP_EX(%a6),%d0 |get destination exponent | ||
227 | andiw #0x7fff,%d0 |mask sign, check if exp = 0000 | ||
228 | beqs src_op_ck |if denorm then check source op. | ||
229 | | ;denorms are taken care of in res_func | ||
230 | | ;(unsupp) or do_func (unimp) | ||
231 | | ;else unnorm fall through | ||
232 | leal FPTEMP(%a6),%a0 |point a0 to dop - used in mk_norm | ||
233 | bsr mk_norm |go normalize - mk_norm returns: | ||
234 | | ;L_SCR1{7:5} = operand tag | ||
235 | | ; (000 = norm, 100 = denorm) | ||
236 | | ;L_SCR1{4} = fpte15 or ete15 | ||
237 | | ; 0 = exp > $3fff | ||
238 | | ; 1 = exp <= $3fff | ||
239 | | ;and puts the normalized num back | ||
240 | | ;on the fsave stack | ||
241 | | | ||
242 | moveb L_SCR1(%a6),DTAG(%a6) |write the new tag & fpte15 | ||
243 | | ;to the fsave stack and fall | ||
244 | | ;through to check source operand | ||
245 | | | ||
246 | src_op_ck: | ||
247 | btstb #7,STAG(%a6) | ||
248 | beq end_getop |check for unsupported data types on the | ||
249 | | ;source operand | ||
250 | btstb #5,STAG(%a6) | ||
251 | bnes src_sd_dnrm |if bit 5 set, handle sgl/dbl denorms | ||
252 | | | ||
253 | | At this point only unnorms or extended denorms are possible. | ||
254 | | | ||
255 | src_ex_dnrm: | ||
256 | movew ETEMP_EX(%a6),%d0 |get source exponent | ||
257 | andiw #0x7fff,%d0 |mask sign, check if exp = 0000 | ||
258 | beq end_getop |if denorm then exit, denorms are | ||
259 | | ;handled in do_func | ||
260 | leal ETEMP(%a6),%a0 |point a0 to sop - used in mk_norm | ||
261 | bsr mk_norm |go normalize - mk_norm returns: | ||
262 | | ;L_SCR1{7:5} = operand tag | ||
263 | | ; (000 = norm, 100 = denorm) | ||
264 | | ;L_SCR1{4} = fpte15 or ete15 | ||
265 | | ; 0 = exp > $3fff | ||
266 | | ; 1 = exp <= $3fff | ||
267 | | ;and puts the normalized num back | ||
268 | | ;on the fsave stack | ||
269 | | | ||
270 | moveb L_SCR1(%a6),STAG(%a6) |write the new tag & ete15 | ||
271 | rts |end_getop | ||
272 | |||
273 | | | ||
274 | | At this point, only single or double denorms are possible. | ||
275 | | If the inst is not fmove, normalize the source. If it is, | ||
276 | | do nothing to the input. | ||
277 | | | ||
278 | src_sd_dnrm: | ||
279 | btstb #4,CMDREG1B(%a6) |differentiate between sgl/dbl denorm | ||
280 | bnes is_double |bit 4 set selects the dbl bias | ||
281 | is_single: | ||
282 | movew #0x3f81,%d1 |write bias for sgl denorm | ||
283 | bras common |goto the common code | ||
284 | is_double: | ||
285 | movew #0x3c01,%d1 |write the bias for a dbl denorm | ||
286 | common: | ||
287 | btstb #sign_bit,ETEMP_EX(%a6) |grab sign bit of mantissa | ||
288 | beqs pos | ||
289 | bset #15,%d1 |set sign bit because it is negative | ||
290 | pos: | ||
291 | movew %d1,ETEMP_EX(%a6) | ||
292 | | ;put exponent on stack | ||
293 |||
294 | movew CMDREG1B(%a6),%d1 |get the command word | ||
295 | andw #0xe3ff,%d1 |clear out source specifier | ||
296 | orw #0x0800,%d1 |set source specifier to extended prec | ||
297 | movew %d1,CMDREG1B(%a6) |write back to the command word in stack | ||
298 | | ;this is needed to fix unsupp data stack | ||
299 | leal ETEMP(%a6),%a0 |point a0 to sop | ||
300 |||
301 | bsr mk_norm |convert sgl/dbl denorm to norm | ||
302 | moveb L_SCR1(%a6),STAG(%a6) |write the new tag & ete15 to STAG | ||
303 | rts |end_getop | ||
304 | | | ||
305 | | At this point, the source is definitely packed, whether | ||
306 | | instruction is dyadic or monadic is still unknown | ||
307 | | | ||
308 | pack_source: | ||
309 | movel FPTEMP_LO(%a6),ETEMP(%a6) |write ms part of packed | ||
310 | | ;number to etemp slot | ||
311 | bsr chk_dy_mo |set dyadic/monadic flag | ||
312 | bsr unpack | ||
313 | |||
314 | tstb DY_MO_FLG(%a6) | ||
315 | beqs end_getop |if monadic, exit | ||
316 | | ;else, fix FPTEMP | ||
317 | pack_dya: | ||
318 | bfextu CMDREG1B(%a6){#6:#3},%d0 |extract dest fp reg | ||
319 | movel #7,%d1 | ||
320 | subl %d0,%d1 | ||
321 | clrl %d0 | ||
322 | bsetl %d1,%d0 |set up d0 as a dynamic register mask | ||
323 | fmovemx %d0,FPTEMP(%a6) |write to FPTEMP | ||
324 | |||
325 | btstb #7,DTAG(%a6) |check dest tag for unnorm or denorm | ||
326 | bne dst_ex_dnrm |else, handle the unnorm or ext denorm | ||
327 | | | ||
328 | | Dest is not denormalized. Check for norm, and set fpte15 | ||
329 | | accordingly. | ||
330 | | | ||
331 | moveb DTAG(%a6),%d0 | ||
332 | andib #0xf0,%d0 |strip to only dtag:fpte15 | ||
333 | tstb %d0 |check for normalized value | ||
334 | bnes end_getop |if inf/nan/zero leave get_op | ||
335 | movew FPTEMP_EX(%a6),%d0 | ||
336 | andiw #0x7fff,%d0 | ||
337 | cmpiw #0x3fff,%d0 |check if fpte15 needs setting | ||
338 | bges end_getop |if >= $3fff, leave fpte15=0 | ||
339 | orb #0x10,DTAG(%a6) | ||
340 | bras end_getop | ||
341 | |||
342 | | | ||
343 | | At this point, it is either an fmoveout packed, unnorm or denorm | ||
344 | | | ||
345 | opclass3: | ||
346 | clrb DY_MO_FLG(%a6) |set dyadic/monadic flag to monadic | ||
347 | bfextu CMDREG1B(%a6){#4:#2},%d0 | ||
348 | cmpib #3,%d0 | ||
349 | bne src_ex_dnrm |if not equal, must be unnorm or denorm | ||
350 | | ;else it is a packed move out | ||
351 | | ;exit | ||
352 | end_getop: | ||
353 | rts | ||
354 | |||
355 | | | ||
356 | | Sets the DY_MO_FLG correctly. This is used only if it is an | ||
357 | | unsupported data type exception. Set if dyadic. | ||
358 | | | ||
359 | chk_dy_mo: | ||
360 | movew CMDREG1B(%a6),%d0 | ||
361 | btstl #5,%d0 |testing extension command word | ||
362 | beqs set_mon |if bit 5 = 0 then monadic | ||
363 | btstl #4,%d0 |know that bit 5 = 1 | ||
364 | beqs set_dya |if bit 4 = 0 then dyadic | ||
365 | andiw #0x007f,%d0 |get rid of all but extension bits {6:0} | ||
366 | cmpiw #0x0038,%d0 |if extension = $38 then fcmp (dyadic) | ||
367 | bnes set_mon | ||
368 | set_dya: | ||
369 | st DY_MO_FLG(%a6) |set the inst flag type to dyadic | ||
370 | rts | ||
371 | set_mon: | ||
372 | clrb DY_MO_FLG(%a6) |set the inst flag type to monadic | ||
373 | rts | ||
374 | | | ||
375 | | MK_NORM | ||
376 | | | ||
377 | | Normalizes unnormalized numbers, sets tag to norm or denorm, sets unfl | ||
378 | | exception if denorm. | ||
379 | | | ||
380 | | CASE opclass 0x0 unsupp | ||
381 | | mk_norm till msb set | ||
382 | | set tag = norm | ||
383 | | | ||
384 | | CASE opclass 0x0 unimp | ||
385 | | mk_norm till msb set or exp = 0 | ||
386 | | if integer bit = 0 | ||
387 | | tag = denorm | ||
388 | | else | ||
389 | | tag = norm | ||
390 | | | ||
391 | | CASE opclass 011 unsupp | ||
392 | | mk_norm till msb set or exp = 0 | ||
393 | | if integer bit = 0 | ||
394 | | tag = denorm | ||
395 | | set unfl_nmcexe = 1 | ||
396 | | else | ||
397 | | tag = norm | ||
398 | | | ||
399 | | if exp <= $3fff | ||
400 | | set ete15 or fpte15 = 1 | ||
401 | | else set ete15 or fpte15 = 0 | ||
402 | |||
403 | | input: | ||
404 | | a0 = points to operand to be normalized | ||
405 | | output: | ||
406 | | L_SCR1{7:5} = operand tag (000 = norm, 100 = denorm) | ||
407 | | L_SCR1{4} = fpte15 or ete15 (0 = exp > $3fff, 1 = exp <=$3fff) | ||
408 | | the normalized operand is placed back on the fsave stack | ||
409 | mk_norm: | ||
410 | clrl L_SCR1(%a6) | ||
411 | bclrb #sign_bit,LOCAL_EX(%a0) | ||
412 | sne LOCAL_SGN(%a0) |transform into internal extended format | ||
413 | |||
414 | cmpib #0x2c,1+EXC_VEC(%a6) |check if unimp | ||
415 | bnes uns_data |branch if unsupp | ||
416 | bsr uni_inst |call if unimp (opclass 0x0) | ||
417 | bras reload | ||
418 | uns_data: | ||
419 | btstb #direction_bit,CMDREG1B(%a6) |check transfer direction | ||
420 | bnes bit_set |branch if set (opclass 011) | ||
421 | bsr uns_opx |call if opclass 0x0 | ||
422 | bras reload | ||
423 | bit_set: | ||
424 | bsr uns_op3 |opclass 011 | ||
425 | reload: | ||
426 | cmpw #0x3fff,LOCAL_EX(%a0) |if exp > $3fff | ||
427 | bgts end_mk | fpte15/ete15 already set to 0 | ||
428 | bsetb #4,L_SCR1(%a6) |else set fpte15/ete15 to 1 | ||
429 | | ;calling routine actually sets the | ||
430 | | ;value on the stack (along with the | ||
431 | | ;tag), since this routine doesn't | ||
432 | | ;know if it should set ete15 or fpte15 | ||
433 | | ;ie, it doesn't know if this is the | ||
434 | | ;src op or dest op. | ||
435 | end_mk: | ||
436 | bfclr LOCAL_SGN(%a0){#0:#8} | ||
437 | beqs end_mk_pos | ||
438 | bsetb #sign_bit,LOCAL_EX(%a0) |convert back to IEEE format | ||
439 | end_mk_pos: | ||
440 | rts | ||
441 | | | ||
442 | | CASE opclass 011 unsupp | ||
443 | | | ||
444 | uns_op3: | ||
445 | bsr nrm_zero |normalize till msb = 1 or exp = zero | ||
446 | btstb #7,LOCAL_HI(%a0) |if msb = 1 | ||
447 | bnes no_unfl |then branch | ||
448 | set_unfl: | ||
449 | orw #dnrm_tag,L_SCR1(%a6) |set denorm tag | ||
450 | bsetb #unfl_bit,FPSR_EXCEPT(%a6) |set unfl exception bit | ||
451 | no_unfl: | ||
452 | rts | ||
453 | | | ||
454 | | CASE opclass 0x0 unsupp | ||
455 | | | ||
456 | uns_opx: | ||
457 | bsr nrm_zero |normalize the number | ||
458 | btstb #7,LOCAL_HI(%a0) |check if integer bit (j-bit) is set | ||
459 | beqs uns_den |if clear then now have a denorm | ||
460 | uns_nrm: | ||
461 | orb #norm_tag,L_SCR1(%a6) |set tag to norm | ||
462 | rts | ||
463 | uns_den: | ||
464 | orb #dnrm_tag,L_SCR1(%a6) |set tag to denorm | ||
465 | rts | ||
466 | | | ||
467 | | CASE opclass 0x0 unimp | ||
468 | | | ||
469 | uni_inst: | ||
470 | bsr nrm_zero | ||
471 | btstb #7,LOCAL_HI(%a0) |check if integer bit (j-bit) is set | ||
472 | beqs uni_den |if clear then now have a denorm | ||
473 | uni_nrm: | ||
474 | orb #norm_tag,L_SCR1(%a6) |set tag to norm | ||
475 | rts | ||
476 | uni_den: | ||
477 | orb #dnrm_tag,L_SCR1(%a6) |set tag to denorm | ||
478 | rts | ||
479 | |||
480 | | | ||
481 | | Decimal to binary conversion | ||
482 | | | ||
483 | | Special cases of inf and NaNs are completed outside of decbin. | ||
484 | | If the input is an snan, the snan bit is not set. | ||
485 | | | ||
486 | | input: | ||
487 | | ETEMP(a6) - points to packed decimal string in memory | ||
488 | | output: | ||
489 | | fp0 - contains packed string converted to extended precision | ||
490 | | ETEMP - same as fp0 | ||
491 | unpack: | ||
492 | movew CMDREG1B(%a6),%d0 |examine command word, looking for fmove's | ||
493 | andw #0x3b,%d0 | ||
494 | beq move_unpack |special handling for fmove: must set FPSR_CC | ||
495 | |||
496 | movew ETEMP(%a6),%d0 |get word with inf information | ||
497 | bfextu %d0{#20:#12},%d1 |get exponent into d1 | ||
498 | cmpiw #0x0fff,%d1 |test for inf or NaN | ||
499 | bnes try_zero |if not equal, it is not special | ||
500 | bfextu %d0{#17:#3},%d1 |get SE and y bits into d1 | ||
501 | cmpiw #7,%d1 |SE and y bits must be on for special | ||
502 | bnes try_zero |if not on, it is not special | ||
503 | |input is of the special cases of inf and NaN | ||
504 | tstl ETEMP_HI(%a6) |check ms mantissa | ||
505 | bnes fix_nan |if non-zero, it is a NaN | ||
506 | tstl ETEMP_LO(%a6) |check ls mantissa | ||
507 | bnes fix_nan |if non-zero, it is a NaN | ||
508 | bra finish |special already on stack | ||
509 | fix_nan: | ||
510 | btstb #signan_bit,ETEMP_HI(%a6) |test for snan | ||
511 | bne finish | ||
512 | orl #snaniop_mask,USER_FPSR(%a6) |always set snan if it is so | ||
513 | bra finish | ||
514 | try_zero: | ||
515 | movew ETEMP_EX+2(%a6),%d0 |get word 4 | ||
516 | andiw #0x000f,%d0 |clear all but last ni(y)bble | ||
517 | tstw %d0 |check for zero. | ||
518 | bne not_spec | ||
519 | tstl ETEMP_HI(%a6) |check words 3 and 2 | ||
520 | bne not_spec | ||
521 | tstl ETEMP_LO(%a6) |check words 1 and 0 | ||
522 | bne not_spec | ||
523 | tstl ETEMP(%a6) |test sign of the zero | ||
524 | bges pos_zero | ||
525 | movel #0x80000000,ETEMP(%a6) |write neg zero to etemp | ||
526 | clrl ETEMP_HI(%a6) | ||
527 | clrl ETEMP_LO(%a6) | ||
528 | bra finish | ||
529 | pos_zero: | ||
530 | clrl ETEMP(%a6) | ||
531 | clrl ETEMP_HI(%a6) | ||
532 | clrl ETEMP_LO(%a6) | ||
533 | bra finish | ||
534 | |||
535 | not_spec: | ||
536 | fmovemx %fp0-%fp1,-(%a7) |save fp0 - decbin returns in it | ||
537 | bsr decbin | ||
538 | fmovex %fp0,ETEMP(%a6) |put the unpacked sop in the fsave stack | ||
539 | fmovemx (%a7)+,%fp0-%fp1 | ||
540 | fmovel #0,%FPSR |clr fpsr from decbin | ||
541 | bra finish | ||
542 | |||
543 | | | ||
544 | | Special handling for packed move in: Same results as all other | ||
545 | | packed cases, but we must set the FPSR condition codes properly. | ||
546 | | | ||
547 | move_unpack: | ||
548 | movew ETEMP(%a6),%d0 |get word with inf information | ||
549 | bfextu %d0{#20:#12},%d1 |get exponent into d1 | ||
550 | cmpiw #0x0fff,%d1 |test for inf or NaN | ||
551 | bnes mtry_zero |if not equal, it is not special | ||
552 | bfextu %d0{#17:#3},%d1 |get SE and y bits into d1 | ||
553 | cmpiw #7,%d1 |SE and y bits must be on for special | ||
554 | bnes mtry_zero |if not on, it is not special | ||
555 | |input is one of the special cases: inf or NaN | ||
556 | tstl ETEMP_HI(%a6) |check ms mantissa | ||
557 | bnes mfix_nan |if non-zero, it is a NaN | ||
558 | tstl ETEMP_LO(%a6) |check ls mantissa | ||
559 | bnes mfix_nan |if non-zero, it is a NaN | ||
560 | |input is inf | ||
561 | orl #inf_mask,USER_FPSR(%a6) |set I bit | ||
562 | tstl ETEMP(%a6) |check sign | ||
563 | bge finish |if positive, leave N clear | ||
564 | orl #neg_mask,USER_FPSR(%a6) |set N bit | ||
565 | bra finish |special already on stack | ||
566 | mfix_nan: | ||
567 | orl #nan_mask,USER_FPSR(%a6) |set NaN bit | ||
568 | moveb #nan_tag,STAG(%a6) |set stag to NaN | ||
569 | btstb #signan_bit,ETEMP_HI(%a6) |test for snan | ||
570 | bnes mn_snan |bit set: not an snan, skip snan handling | ||
571 | orl #snaniop_mask,USER_FPSR(%a6) |set snan bit | ||
572 | btstb #snan_bit,FPCR_ENABLE(%a6) |test for snan enabled | ||
573 | bnes mn_snan |if enabled, leave the snan unchanged | ||
574 | bsetb #signan_bit,ETEMP_HI(%a6) |force snans to qnans | ||
575 | mn_snan: | ||
576 | tstl ETEMP(%a6) |check for sign | ||
577 | bge finish |if clr, go on | ||
578 | orl #neg_mask,USER_FPSR(%a6) |set N bit | ||
579 | bra finish |special already on stack | ||
580 | |||
581 | mtry_zero: | ||
582 | movew ETEMP_EX+2(%a6),%d0 |get word 4 | ||
583 | andiw #0x000f,%d0 |clear all but last ni(y)bble | ||
584 | tstw %d0 |check for zero. | ||
585 | bnes mnot_spec |if non-zero, it is not a zero | ||
586 | tstl ETEMP_HI(%a6) |check words 3 and 2 | ||
587 | bnes mnot_spec |if non-zero, it is not a zero | ||
588 | tstl ETEMP_LO(%a6) |check words 1 and 0 | ||
589 | bnes mnot_spec |if non-zero, it is not a zero | ||
590 | tstl ETEMP(%a6) |test sign of the zero | ||
591 | bges mpos_zero |if clr, it is a positive zero | ||
592 | orl #neg_mask+z_mask,USER_FPSR(%a6) |set N and Z | ||
593 | movel #0x80000000,ETEMP(%a6) |write neg zero to etemp | ||
594 | clrl ETEMP_HI(%a6) |clear ms mantissa | ||
595 | clrl ETEMP_LO(%a6) |clear ls mantissa | ||
596 | bras finish | ||
597 | mpos_zero: | ||
598 | orl #z_mask,USER_FPSR(%a6) |set Z | ||
599 | clrl ETEMP(%a6) |write pos zero to etemp | ||
600 | clrl ETEMP_HI(%a6) |clear ms mantissa | ||
601 | clrl ETEMP_LO(%a6) |clear ls mantissa | ||
602 | bras finish | ||
603 | |||
604 | mnot_spec: | ||
605 | fmovemx %fp0-%fp1,-(%a7) |save fp0, fp1 - decbin returns in fp0 | ||
606 | bsr decbin |unpack the operand; result comes back in fp0 | ||
607 | fmovex %fp0,ETEMP(%a6) | ||
608 | | ;put the unpacked sop in the fsave stack | ||
609 | fmovemx (%a7)+,%fp0-%fp1 |restore fp0, fp1; fall through to finish | ||
610 | |||
611 | finish: | ||
612 | movew CMDREG1B(%a6),%d0 |get the command word | ||
613 | andw #0xfbff,%d0 |change the source specifier field to | ||
614 | | ;extended (was packed). | ||
615 | movew %d0,CMDREG1B(%a6) |write command word back to fsave stack | ||
616 | | ;we need to do this so the 040 will | ||
617 | | ;re-execute the inst. without taking | ||
618 | | ;another packed trap. | ||
619 | |||
620 | fix_stag: | ||
621 | |Converted result is now in etemp on the fsave stack; now set the | ||
622 | |source tag (stag); ete is the etemp exponent with the sign stripped: | ||
623 | | if (ete = $7fff) then INF or NAN | ||
624 | | if (etemp = $x.0----0) then | ||
625 | | stag = INF | ||
626 | | else | ||
627 | | stag = NAN | ||
628 | | else | ||
629 | | if (ete = $0000) then | ||
630 | | stag = ZERO | ||
631 | | else | ||
632 | | stag = NORM | ||
633 | | | ||
634 | | Note also that the etemp_15 bit (just right of the stag) must | ||
635 | | be set accordingly. | ||
636 | | | ||
637 | movew ETEMP_EX(%a6),%d1 |get sign and exponent word | ||
638 | andiw #0x7fff,%d1 |strip sign | ||
639 | cmpw #0x7fff,%d1 |max exponent means inf or nan | ||
640 | bnes z_or_nrm | ||
641 | movel ETEMP_HI(%a6),%d1 |check ms mantissa | ||
642 | bnes is_nan |if non-zero, it is a NaN | ||
643 | movel ETEMP_LO(%a6),%d1 |check ls mantissa | ||
644 | bnes is_nan |if non-zero, it is a NaN | ||
645 | is_inf: | ||
646 | moveb #0x40,STAG(%a6) |stag = inf | ||
647 | movel #0x40,%d0 |also hand the inf tag back in d0 | ||
648 | rts | ||
649 | is_nan: | ||
650 | moveb #0x60,STAG(%a6) |stag = nan | ||
651 | movel #0x60,%d0 |also hand the nan tag back in d0 | ||
652 | rts | ||
653 | z_or_nrm: | ||
654 | tstw %d1 |zero exponent is tagged as zero | ||
655 | bnes is_nrm | ||
656 | is_zro: | ||
657 | | For a zero, set etemp_15 | ||
658 | moveb #0x30,STAG(%a6) |stag = zero (0x20) plus etemp_15 (0x10) | ||
659 | movel #0x20,%d0 |d0 gets the zero tag without etemp_15 | ||
660 | rts | ||
661 | is_nrm: | ||
662 | | For a norm, check if the exp <= $3fff; if so, set etemp_15 | ||
663 | cmpiw #0x3fff,%d1 | ||
664 | bles set_bit15 | ||
665 | moveb #0,STAG(%a6) |stag = norm, etemp_15 clear | ||
666 | bras end_is_nrm | ||
667 | set_bit15: | ||
668 | moveb #0x10,STAG(%a6) |stag = norm, etemp_15 set | ||
669 | end_is_nrm: | ||
670 | movel #0,%d0 |d0 gets the norm tag | ||
671 | end_fix: | ||
672 | rts | ||
673 | |||
674 | end_get: | ||
675 | rts | ||
676 | |end | ||
diff --git a/arch/m68k/fpsp040/kernel_ex.S b/arch/m68k/fpsp040/kernel_ex.S new file mode 100644 index 000000000000..476b711967ce --- /dev/null +++ b/arch/m68k/fpsp040/kernel_ex.S | |||
@@ -0,0 +1,494 @@ | |||
1 | | | ||
2 | | kernel_ex.sa 3.3 12/19/90 | ||
3 | | | ||
4 | | This file contains routines to force exception status in the | ||
5 | | fpu for exceptional cases detected or reported within the | ||
6 | | transcendental functions. Typically, the t_xx routine will | ||
7 | | set the appropriate bits in the USER_FPSR word on the stack. | ||
8 | | The bits are tested in gen_except.sa to determine if an exceptional | ||
9 | | situation needs to be created on return from the FPSP. | ||
10 | | | ||
11 | |||
12 | | Copyright (C) Motorola, Inc. 1990 | ||
13 | | All Rights Reserved | ||
14 | | | ||
15 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
16 | | The copyright notice above does not evidence any | ||
17 | | actual or intended publication of such source code. | ||
18 | |||
19 | KERNEL_EX: |idnt 2,1 | Motorola 040 Floating Point Software Package | ||
20 | |||
21 | |section 8 | ||
22 | |||
23 | #include "fpsp.h" | ||
24 | |||
25 | mns_inf: .long 0xffff0000,0x00000000,0x00000000 | ||
26 | pls_inf: .long 0x7fff0000,0x00000000,0x00000000 | ||
27 | nan: .long 0x7fff0000,0xffffffff,0xffffffff | ||
28 | huge: .long 0x7ffe0000,0xffffffff,0xffffffff | ||
29 | |||
30 | |xref ovf_r_k | ||
31 | |xref unf_sub | ||
32 | |xref nrm_set | ||
33 | |||
34 | .global t_dz | ||
35 | .global t_dz2 | ||
36 | .global t_operr | ||
37 | .global t_unfl | ||
38 | .global t_ovfl | ||
39 | .global t_ovfl2 | ||
40 | .global t_inx2 | ||
41 | .global t_frcinx | ||
42 | .global t_extdnrm | ||
43 | .global t_resdnrm | ||
44 | .global dst_nan | ||
45 | .global src_nan | ||
46 | | | ||
47 | | DZ exception | ||
48 | | | ||
49 | | | ||
50 | | if dz trap disabled | ||
51 | | store properly signed inf (use sign of etemp) into fp0 | ||
52 | | set FPSR exception status dz bit, condition code | ||
53 | | inf bit, and accrued dz bit | ||
54 | | return | ||
55 | | frestore the frame into the machine (done by unimp_hd) | ||
56 | | | ||
57 | | else dz trap enabled | ||
58 | | set exception status bit & accrued bits in FPSR | ||
59 | | set flag to disable sto_res from corrupting fp register | ||
60 | | return | ||
61 | | frestore the frame into the machine (done by unimp_hd) | ||
62 | | | ||
63 | | t_dz2 is used by monadic functions such as flogn (from do_func). | ||
64 | | t_dz is used by monadic functions such as satanh (from the | ||
65 | | transcendental function). | ||
66 | | | ||
67 | t_dz2: | ||
68 | bsetb #neg_bit,FPSR_CC(%a6) |set neg bit in FPSR | ||
69 | fmovel #0,%FPSR |clr status bits (Z set) | ||
70 | btstb #dz_bit,FPCR_ENABLE(%a6) |test FPCR for dz exc enabled | ||
71 | bnes dz_ena_end |if enabled, do not load a result | ||
72 | bras m_inf |flogx always returns -inf | ||
73 | t_dz: | ||
74 | fmovel #0,%FPSR |clr status bits (Z set) | ||
75 | btstb #dz_bit,FPCR_ENABLE(%a6) |test FPCR for dz exc enabled | ||
76 | bnes dz_ena |if enabled, do not load a result | ||
77 | | | ||
78 | | dz disabled | ||
79 | | | ||
80 | btstb #sign_bit,ETEMP_EX(%a6) |check sign for neg or pos | ||
81 | beqs p_inf |branch if pos sign | ||
82 | |||
83 | m_inf: | ||
84 | fmovemx mns_inf,%fp0-%fp0 |load -inf | ||
85 | bsetb #neg_bit,FPSR_CC(%a6) |set neg bit in FPSR | ||
86 | bras set_fpsr | ||
87 | p_inf: | ||
88 | fmovemx pls_inf,%fp0-%fp0 |load +inf | ||
89 | set_fpsr: | ||
90 | orl #dzinf_mask,USER_FPSR(%a6) |set I,DZ,ADZ | ||
91 | rts | ||
92 | | | ||
93 | | dz enabled | ||
94 | | | ||
95 | dz_ena: | ||
96 | btstb #sign_bit,ETEMP_EX(%a6) |check sign for neg or pos | ||
97 | beqs dz_ena_end |if pos sign, leave neg bit alone | ||
98 | bsetb #neg_bit,FPSR_CC(%a6) |set neg bit in FPSR | ||
99 | dz_ena_end: | ||
100 | orl #dzinf_mask,USER_FPSR(%a6) |set I,DZ,ADZ | ||
101 | st STORE_FLG(%a6) |do not corrupt destination | ||
102 | rts | ||
103 | | | ||
104 | | OPERR exception | ||
105 | | | ||
106 | | if (operr trap disabled) | ||
107 | | set FPSR exception status operr bit, condition code | ||
108 | | nan bit; Store default NAN into fp0 | ||
109 | | frestore the frame into the machine (done by unimp_hd) | ||
110 | | | ||
111 | | else (operr trap enabled) | ||
112 | | set FPSR exception status operr bit, accrued operr bit | ||
113 | | set flag to disable sto_res from corrupting fp register | ||
114 | | frestore the frame into the machine (done by unimp_hd) | ||
115 | | | ||
116 | t_operr: | ||
117 | orl #opnan_mask,USER_FPSR(%a6) |set NaN, OPERR, AIOP | ||
118 | |||
119 | btstb #operr_bit,FPCR_ENABLE(%a6) |test FPCR for operr enabled | ||
120 | bnes op_ena |if enabled, do not load a result | ||
121 | |||
122 | fmovemx nan,%fp0-%fp0 |load default nan | ||
123 | rts | ||
124 | op_ena: | ||
125 | st STORE_FLG(%a6) |do not corrupt destination | ||
126 | rts | ||
127 | |||
128 | | | ||
129 | | t_unfl --- UNFL exception | ||
130 | | | ||
131 | | This entry point is used by all routines requiring unfl, inex2, | ||
132 | | aunfl, and ainex to be set on exit. | ||
133 | | | ||
134 | | On entry, a0 points to the exceptional operand. The final exceptional | ||
135 | | operand is built in FP_SCR1 and only the sign from the original operand | ||
136 | | is used. | ||
137 | | | ||
138 | t_unfl: | ||
139 | clrl FP_SCR1(%a6) |set exceptional operand to zero | ||
140 | clrl FP_SCR1+4(%a6) | ||
141 | clrl FP_SCR1+8(%a6) | ||
142 | tstb (%a0) |extract sign from caller's exop | ||
143 | bpls unfl_signok |if positive, leave sign clear | ||
144 | bset #sign_bit,FP_SCR1(%a6) |copy neg sign to the exc operand | ||
145 | unfl_signok: | ||
146 | leal FP_SCR1(%a6),%a0 |a0 now points to the exc operand | ||
147 | orl #unfinx_mask,USER_FPSR(%a6) | ||
148 | | ;set UNFL, INEX2, AUNFL, AINEX | ||
149 | unfl_con: | ||
150 | btstb #unfl_bit,FPCR_ENABLE(%a6) |test FPCR for unfl enabled | ||
151 | beqs unfl_dis |if disabled, skip the frame setup | ||
152 | |||
153 | unfl_ena: | ||
154 | bfclr STAG(%a6){#5:#3} |clear wbtm66,wbtm1,wbtm0 | ||
155 | bsetb #wbtemp15_bit,WB_BYTE(%a6) |set wbtemp15 | ||
156 | bsetb #sticky_bit,STICKY(%a6) |set sticky bit | ||
157 | |||
158 | bclrb #E1,E_BYTE(%a6) |clear the E1 bit | ||
159 | |||
160 | unfl_dis: | ||
161 | bfextu FPCR_MODE(%a6){#0:#2},%d0 |get round precision | ||
162 | |||
163 | bclrb #sign_bit,LOCAL_EX(%a0) |strip sign from the exponent word | ||
164 | sne LOCAL_SGN(%a0) |convert to internal ext format | ||
165 | |||
166 | bsr unf_sub |returns IEEE result at a0 | ||
167 | | ;and sets FPSR_CC accordingly | ||
168 | |||
169 | bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format | ||
170 | beqs unfl_fin |if sign byte was clear, result is positive | ||
171 | |||
172 | bsetb #sign_bit,LOCAL_EX(%a0) |put the sign back on the result | ||
173 | bsetb #sign_bit,FP_SCR1(%a6) |set sign bit of exc operand | ||
174 | |||
175 | unfl_fin: | ||
176 | fmovemx (%a0),%fp0-%fp0 |store result in fp0 | ||
177 | rts | ||
178 | |||
179 | |||
180 | | | ||
181 | | t_ovfl2 --- OVFL exception (without inex2 returned) | ||
182 | | | ||
183 | | This entry is used by scale to force catastrophic overflow. The | ||
184 | | ovfl, aovfl, and ainex bits are set, but not the inex2 bit. | ||
185 | | | ||
186 | t_ovfl2: | ||
187 | orl #ovfl_inx_mask,USER_FPSR(%a6) |set OVFL, AOVFL, AINEX | ||
188 | movel ETEMP(%a6),FP_SCR1(%a6) |copy ETEMP to FP_SCR1 | ||
189 | movel ETEMP_HI(%a6),FP_SCR1+4(%a6) | ||
190 | movel ETEMP_LO(%a6),FP_SCR1+8(%a6) | ||
191 | | | ||
192 | | Check for single or double round precision. If single, check if | ||
193 | | the lower 40 bits of ETEMP are zero; if not, set inex2. If double, | ||
194 | | check if the lower 11 bits are zero; if not, set inex2. | ||
195 | | | ||
196 | moveb FPCR_MODE(%a6),%d0 | ||
197 | andib #0xc0,%d0 |isolate round precision (top two bits) | ||
198 | beq t_work |if extended, finish ovfl processing | ||
199 | cmpib #0x40,%d0 |test for single | ||
200 | bnes t_dbl | ||
201 | t_sgl: | ||
202 | tstb ETEMP_LO(%a6) |byte test only; NOTE(review): confirm vs. 40 bits above | ||
203 | bnes t_setinx2 | ||
204 | movel ETEMP_HI(%a6),%d0 | ||
205 | andil #0xff,%d0 |look at only lower 8 bits | ||
206 | bnes t_setinx2 | ||
207 | bra t_work | ||
208 | t_dbl: | ||
209 | movel ETEMP_LO(%a6),%d0 | ||
210 | andil #0x7ff,%d0 |look at only lower 11 bits | ||
211 | beq t_work |if zero, inex2 is not set | ||
212 | t_setinx2: | ||
213 | orl #inex2_mask,USER_FPSR(%a6) |set INEX2 | ||
214 | bras t_work | ||
215 | | | ||
216 | | t_ovfl --- OVFL exception | ||
217 | | | ||
218 | |** Note: the exc operand is returned in ETEMP. | ||
219 | | | ||
220 | t_ovfl: | ||
221 | orl #ovfinx_mask,USER_FPSR(%a6) | ||
222 | t_work: | ||
223 | btstb #ovfl_bit,FPCR_ENABLE(%a6) |test FPCR for ovfl enabled | ||
224 | beqs ovf_dis |if disabled, skip the frame setup | ||
225 | |||
226 | ovf_ena: | ||
227 | clrl FP_SCR1(%a6) |set exceptional operand to zero | ||
228 | clrl FP_SCR1+4(%a6) | ||
229 | clrl FP_SCR1+8(%a6) | ||
230 | |||
231 | bfclr STAG(%a6){#5:#3} |clear wbtm66,wbtm1,wbtm0 | ||
232 | bclrb #wbtemp15_bit,WB_BYTE(%a6) |clear wbtemp15 | ||
233 | bsetb #sticky_bit,STICKY(%a6) |set sticky bit | ||
234 | |||
235 | bclrb #E1,E_BYTE(%a6) |clear the E1 bit | ||
236 | | ;fall through to disabled case | ||
237 | |||
238 | | Both cases end up here and call 'ovf_r_k'. This routine loads the | ||
239 | | correct result based on the rounding precision, destination | ||
240 | | format, rounding mode and sign. | ||
241 | | | ||
242 | ovf_dis: | ||
243 | bsr ovf_r_k |returns unsigned ETEMP_EX | ||
244 | | ;and sets FPSR_CC accordingly. | ||
245 | bfclr ETEMP_SGN(%a6){#0:#8} |fix sign | ||
246 | beqs ovf_pos |if sign byte was clear, result is positive | ||
247 | bsetb #sign_bit,ETEMP_EX(%a6) |put the sign back on the result | ||
248 | bsetb #sign_bit,FP_SCR1(%a6) |set exceptional operand sign | ||
249 | ovf_pos: | ||
250 | fmovemx ETEMP(%a6),%fp0-%fp0 |move the result to fp0 | ||
251 | rts | ||
252 | |||
253 | |||
254 | | | ||
255 | | INEX2 exception | ||
256 | | | ||
257 | | The inex2 and ainex bits are set. | ||
258 | | | ||
259 | t_inx2: | ||
260 | orl #inx2a_mask,USER_FPSR(%a6) |set INEX2, AINEX | ||
261 | rts | ||
262 | |||
263 | | | ||
264 | | t_frcinx --- Force INEX2 | ||
265 | | | ||
266 | | This routine is called by the transcendental routines to force | ||
267 | | the inex2 exception bits set in the FPSR. If the underflow bit | ||
268 | | is set, but the underflow trap was not taken, the aunfl bit in | ||
269 | | the FPSR must be set. | ||
270 | | | ||
271 | t_frcinx: | ||
272 | orl #inx2a_mask,USER_FPSR(%a6) |set INEX2, AINEX | ||
273 | btstb #unfl_bit,FPSR_EXCEPT(%a6) |test for unfl bit set | ||
274 | beqs no_uacc1 |if clear, do not set aunfl | ||
275 | bsetb #aunfl_bit,FPSR_AEXCEPT(%a6) |set AUNFL | ||
276 | no_uacc1: | ||
277 | rts | ||
278 | |||
279 | | | ||
280 | | DST_NAN | ||
281 | | | ||
282 | | Determine if the destination nan is signalling or non-signalling, | ||
283 | | and set the FPSR bits accordingly. See the MC68040 User's Manual | ||
284 | | section 3.2.2.5 NOT-A-NUMBERS. | ||
285 | | | ||
286 | dst_nan: | ||
287 | btstb #sign_bit,FPTEMP_EX(%a6) |test sign of nan | ||
288 | beqs dst_pos |if clr, it was positive | ||
289 | bsetb #neg_bit,FPSR_CC(%a6) |set N bit | ||
290 | dst_pos: | ||
291 | btstb #signan_bit,FPTEMP_HI(%a6) |check if signalling | ||
292 | beqs dst_snan |branch if signalling | ||
293 | |||
294 | fmovel %d1,%fpcr |restore user's rmode/prec | ||
295 | fmovex FPTEMP(%a6),%fp0 |return the non-signalling nan | ||
296 | | | ||
297 | | Check the source. If it is a signalling nan, snan will be reported. | ||
298 | | | ||
299 | moveb STAG(%a6),%d0 | ||
300 | andib #0xe0,%d0 |isolate stag bits | ||
301 | cmpib #0x60,%d0 |is the source tagged as a nan? | ||
302 | bnes no_snan |if not, nothing more to report | ||
303 | btstb #signan_bit,ETEMP_HI(%a6) |check if signalling | ||
304 | bnes no_snan |bit set: source nan is non-signalling | ||
305 | orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP | ||
306 | no_snan: | ||
307 | rts | ||
308 | |||
309 | dst_snan: | ||
310 | btstb #snan_bit,FPCR_ENABLE(%a6) |check if trap enabled | ||
311 | beqs dst_dis |branch if disabled | ||
312 | |||
313 | orb #nan_tag,DTAG(%a6) |set up dtag for nan | ||
314 | st STORE_FLG(%a6) |do not store a result | ||
315 | orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP | ||
316 | rts | ||
317 | |||
318 | dst_dis: | ||
319 | bsetb #signan_bit,FPTEMP_HI(%a6) |set SNAN bit in dop | ||
320 | fmovel %d1,%fpcr |restore user's rmode/prec | ||
321 | fmovex FPTEMP(%a6),%fp0 |load the now non-signalling nan | ||
322 | orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP | ||
323 | rts | ||
324 | |||
325 | | | ||
326 | | SRC_NAN | ||
327 | | | ||
328 | | Determine if the source nan is signalling or non-signalling, | ||
329 | | and set the FPSR bits accordingly. See the MC68040 User's Manual | ||
330 | | section 3.2.2.5 NOT-A-NUMBERS. | ||
331 | | | ||
332 | src_nan: | ||
333 | btstb #sign_bit,ETEMP_EX(%a6) |test sign of nan | ||
334 | beqs src_pos |if clr, it was positive | ||
335 | bsetb #neg_bit,FPSR_CC(%a6) |set N bit | ||
336 | src_pos: | ||
337 | btstb #signan_bit,ETEMP_HI(%a6) |check if signalling | ||
338 | beqs src_snan |branch if signalling | ||
339 | fmovel %d1,%fpcr |restore user's rmode/prec | ||
340 | fmovex ETEMP(%a6),%fp0 |return the non-signalling nan | ||
341 | rts | ||
342 | |||
343 | src_snan: | ||
344 | btstb #snan_bit,FPCR_ENABLE(%a6) |check if trap enabled | ||
345 | beqs src_dis |branch if disabled | ||
346 | bsetb #signan_bit,ETEMP_HI(%a6) |set SNAN bit in sop | ||
347 | orb #norm_tag,DTAG(%a6) |set up dtag for norm | ||
348 | orb #nan_tag,STAG(%a6) |set up stag for nan | ||
349 | st STORE_FLG(%a6) |do not store a result | ||
350 | orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP | ||
351 | rts | ||
352 | |||
353 | src_dis: | ||
354 | bsetb #signan_bit,ETEMP_HI(%a6) |set SNAN bit in sop | ||
355 | fmovel %d1,%fpcr |restore user's rmode/prec | ||
356 | fmovex ETEMP(%a6),%fp0 |load the now non-signalling nan | ||
357 | orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP | ||
358 | rts | ||
359 | |||
360 | | | ||
361 | | Entry point for all functions that have a denormalized input and | ||
362 | | for which f(x)=x. | ||
363 | | | ||
364 | t_extdnrm: | ||
365 | orl #unfinx_mask,USER_FPSR(%a6) | ||
366 | | ;set UNFL, INEX2, AUNFL, AINEX | ||
367 | bras xdnrm_con | ||
368 | | | ||
369 | | Entry point for scale with extended denorm. The function does | ||
370 | | not set inex2, aunfl, or ainex. | ||
371 | | | ||
372 | t_resdnrm: | ||
373 | orl #unfl_mask,USER_FPSR(%a6) |set UNFL only | ||
374 | |||
375 | xdnrm_con: | ||
376 | btstb #unfl_bit,FPCR_ENABLE(%a6) |test FPCR for unfl enabled | ||
377 | beqs xdnrm_dis |if disabled, skip the WBTEMP setup | ||
378 | |||
379 | | | ||
380 | | If exceptions are enabled, the additional task of setting up WBTEMP | ||
381 | | is needed so that when the underflow exception handler is entered, | ||
382 | | the user perceives no difference between what the 040 provides vs. | ||
383 | | what the FPSP provides. | ||
384 | | | ||
385 | xdnrm_ena: | ||
386 | movel %a0,-(%a7) |save the pointer to the operand | ||
387 | |||
388 | movel LOCAL_EX(%a0),FP_SCR1(%a6) |copy the operand to FP_SCR1 | ||
389 | movel LOCAL_HI(%a0),FP_SCR1+4(%a6) | ||
390 | movel LOCAL_LO(%a0),FP_SCR1+8(%a6) | ||
391 | |||
392 | lea FP_SCR1(%a6),%a0 |work on the copy | ||
393 | |||
394 | bclrb #sign_bit,LOCAL_EX(%a0) |strip sign from the exponent word | ||
395 | sne LOCAL_SGN(%a0) |convert to internal ext format | ||
396 | tstw LOCAL_EX(%a0) |check if input is denorm | ||
397 | beqs xdnrm_dn |if so, skip nrm_set | ||
398 | bsr nrm_set |normalize the result (exponent | ||
399 | | ;will be negative) | ||
400 | xdnrm_dn: | ||
401 | bclrb #sign_bit,LOCAL_EX(%a0) |take off false sign | ||
402 | bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format | ||
403 | beqs xdep |if sign byte was clear, operand is positive | ||
404 | bsetb #sign_bit,LOCAL_EX(%a0) |put the sign back | ||
405 | xdep: | ||
406 | bfclr STAG(%a6){#5:#3} |clear wbtm66,wbtm1,wbtm0 | ||
407 | bsetb #wbtemp15_bit,WB_BYTE(%a6) |set wbtemp15 | ||
408 | bclrb #sticky_bit,STICKY(%a6) |clear sticky bit | ||
409 | bclrb #E1,E_BYTE(%a6) |clear the E1 bit | ||
410 | movel (%a7)+,%a0 |restore the pointer to the operand | ||
411 | xdnrm_dis: | ||
412 | bfextu FPCR_MODE(%a6){#0:#2},%d0 |get round precision | ||
413 | bnes not_ext |if not round extended, store | ||
414 | | ;IEEE defaults | ||
415 | is_ext: | ||
416 | btstb #sign_bit,LOCAL_EX(%a0) |check sign of the operand | ||
417 | beqs xdnrm_store |if positive, leave N clear | ||
418 | |||
419 | bsetb #neg_bit,FPSR_CC(%a6) |set N bit in FPSR_CC | ||
420 | |||
421 | bras xdnrm_store | ||
422 | |||
423 | not_ext: | ||
424 | bclrb #sign_bit,LOCAL_EX(%a0) |strip sign from the exponent word | ||
425 | sne LOCAL_SGN(%a0) |convert to internal ext format | ||
426 | bsr unf_sub |returns IEEE result pointed by | ||
427 | | ;a0; sets FPSR_CC accordingly | ||
428 | bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format | ||
429 | beqs xdnrm_store |if sign byte was clear, result is positive | ||
430 | bsetb #sign_bit,LOCAL_EX(%a0) |put the sign back on the result | ||
431 | xdnrm_store: | ||
432 | fmovemx (%a0),%fp0-%fp0 |store result in fp0 | ||
433 | rts | ||
434 | |||
435 | | | ||
436 | | This subroutine is used for dyadic operations that use an extended | ||
437 | | denorm within the kernel. The approach used is to capture the frame, | ||
438 | | fix/restore. | ||
439 | | | ||
440 | .global t_avoid_unsupp | ||
441 | t_avoid_unsupp: | ||
442 | link %a2,#-LOCAL_SIZE |so that a2 fpsp.h negative | ||
443 | | ;offsets may be used | ||
444 | fsave -(%a7) |capture the frame | ||
445 | tstb 1(%a7) |check if idle, exit if so | ||
446 | beq idle_end | ||
447 | btstb #E1,E_BYTE(%a2) |check for an E1 exception; if | ||
448 | | ;set, there is an unsupp | ||
449 | beq end_avun |else, exit | ||
450 | btstb #7,DTAG(%a2) |check for denorm destination | ||
451 | beqs src_den |else, must be a source denorm | ||
452 | | | ||
453 | | handle destination denorm | ||
454 | | | ||
455 | lea FPTEMP(%a2),%a0 |a0 points to the destination operand | ||
456 | btstb #sign_bit,LOCAL_EX(%a0) |check sign of the operand | ||
457 | sne LOCAL_SGN(%a0) |convert to internal ext format | ||
458 | bclrb #7,DTAG(%a2) |set DTAG to norm | ||
459 | bsr nrm_set |normalize result, exponent | ||
460 | | ;will become negative | ||
461 | bclrb #sign_bit,LOCAL_EX(%a0) |get rid of fake sign | ||
462 | bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format | ||
463 | beqs ck_src_den |check if source is also denorm | ||
464 | bsetb #sign_bit,LOCAL_EX(%a0) |put the sign back | ||
465 | ck_src_den: | ||
466 | btstb #7,STAG(%a2) |check for denorm source | ||
467 | beqs end_avun |if not, restore the frame and exit | ||
468 | src_den: | ||
469 | lea ETEMP(%a2),%a0 |a0 points to the source operand | ||
470 | btstb #sign_bit,LOCAL_EX(%a0) |check sign of the operand | ||
471 | sne LOCAL_SGN(%a0) |convert to internal ext format | ||
472 | bclrb #7,STAG(%a2) |set STAG to norm | ||
473 | bsr nrm_set |normalize result, exponent | ||
474 | | ;will become negative | ||
475 | bclrb #sign_bit,LOCAL_EX(%a0) |get rid of fake sign | ||
476 | bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format | ||
477 | beqs den_com |if sign byte was clear, operand is positive | ||
478 | bsetb #sign_bit,LOCAL_EX(%a0) |put the sign back | ||
479 | den_com: | ||
480 | moveb #0xfe,CU_SAVEPC(%a2) |set continue frame | ||
481 | clrw NMNEXC(%a2) |clear NMNEXC | ||
482 | bclrb #E1,E_BYTE(%a2) |clear the E1 bit | ||
483 | | fmove.l %FPSR,FPSR_SHADOW(%a2) | ||
484 | | bset.b #SFLAG,E_BYTE(%a2) | ||
485 | | bset.b #XFLAG,T_BYTE(%a2) | ||
486 | end_avun: | ||
487 | frestore (%a7)+ |restore the (possibly fixed) frame | ||
488 | unlk %a2 | ||
489 | rts | ||
490 | idle_end: | ||
491 | addl #4,%a7 |pop the idle frame without restoring it | ||
492 | unlk %a2 | ||
493 | rts | ||
494 | |end | ||
diff --git a/arch/m68k/fpsp040/res_func.S b/arch/m68k/fpsp040/res_func.S new file mode 100644 index 000000000000..8f6b95217865 --- /dev/null +++ b/arch/m68k/fpsp040/res_func.S | |||
@@ -0,0 +1,2040 @@ | |||
1 | | | ||
2 | | res_func.sa 3.9 7/29/91 | ||
3 | | | ||
4 | | Normalizes denormalized numbers if necessary and updates the | ||
5 | | stack frame. The function is then restored back into the | ||
6 | | machine and the 040 completes the operation. This routine | ||
7 | | is only used by the unsupported data type/format handler. | ||
8 | | (Exception vector 55). | ||
9 | | | ||
10 | | For packed move out (fmove.p fpm,<ea>) the operation is | ||
11 | | completed here; data is packed and moved to user memory. | ||
12 | | The stack is restored to the 040 only in the case of a | ||
13 | | reportable exception in the conversion. | ||
14 | | | ||
15 | | | ||
16 | | Copyright (C) Motorola, Inc. 1990 | ||
17 | | All Rights Reserved | ||
18 | | | ||
19 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
20 | | The copyright notice above does not evidence any | ||
21 | | actual or intended publication of such source code. | ||
22 | |||
23 | RES_FUNC: |idnt 2,1 | Motorola 040 Floating Point Software Package | ||
24 | |||
25 | |section 8 | ||
26 | |||
27 | #include "fpsp.h" | ||
28 | |||
29 | sp_bnds: .short 0x3f81,0x407e | ||
30 | .short 0x3f6a,0x0000 | ||
31 | dp_bnds: .short 0x3c01,0x43fe | ||
32 | .short 0x3bcd,0x0000 | ||
33 | |||
34 | |xref mem_write | ||
35 | |xref bindec | ||
36 | |xref get_fline | ||
37 | |xref round | ||
38 | |xref denorm | ||
39 | |xref dest_ext | ||
40 | |xref dest_dbl | ||
41 | |xref dest_sgl | ||
42 | |xref unf_sub | ||
43 | |xref nrm_set | ||
44 | |xref dnrm_lp | ||
45 | |xref ovf_res | ||
46 | |xref reg_dest | ||
47 | |xref t_ovfl | ||
48 | |xref t_unfl | ||
49 | |||
50 | .global res_func | ||
51 | .global p_move | ||
52 | |||
53 | res_func: | ||
54 | clrb DNRM_FLG(%a6) | ||
55 | clrb RES_FLG(%a6) | ||
56 | clrb CU_ONLY(%a6) | ||
57 | tstb DY_MO_FLG(%a6) | ||
58 | beqs monadic | ||
59 | dyadic: | ||
60 | btstb #7,DTAG(%a6) |if dop = norm=000, zero=001, | ||
61 | | ;inf=010 or nan=011 | ||
62 | beqs monadic |then branch | ||
63 | | ;else denorm | ||
64 | | HANDLE DESTINATION DENORM HERE | ||
65 | | ;set dtag to norm | ||
66 | | ;write the tag & fpte15 to the fstack | ||
67 | leal FPTEMP(%a6),%a0 | ||
68 | |||
69 | bclrb #sign_bit,LOCAL_EX(%a0) | ||
70 | sne LOCAL_SGN(%a0) | ||
71 | |||
72 | bsr nrm_set |normalize number (exp will go negative) | ||
73 | bclrb #sign_bit,LOCAL_EX(%a0) |get rid of false sign | ||
74 | bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format | ||
75 | beqs dpos | ||
76 | bsetb #sign_bit,LOCAL_EX(%a0) | ||
77 | dpos: | ||
78 | bfclr DTAG(%a6){#0:#4} |set tag to normalized, FPTE15 = 0 | ||
79 | bsetb #4,DTAG(%a6) |set FPTE15 | ||
80 | orb #0x0f,DNRM_FLG(%a6) | ||
81 | monadic: | ||
82 | leal ETEMP(%a6),%a0 | ||
83 | btstb #direction_bit,CMDREG1B(%a6) |check direction | ||
84 | bne opclass3 |it is a mv out | ||
85 | | | ||
86 | | At this point, only opclass 0 and 2 possible | ||
87 | | | ||
88 | btstb #7,STAG(%a6) |if sop = norm=000, zero=001, | ||
89 | | ;inf=010 or nan=011 | ||
90 | bne mon_dnrm |else denorm | ||
91 | tstb DY_MO_FLG(%a6) |all cases of dyadic instructions would | ||
92 | bne normal |require normalization of denorm | ||
93 | |||
94 | | At this point: | ||
95 | | monadic instructions: fabs = $18 fneg = $1a ftst = $3a | ||
96 | | fmove = $00 fsmove = $40 fdmove = $44 | ||
97 | | fsqrt = $05* fssqrt = $41 fdsqrt = $45 | ||
98 | | (*fsqrt reencoded to $05) | ||
99 | | | ||
100 | movew CMDREG1B(%a6),%d0 |get command register | ||
101 | andil #0x7f,%d0 |strip to only command word | ||
102 | | | ||
103 | | At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and | ||
104 | | fdsqrt are possible. | ||
105 | | For cases fabs, fneg, fsmove, and fdmove goto spos (do not normalize) | ||
106 | | For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize) | ||
107 | | | ||
108 | btstl #0,%d0 | ||
109 | bne normal |weed out fsqrt instructions | ||
110 | | | ||
111 | | cu_norm handles fmove in instructions with normalized inputs. | ||
112 | | The routine round is used to correctly round the input for the | ||
113 | | destination precision and mode. | ||
114 | | | ||
115 | cu_norm: | ||
116 | st CU_ONLY(%a6) |set cu-only inst flag | ||
117 | movew CMDREG1B(%a6),%d0 | ||
118 | andib #0x3b,%d0 |isolate bits to select inst | ||
119 | tstb %d0 | ||
120 | beql cu_nmove |if zero, it is an fmove | ||
121 | cmpib #0x18,%d0 | ||
122 | beql cu_nabs |if $18, it is fabs | ||
123 | cmpib #0x1a,%d0 | ||
124 | beql cu_nneg |if $1a, it is fneg | ||
125 | | | ||
126 | | Inst is ftst. Check the source operand and set the cc's accordingly. | ||
127 | | No write is done, so simply rts. | ||
128 | | | ||
129 | cu_ntst: | ||
130 | movew LOCAL_EX(%a0),%d0 |get sign and exponent word | ||
131 | bclrl #15,%d0 |strip sign; Z reflects the old sign bit | ||
132 | sne LOCAL_SGN(%a0) |record the sign in LOCAL_SGN | ||
133 | beqs cu_ntpo |if positive, do not set N | ||
134 | orl #neg_mask,USER_FPSR(%a6) |set N | ||
135 | cu_ntpo: | ||
136 | cmpiw #0x7fff,%d0 |test for inf/nan | ||
137 | bnes cu_ntcz | ||
138 | tstl LOCAL_HI(%a0) |check ms mantissa | ||
139 | bnes cu_ntn |if non-zero, it is a NaN | ||
140 | tstl LOCAL_LO(%a0) |check ls mantissa | ||
141 | bnes cu_ntn |if non-zero, it is a NaN | ||
142 | orl #inf_mask,USER_FPSR(%a6) |set I | ||
143 | rts | ||
144 | cu_ntn: | ||
145 | orl #nan_mask,USER_FPSR(%a6) |set NaN | ||
146 | movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for | ||
147 | | ;snan handler | ||
148 | |||
149 | rts | ||
150 | cu_ntcz: | ||
151 | tstl LOCAL_HI(%a0) |check ms mantissa | ||
152 | bnel cu_ntsx |if non-zero, it is not a zero | ||
153 | tstl LOCAL_LO(%a0) |check ls mantissa | ||
154 | bnel cu_ntsx |if non-zero, it is not a zero | ||
155 | orl #z_mask,USER_FPSR(%a6) |set Z | ||
156 | cu_ntsx: | ||
157 | rts | ||
158 | | | ||
159 | | Inst is fabs. Execute the absolute value function on the input. | ||
160 | | Branch to the fmove code. If the operand is NaN, do nothing. | ||
161 | | | ||
162 | cu_nabs: | ||
163 | moveb STAG(%a6),%d0 | ||
164 | btstl #5,%d0 |test for NaN or zero | ||
165 | bne wr_etemp |if either, simply write it | ||
166 | bclrb #7,LOCAL_EX(%a0) |do abs | ||
167 | bras cu_nmove |fmove code will finish | ||
168 | | | ||
169 | | Inst is fneg. Execute the negate value function on the input. | ||
170 | | Fall through to the fmove code. If the operand is NaN, do nothing. | ||
171 | | | ||
172 | cu_nneg: | ||
173 | moveb STAG(%a6),%d0 | ||
174 | btstl #5,%d0 |test for NaN or zero | ||
175 | bne wr_etemp |if either, simply write it | ||
176 | bchgb #7,LOCAL_EX(%a0) |do neg | ||
177 | | | ||
178 | | Inst is fmove. This code also handles all result writes. | ||
179 | | If bit 2 is set, round is forced to double. If it is clear, | ||
180 | | and bit 6 is set, round is forced to single. If both are clear, | ||
181 | | the round precision is found in the fpcr. If the rounding precision | ||
182 | | is double or single, round the result before the write. | ||
183 | | | ||
184 | cu_nmove: | ||
185 | moveb STAG(%a6),%d0 | ||
186 | andib #0xe0,%d0 |isolate stag bits | ||
187 | bne wr_etemp |if not norm, simply write it | ||
188 | btstb #2,CMDREG1B+1(%a6) |check for rd | ||
189 | bne cu_nmrd | ||
190 | btstb #6,CMDREG1B+1(%a6) |check for rs | ||
191 | bne cu_nmrs | ||
192 | | | ||
193 | | The move or operation is not with forced precision. Test for | ||
194 | | nan or inf as the input; if so, simply write it to FPn. Use the | ||
195 | | FPCR_MODE byte to get rounding on norms and zeros. | ||
196 | | | ||
197 | cu_nmnr: | ||
198 | bfextu FPCR_MODE(%a6){#0:#2},%d0 | ||
199 | tstb %d0 |check for extended | ||
200 | beq cu_wrexn |if so, just write result | ||
201 | cmpib #1,%d0 |check for single | ||
202 | beq cu_nmrs |fall through to double | ||
203 | | | ||
204 | | The move is fdmove or round precision is double. | ||
205 | | | ||
206 | cu_nmrd: | ||
207 | movel #2,%d0 |set up the size for denorm | ||
208 | movew LOCAL_EX(%a0),%d1 |compare exponent to double threshold | ||
209 | andw #0x7fff,%d1 | ||
210 | cmpw #0x3c01,%d1 | ||
211 | bls cu_nunfl | ||
212 | bfextu FPCR_MODE(%a6){#2:#2},%d1 |get rmode | ||
213 | orl #0x00020000,%d1 |or in rprec (double) | ||
214 | clrl %d0 |clear g,r,s for round | ||
215 | bclrb #sign_bit,LOCAL_EX(%a0) |convert to internal format | ||
216 | sne LOCAL_SGN(%a0) | ||
217 | bsrl round | ||
218 | bfclr LOCAL_SGN(%a0){#0:#8} | ||
219 | beqs cu_nmrdc | ||
220 | bsetb #sign_bit,LOCAL_EX(%a0) | ||
221 | cu_nmrdc: | ||
222 | movew LOCAL_EX(%a0),%d1 |check for overflow | ||
223 | andw #0x7fff,%d1 | ||
224 | cmpw #0x43ff,%d1 | ||
225 | bge cu_novfl |take care of overflow case | ||
226 | bra cu_wrexn | ||
227 | | | ||
228 | | The move is fsmove or round precision is single. | ||
229 | | | ||
230 | cu_nmrs: | ||
231 | movel #1,%d0 | ||
232 | movew LOCAL_EX(%a0),%d1 | ||
233 | andw #0x7fff,%d1 | ||
234 | cmpw #0x3f81,%d1 | ||
235 | bls cu_nunfl | ||
236 | bfextu FPCR_MODE(%a6){#2:#2},%d1 | ||
237 | orl #0x00010000,%d1 | ||
238 | clrl %d0 | ||
239 | bclrb #sign_bit,LOCAL_EX(%a0) | ||
240 | sne LOCAL_SGN(%a0) | ||
241 | bsrl round | ||
242 | bfclr LOCAL_SGN(%a0){#0:#8} | ||
243 | beqs cu_nmrsc | ||
244 | bsetb #sign_bit,LOCAL_EX(%a0) | ||
245 | cu_nmrsc: | ||
246 | movew LOCAL_EX(%a0),%d1 | ||
247 | andw #0x7FFF,%d1 | ||
248 | cmpw #0x407f,%d1 | ||
249 | blt cu_wrexn | ||
250 | | | ||
251 | | The operand is above precision boundaries. Use t_ovfl to | ||
252 | | generate the correct value. | ||
253 | | | ||
254 | cu_novfl: | ||
255 | bsr t_ovfl | ||
256 | bra cu_wrexn | ||
257 | | | ||
258 | | The operand is below precision boundaries. Use denorm to | ||
259 | | generate the correct value. | ||
260 | | | ||
261 | cu_nunfl: | ||
262 | bclrb #sign_bit,LOCAL_EX(%a0) | ||
263 | sne LOCAL_SGN(%a0) | ||
264 | bsr denorm | ||
265 | bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format | ||
266 | beqs cu_nucont | ||
267 | bsetb #sign_bit,LOCAL_EX(%a0) | ||
268 | cu_nucont: | ||
269 | bfextu FPCR_MODE(%a6){#2:#2},%d1 | ||
270 | btstb #2,CMDREG1B+1(%a6) |check for rd | ||
271 | bne inst_d | ||
272 | btstb #6,CMDREG1B+1(%a6) |check for rs | ||
273 | bne inst_s | ||
274 | swap %d1 | ||
275 | moveb FPCR_MODE(%a6),%d1 | ||
276 | lsrb #6,%d1 | ||
277 | swap %d1 | ||
278 | bra inst_sd | ||
279 | inst_d: | ||
280 | orl #0x00020000,%d1 | ||
281 | bra inst_sd | ||
282 | inst_s: | ||
283 | orl #0x00010000,%d1 | ||
284 | inst_sd: | ||
285 | bclrb #sign_bit,LOCAL_EX(%a0) | ||
286 | sne LOCAL_SGN(%a0) | ||
287 | bsrl round | ||
288 | bfclr LOCAL_SGN(%a0){#0:#8} | ||
289 | beqs cu_nuflp | ||
290 | bsetb #sign_bit,LOCAL_EX(%a0) | ||
291 | cu_nuflp: | ||
292 | btstb #inex2_bit,FPSR_EXCEPT(%a6) | ||
293 | beqs cu_nuninx | ||
294 | orl #aunfl_mask,USER_FPSR(%a6) |if the round was inex, set AUNFL | ||
295 | cu_nuninx: | ||
296 | tstl LOCAL_HI(%a0) |test for zero | ||
297 | bnes cu_nunzro | ||
298 | tstl LOCAL_LO(%a0) | ||
299 | bnes cu_nunzro | ||
300 | | | ||
301 | | The mantissa is zero from the denorm loop. Check sign and rmode | ||
302 | | to see if rounding should have occurred which would leave the lsb. | ||
303 | | | ||
304 | movel USER_FPCR(%a6),%d0 | ||
305 | andil #0x30,%d0 |isolate rmode | ||
306 | cmpil #0x20,%d0 | ||
307 | blts cu_nzro | ||
308 | bnes cu_nrp | ||
309 | cu_nrm: | ||
310 | tstw LOCAL_EX(%a0) |rmode is rm: if negative, set lsb | ||
311 | bges cu_nzro |positive: leave the result zero | ||
312 | btstb #7,FPCR_MODE(%a6) |check for double | ||
313 | beqs cu_nincs |bit clear: set the single lsb | ||
314 | bras cu_nincd |bit set: set the double lsb | ||
315 | cu_nrp: | ||
316 | tstw LOCAL_EX(%a0) |if positive, set lsb | ||
317 | blts cu_nzro | ||
318 | btstb #7,FPCR_MODE(%a6) |check for double | ||
319 | beqs cu_nincs | ||
320 | cu_nincd: | ||
321 | orl #0x800,LOCAL_LO(%a0) |inc for double | ||
322 | bra cu_nunzro | ||
323 | cu_nincs: | ||
324 | orl #0x100,LOCAL_HI(%a0) |inc for single | ||
325 | bra cu_nunzro | ||
326 | cu_nzro: | ||
327 | orl #z_mask,USER_FPSR(%a6) |result mantissa is zero: set Z | ||
328 | moveb STAG(%a6),%d0 |fetch the source tag byte | ||
329 | andib #0xe0,%d0 |keep only the upper three tag bits | ||
330 | cmpib #0x40,%d0 |check if input was tagged zero; NOTE(review): ck_in_com treats $20 as zero and $40 as inf - confirm $40 | ||
331 | beqs cu_numv |tag matched: skip setting unfl | ||
332 | cu_nunzro: | ||
333 | orl #unfl_mask,USER_FPSR(%a6) |set unfl | ||
334 | cu_numv: | ||
335 | movel (%a0),ETEMP(%a6) | ||
336 | movel 4(%a0),ETEMP_HI(%a6) | ||
337 | movel 8(%a0),ETEMP_LO(%a6) | ||
338 | | | ||
339 | | Write the result to memory, setting the fpsr cc bits. NaN and Inf | ||
340 | | bypass cu_wrexn. | ||
341 | | | ||
342 | cu_wrexn: | ||
343 | tstw LOCAL_EX(%a0) |test for zero | ||
344 | beqs cu_wrzero | ||
345 | cmpw #0x8000,LOCAL_EX(%a0) |test for zero | ||
346 | bnes cu_wreon | ||
347 | cu_wrzero: | ||
348 | orl #z_mask,USER_FPSR(%a6) |set Z bit | ||
349 | cu_wreon: | ||
350 | tstw LOCAL_EX(%a0) | ||
351 | bpl wr_etemp | ||
352 | orl #neg_mask,USER_FPSR(%a6) | ||
353 | bra wr_etemp | ||
354 | |||
355 | | | ||
356 | | HANDLE SOURCE DENORM HERE | ||
357 | | | ||
358 | | ;clear denorm stag to norm | ||
359 | | ;write the new tag & ete15 to the fstack | ||
360 | mon_dnrm: | ||
361 | | | ||
362 | | At this point, check for the cases in which normalizing the | ||
363 | | denorm produces incorrect results. | ||
364 | | | ||
365 | tstb DY_MO_FLG(%a6) |all cases of dyadic instructions would | ||
366 | bnes nrm_src |require normalization of denorm | ||
367 | |||
368 | | At this point: | ||
369 | | monadic instructions: fabs = $18 fneg = $1a ftst = $3a | ||
370 | | fmove = $00 fsmove = $40 fdmove = $44 | ||
371 | | fsqrt = $05* fssqrt = $41 fdsqrt = $45 | ||
372 | | (*fsqrt reencoded to $05) | ||
373 | | | ||
374 | movew CMDREG1B(%a6),%d0 |get command register | ||
375 | andil #0x7f,%d0 |strip to only command word | ||
376 | | | ||
377 | | At this point, fmove, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, | ||
378 | | and fdsqrt are possible. | ||
379 | | For cases fmove, fabs, fneg, ftst, fsmove, and fdmove goto cu_dnrm (do not normalize) | ||
380 | | For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize) | ||
381 | | | ||
382 | btstl #0,%d0 |bit 0 is set only in the fsqrt encodings listed above | ||
383 | bnes nrm_src |weed out fsqrt instructions | ||
384 | st CU_ONLY(%a6) |set cu-only inst flag | ||
385 | bra cu_dnrm |fmove, fabs, fneg, ftst | ||
386 | | ;cases go to cu_dnrm | ||
387 | nrm_src: | ||
388 | bclrb #sign_bit,LOCAL_EX(%a0) | ||
389 | sne LOCAL_SGN(%a0) | ||
390 | bsr nrm_set |normalize number (exponent will go | ||
391 | | ; negative) | ||
392 | bclrb #sign_bit,LOCAL_EX(%a0) |get rid of false sign | ||
393 | |||
394 | bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format | ||
395 | beqs spos | ||
396 | bsetb #sign_bit,LOCAL_EX(%a0) | ||
397 | spos: | ||
398 | bfclr STAG(%a6){#0:#4} |set tag to normalized, FPTE15 = 0 | ||
399 | bsetb #4,STAG(%a6) |set ETE15 | ||
400 | orb #0xf0,DNRM_FLG(%a6) | ||
401 | normal: | ||
402 | tstb DNRM_FLG(%a6) |check if any of the ops were denorms | ||
403 | bne ck_wrap |if so, check if it is a potential | ||
404 | | ;wrap-around case | ||
405 | fix_stk: | ||
406 | moveb #0xfe,CU_SAVEPC(%a6) | ||
407 | bclrb #E1,E_BYTE(%a6) | ||
408 | |||
409 | clrw NMNEXC(%a6) | ||
410 | |||
411 | st RES_FLG(%a6) |indicate that a restore is needed | ||
412 | rts | ||
413 | |||
414 | | | ||
415 | | cu_dnrm handles all cu-only instructions (fmove, fabs, fneg, and | ||
416 | | ftst) completely in software without an frestore to the 040. | ||
417 | | | ||
418 | cu_dnrm: | ||
419 | st CU_ONLY(%a6) | ||
420 | movew CMDREG1B(%a6),%d0 | ||
421 | andib #0x3b,%d0 |isolate bits to select inst | ||
422 | tstb %d0 | ||
423 | beql cu_dmove |if zero, it is an fmove | ||
424 | cmpib #0x18,%d0 | ||
425 | beql cu_dabs |if $18, it is fabs | ||
426 | cmpib #0x1a,%d0 | ||
427 | beql cu_dneg |if $1a, it is fneg | ||
428 | | | ||
429 | | Inst is ftst. Check the source operand and set the cc's accordingly. | ||
430 | | No write is done, so simply rts. | ||
431 | | | ||
432 | cu_dtst: | ||
433 | movew LOCAL_EX(%a0),%d0 | ||
434 | bclrl #15,%d0 | ||
435 | sne LOCAL_SGN(%a0) | ||
436 | beqs cu_dtpo | ||
437 | orl #neg_mask,USER_FPSR(%a6) |set N | ||
438 | cu_dtpo: | ||
439 | cmpiw #0x7fff,%d0 |test for inf/nan | ||
440 | bnes cu_dtcz | ||
441 | tstl LOCAL_HI(%a0) | ||
442 | bnes cu_dtn | ||
443 | tstl LOCAL_LO(%a0) | ||
444 | bnes cu_dtn | ||
445 | orl #inf_mask,USER_FPSR(%a6) | ||
446 | rts | ||
447 | cu_dtn: | ||
448 | orl #nan_mask,USER_FPSR(%a6) | ||
449 | movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for | ||
450 | | ;snan handler | ||
451 | rts | ||
452 | cu_dtcz: | ||
453 | tstl LOCAL_HI(%a0) | ||
454 | bnel cu_dtsx | ||
455 | tstl LOCAL_LO(%a0) | ||
456 | bnel cu_dtsx | ||
457 | orl #z_mask,USER_FPSR(%a6) | ||
458 | cu_dtsx: | ||
459 | rts | ||
460 | | | ||
461 | | Inst is fabs. Execute the absolute value function on the input. | ||
462 | | Branch to the fmove code. | ||
463 | | | ||
464 | cu_dabs: | ||
465 | bclrb #7,LOCAL_EX(%a0) |do abs | ||
466 | bras cu_dmove |fmove code will finish | ||
467 | | | ||
468 | | Inst is fneg. Execute the negate value function on the input. | ||
469 | | Fall through to the fmove code. | ||
470 | | | ||
471 | cu_dneg: | ||
472 | bchgb #7,LOCAL_EX(%a0) |do neg | ||
473 | | | ||
474 | | Inst is fmove. This code also handles all result writes. | ||
475 | | If bit 2 is set, round is forced to double. If it is clear, | ||
476 | | and bit 6 is set, round is forced to single. If both are clear, | ||
477 | | the round precision is found in the fpcr. If the rounding precision | ||
478 | | is double or single, the result is zero, and the mode is checked | ||
479 | | to determine if the lsb of the result should be set. | ||
480 | | | ||
481 | cu_dmove: | ||
482 | btstb #2,CMDREG1B+1(%a6) |check for rd | ||
483 | bne cu_dmrd | ||
484 | btstb #6,CMDREG1B+1(%a6) |check for rs | ||
485 | bne cu_dmrs | ||
486 | | | ||
487 | | The move or operation is not with forced precision. Use the | ||
488 | | FPCR_MODE byte to get rounding. | ||
489 | | | ||
490 | cu_dmnr: | ||
491 | bfextu FPCR_MODE(%a6){#0:#2},%d0 | ||
492 | tstb %d0 |check for extended | ||
493 | beq cu_wrexd |if so, just write result | ||
494 | cmpib #1,%d0 |check for single | ||
495 | beq cu_dmrs |fall through to double | ||
496 | | | ||
497 | | The move is fdmove or round precision is double. Result is zero. | ||
498 | | Check rmode for rp or rm and set lsb accordingly. | ||
499 | | | ||
500 | cu_dmrd: | ||
501 | bfextu FPCR_MODE(%a6){#2:#2},%d1 |get rmode | ||
502 | tstw LOCAL_EX(%a0) |check sign | ||
503 | blts cu_dmdn | ||
504 | cmpib #3,%d1 |check for rp | ||
505 | bne cu_dpd |load double pos zero | ||
506 | bra cu_dpdr |load double pos zero w/lsb | ||
507 | cu_dmdn: | ||
508 | cmpib #2,%d1 |check for rm | ||
509 | bne cu_dnd |load double neg zero | ||
510 | bra cu_dndr |load double neg zero w/lsb | ||
511 | | | ||
512 | | The move is fsmove or round precision is single. Result is zero. | ||
513 | | Check for rp or rm and set lsb accordingly. | ||
514 | | | ||
515 | cu_dmrs: | ||
516 | bfextu FPCR_MODE(%a6){#2:#2},%d1 |get rmode | ||
517 | tstw LOCAL_EX(%a0) |check sign | ||
518 | blts cu_dmsn | ||
519 | cmpib #3,%d1 |check for rp | ||
520 | bne cu_spd |load single pos zero | ||
521 | bra cu_spdr |load single pos zero w/lsb | ||
522 | cu_dmsn: | ||
523 | cmpib #2,%d1 |check for rm | ||
524 | bne cu_snd |load single neg zero | ||
525 | bra cu_sndr |load single neg zero w/lsb | ||
526 | | | ||
527 | | The precision is extended, so the result in etemp is correct. | ||
528 | | Simply set unfl (not inex2 or aunfl) and write the result to | ||
529 | | the correct fp register. | ||
530 | cu_wrexd: | ||
531 | orl #unfl_mask,USER_FPSR(%a6) | ||
532 | tstw LOCAL_EX(%a0) | ||
533 | beq wr_etemp | ||
534 | orl #neg_mask,USER_FPSR(%a6) | ||
535 | bra wr_etemp | ||
536 | | | ||
537 | | These routines write +/- zero in double format. The routines | ||
538 | | cu_dpdr and cu_dndr set the double lsb. | ||
539 | | | ||
540 | cu_dpd: | ||
541 | movel #0x3c010000,LOCAL_EX(%a0) |force pos double zero | ||
542 | clrl LOCAL_HI(%a0) | ||
543 | clrl LOCAL_LO(%a0) | ||
544 | orl #z_mask,USER_FPSR(%a6) | ||
545 | orl #unfinx_mask,USER_FPSR(%a6) | ||
546 | bra wr_etemp | ||
547 | cu_dpdr: | ||
548 | movel #0x3c010000,LOCAL_EX(%a0) |force pos double zero | ||
549 | clrl LOCAL_HI(%a0) | ||
550 | movel #0x800,LOCAL_LO(%a0) |with lsb set | ||
551 | orl #unfinx_mask,USER_FPSR(%a6) | ||
552 | bra wr_etemp | ||
553 | cu_dnd: | ||
554 | movel #0xbc010000,LOCAL_EX(%a0) |force neg double zero (sign bit set) | ||
555 | clrl LOCAL_HI(%a0) |clear the upper mantissa | ||
556 | clrl LOCAL_LO(%a0) |clear the lower mantissa | ||
557 | orl #z_mask,USER_FPSR(%a6) |mantissa is zero: set Z | ||
558 | orl #neg_mask,USER_FPSR(%a6) |result is negative: set N | ||
559 | orl #unfinx_mask,USER_FPSR(%a6) | ||
560 | bra wr_etemp | ||
561 | cu_dndr: | ||
562 | movel #0xbc010000,LOCAL_EX(%a0) |force neg double zero (sign bit set) | ||
563 | clrl LOCAL_HI(%a0) |clear the upper mantissa | ||
564 | movel #0x800,LOCAL_LO(%a0) |with lsb set | ||
565 | orl #neg_mask,USER_FPSR(%a6) |result is negative: set N (Z is not set, lsb is nonzero) | ||
566 | orl #unfinx_mask,USER_FPSR(%a6) | ||
567 | bra wr_etemp | ||
568 | | | ||
569 | | These routines write +/- zero in single format. The routines | ||
570 | | cu_spdr and cu_sndr set the single lsb. | ||
571 | | | ||
572 | cu_spd: | ||
573 | movel #0x3f810000,LOCAL_EX(%a0) |force pos single zero | ||
574 | clrl LOCAL_HI(%a0) | ||
575 | clrl LOCAL_LO(%a0) | ||
576 | orl #z_mask,USER_FPSR(%a6) | ||
577 | orl #unfinx_mask,USER_FPSR(%a6) | ||
578 | bra wr_etemp | ||
579 | cu_spdr: | ||
580 | movel #0x3f810000,LOCAL_EX(%a0) |force pos single zero | ||
581 | movel #0x100,LOCAL_HI(%a0) |with lsb set | ||
582 | clrl LOCAL_LO(%a0) | ||
583 | orl #unfinx_mask,USER_FPSR(%a6) | ||
584 | bra wr_etemp | ||
585 | cu_snd: | ||
586 | movel #0xbf810000,LOCAL_EX(%a0) |force neg single zero (sign bit set) | ||
587 | clrl LOCAL_HI(%a0) |clear the upper mantissa | ||
588 | clrl LOCAL_LO(%a0) |clear the lower mantissa | ||
589 | orl #z_mask,USER_FPSR(%a6) |mantissa is zero: set Z | ||
590 | orl #neg_mask,USER_FPSR(%a6) |result is negative: set N | ||
591 | orl #unfinx_mask,USER_FPSR(%a6) | ||
592 | bra wr_etemp | ||
593 | cu_sndr: | ||
594 | movel #0xbf810000,LOCAL_EX(%a0) |force neg single zero (sign bit set) | ||
595 | movel #0x100,LOCAL_HI(%a0) |with lsb set | ||
596 | clrl LOCAL_LO(%a0) |clear the lower mantissa | ||
597 | orl #neg_mask,USER_FPSR(%a6) |result is negative: set N (Z is not set, lsb is nonzero) | ||
598 | orl #unfinx_mask,USER_FPSR(%a6) | ||
599 | bra wr_etemp | ||
600 | |||
601 | | | ||
602 | | This code checks for 16-bit overflow conditions on dyadic | ||
603 | | operations which are not restorable into the floating-point | ||
604 | | unit and must be completed in software. Basically, this | ||
605 | | condition exists with a very large norm and a denorm. One | ||
606 | | of the operands must be denormalized to enter this code. | ||
607 | | | ||
608 | | Flags used: | ||
609 | | DY_MO_FLG contains 0 for monadic op, $ff for dyadic | ||
610 | | DNRM_FLG contains $00 for neither op denormalized | ||
611 | | $0f for the destination op denormalized | ||
612 | | $f0 for the source op denormalized | ||
613 | | $ff for both ops denormalized | ||
614 | | | ||
615 | | The wrap-around condition occurs for add, sub, div, and cmp | ||
616 | | when | ||
617 | | | ||
618 | | abs(dest_exp - src_exp) >= $8000 | ||
619 | | | ||
620 | | and for mul when | ||
621 | | | ||
622 | | (dest_exp + src_exp) < $0 | ||
623 | | | ||
624 | | we must process the operation here if this case is true. | ||
625 | | | ||
626 | | The rts following the frcfpn routine is the exit from res_func | ||
627 | | for this condition. The restore flag (RES_FLG) is left clear. | ||
628 | | No frestore is done unless an exception is to be reported. | ||
629 | | | ||
630 | | For fadd: | ||
631 | | if(sign_of(dest) != sign_of(src)) | ||
632 | | replace exponent of src with $3fff (keep sign) | ||
633 | | use fpu to perform dest+new_src (user's rmode and X) | ||
634 | | clr sticky | ||
635 | | else | ||
636 | | set sticky | ||
637 | | call round with user's precision and mode | ||
638 | | move result to fpn and wbtemp | ||
639 | | | ||
640 | | For fsub: | ||
641 | | if(sign_of(dest) == sign_of(src)) | ||
642 | | replace exponent of src with $3fff (keep sign) | ||
643 | | use fpu to perform dest+new_src (user's rmode and X) | ||
644 | | clr sticky | ||
645 | | else | ||
646 | | set sticky | ||
647 | | call round with user's precision and mode | ||
648 | | move result to fpn and wbtemp | ||
649 | | | ||
650 | | For fdiv/fsgldiv: | ||
651 | | if(both operands are denorm) | ||
652 | | restore_to_fpu; | ||
653 | | if(dest is norm) | ||
654 | | force_ovf; | ||
655 | | else(dest is denorm) | ||
656 | | force_unf: | ||
657 | | | ||
658 | | For fcmp: | ||
659 | | if(dest is norm) | ||
660 | | N = sign_of(dest); | ||
661 | | else(dest is denorm) | ||
662 | | N = sign_of(src); | ||
663 | | | ||
664 | | For fmul: | ||
665 | | if(both operands are denorm) | ||
666 | | force_unf; | ||
667 | | if((dest_exp + src_exp) < 0) | ||
668 | | force_unf: | ||
669 | | else | ||
670 | | restore_to_fpu; | ||
671 | | | ||
672 | | local equates: | ||
673 | .set addcode,0x22 | ||
674 | .set subcode,0x28 | ||
675 | .set mulcode,0x23 | ||
676 | .set divcode,0x20 | ||
677 | .set cmpcode,0x38 | ||
678 | ck_wrap: | ||
679 | tstb DY_MO_FLG(%a6) |check for fsqrt: DY_MO_FLG is 0 for a monadic op; this test must set the flags for the beq below | ||
680 | beq fix_stk |if zero, it is fsqrt | ||
681 | movew CMDREG1B(%a6),%d0 |dyadic op: fetch the command word | ||
682 | andiw #0x3b,%d0 |strip to command bits | ||
683 | cmpiw #addcode,%d0 | ||
684 | beq wrap_add | ||
685 | cmpiw #subcode,%d0 | ||
686 | beq wrap_sub | ||
687 | cmpiw #mulcode,%d0 | ||
688 | beq wrap_mul | ||
689 | cmpiw #cmpcode,%d0 | ||
690 | beq wrap_cmp |anything else falls through to wrap_div | ||
691 | | | ||
692 | | Inst is fdiv. | ||
693 | | | ||
694 | wrap_div: | ||
695 | cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm, | ||
696 | beq fix_stk |restore to fpu | ||
697 | | | ||
698 | | One of the ops is denormalized. Test for wrap condition | ||
699 | | and force the result. | ||
700 | | | ||
701 | cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm | ||
702 | bnes div_srcd | ||
703 | div_destd: | ||
704 | bsrl ckinf_ns |d0 = 0 unless the source tag is inf, nan, or zero | ||
705 | bne fix_stk |inf, nan, or zero source: restore to fpu | ||
706 | bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos) | ||
707 | bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg) | ||
708 | subl %d1,%d0 |subtract dest from src | ||
709 | cmpl #0x7fff,%d0 |NOTE(review): the other wrap tests compare with #0x8000 - confirm #0x7fff is intended | ||
710 | blt fix_stk |if less, not wrap case | ||
711 | clrb WBTEMP_SGN(%a6) |assume a positive result | ||
712 | movew ETEMP_EX(%a6),%d0 |find the sign of the result | ||
713 | movew FPTEMP_EX(%a6),%d1 | ||
714 | eorw %d1,%d0 |bit 15 is set when the operand signs differ | ||
715 | andiw #0x8000,%d0 | ||
716 | beq force_unf |like signs: keep the positive sign | ||
717 | st WBTEMP_SGN(%a6) |unlike signs: mark the result negative | ||
718 | bra force_unf | ||
719 | |||
720 | ckinf_ns: | ||
721 | moveb STAG(%a6),%d0 |check source tag for inf, nan, or zero | ||
722 | bra ck_in_com | ||
723 | ckinf_nd: | ||
724 | moveb DTAG(%a6),%d0 |check destination tag for inf, nan, or zero | ||
725 | ck_in_com: | ||
726 | andib #0x60,%d0 |isolate tag bits | ||
727 | cmpb #0x40,%d0 |is it inf? | ||
728 | beq nan_or_inf |yes, not wrap case | ||
729 | cmpb #0x60,%d0 |is it nan? | ||
730 | beq nan_or_inf |yes, not wrap case | ||
731 | cmpb #0x20,%d0 |is it a zero? | ||
732 | beq nan_or_inf |yes, not wrap case | ||
733 | clrl %d0 |d0 = 0 (Z set): tag is not inf, nan, or zero | ||
734 | rts |callers branch on ne to fix_stk; on eq they go on to | ||
735 | | ;check wrap case | ||
736 | nan_or_inf: | ||
737 | moveql #-1,%d0 |d0 = -1 (Z clear): tag is inf, nan, or zero | ||
738 | rts | ||
739 | |||
740 | |||
741 | |||
742 | div_srcd: | ||
743 | bsrl ckinf_nd | ||
744 | bne fix_stk | ||
745 | bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos) | ||
746 | bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg) | ||
747 | subl %d1,%d0 |subtract src from dest | ||
748 | cmpl #0x8000,%d0 | ||
749 | blt fix_stk |if less, not wrap case | ||
750 | clrb WBTEMP_SGN(%a6) | ||
751 | movew ETEMP_EX(%a6),%d0 |find the sign of the result | ||
752 | movew FPTEMP_EX(%a6),%d1 | ||
753 | eorw %d1,%d0 | ||
754 | andiw #0x8000,%d0 | ||
755 | beqs force_ovf | ||
756 | st WBTEMP_SGN(%a6) | ||
757 | | | ||
758 | | This code handles the case of the instruction resulting in | ||
759 | | an overflow condition. | ||
760 | | | ||
761 | force_ovf: | ||
762 | bclrb #E1,E_BYTE(%a6) | ||
763 | orl #ovfl_inx_mask,USER_FPSR(%a6) | ||
764 | clrw NMNEXC(%a6) | ||
765 | leal WBTEMP(%a6),%a0 |point a0 to memory location | ||
766 | movew CMDREG1B(%a6),%d0 | ||
767 | btstl #6,%d0 |test for forced precision | ||
768 | beqs frcovf_fpcr | ||
769 | btstl #2,%d0 |check for double | ||
770 | bnes frcovf_dbl | ||
771 | movel #0x1,%d0 |inst is forced single | ||
772 | bras frcovf_rnd | ||
773 | frcovf_dbl: | ||
774 | movel #0x2,%d0 |inst is forced double | ||
775 | bras frcovf_rnd | ||
776 | frcovf_fpcr: | ||
777 | bfextu FPCR_MODE(%a6){#0:#2},%d0 |inst not forced - use fpcr prec | ||
778 | frcovf_rnd: | ||
779 | |||
780 | | The 881/882 does not set inex2 for the following case, so the | ||
781 | | line is commented out to be compatible with 881/882 | ||
782 | | tst.b %d0 | ||
783 | | beq.b frcovf_x | ||
784 | | or.l #inex2_mask,USER_FPSR(%a6) ;if prec is s or d, set inex2 | ||
785 | |||
786 | |frcovf_x: | ||
787 | bsrl ovf_res |get correct result based on | ||
788 | | ;round precision/mode. This | ||
789 | | ;sets FPSR_CC correctly | ||
790 | | ;returns in external format | ||
791 | bfclr WBTEMP_SGN(%a6){#0:#8} | ||
792 | beq frcfpn | ||
793 | bsetb #sign_bit,WBTEMP_EX(%a6) | ||
794 | bra frcfpn | ||
795 | | | ||
796 | | Inst is fadd. | ||
797 | | | ||
798 | wrap_add: | ||
799 | cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm, | ||
800 | beq fix_stk |restore to fpu | ||
801 | | | ||
802 | | One of the ops is denormalized. Test for wrap condition | ||
803 | | and complete the instruction. | ||
804 | | | ||
805 | cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm | ||
806 | bnes add_srcd | ||
807 | add_destd: | ||
808 | bsrl ckinf_ns | ||
809 | bne fix_stk | ||
810 | bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos) | ||
811 | bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg) | ||
812 | subl %d1,%d0 |subtract dest from src | ||
813 | cmpl #0x8000,%d0 | ||
814 | blt fix_stk |if less, not wrap case | ||
815 | bra add_wrap | ||
816 | add_srcd: | ||
817 | bsrl ckinf_nd | ||
818 | bne fix_stk | ||
819 | bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos) | ||
820 | bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg) | ||
821 | subl %d1,%d0 |subtract src from dest | ||
822 | cmpl #0x8000,%d0 | ||
823 | blt fix_stk |if less, not wrap case | ||
824 | | | ||
825 | | Check the signs of the operands. If they are unlike, the fpu | ||
826 | | can be used to add the norm and 1.0 with the sign of the | ||
827 | | denorm and it will correctly generate the result in extended | ||
828 | | precision. We can then call round with no sticky and the result | ||
829 | | will be correct for the user's rounding mode and precision. If | ||
830 | | the signs are the same, we call round with the sticky bit set | ||
831 | | and the result will be correct for the user's rounding mode and | ||
832 | | precision. | ||
833 | | | ||
834 | add_wrap: | ||
835 | movew ETEMP_EX(%a6),%d0 | ||
836 | movew FPTEMP_EX(%a6),%d1 | ||
837 | eorw %d1,%d0 | ||
838 | andiw #0x8000,%d0 | ||
839 | beq add_same | ||
840 | | | ||
841 | | The signs are unlike. | ||
842 | | | ||
843 | cmpb #0x0f,DNRM_FLG(%a6) |is dest the denorm? | ||
844 | bnes add_u_srcd | ||
845 | movew FPTEMP_EX(%a6),%d0 | ||
846 | andiw #0x8000,%d0 | ||
847 | orw #0x3fff,%d0 |force the exponent to +/- 1 | ||
848 | movew %d0,FPTEMP_EX(%a6) |in the denorm | ||
849 | movel USER_FPCR(%a6),%d0 | ||
850 | andil #0x30,%d0 | ||
851 | fmovel %d0,%fpcr |set up users rmode and X | ||
852 | fmovex ETEMP(%a6),%fp0 | ||
853 | faddx FPTEMP(%a6),%fp0 | ||
854 | leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame | ||
855 | fmovel %fpsr,%d1 | ||
856 | orl %d1,USER_FPSR(%a6) |capture cc's and inex from fadd | ||
857 | fmovex %fp0,WBTEMP(%a6) |write result to memory | ||
858 | lsrl #4,%d0 |put rmode in lower 2 bits | ||
859 | movel USER_FPCR(%a6),%d1 | ||
860 | andil #0xc0,%d1 | ||
861 | lsrl #6,%d1 |put precision in upper word | ||
862 | swap %d1 | ||
863 | orl %d0,%d1 |set up for round call | ||
864 | clrl %d0 |force sticky to zero | ||
865 | bclrb #sign_bit,WBTEMP_EX(%a6) | ||
866 | sne WBTEMP_SGN(%a6) | ||
867 | bsrl round |round result to users rmode & prec | ||
868 | bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format | ||
869 | beq frcfpnr | ||
870 | bsetb #sign_bit,WBTEMP_EX(%a6) | ||
871 | bra frcfpnr | ||
872 | add_u_srcd: | ||
873 | movew ETEMP_EX(%a6),%d0 | ||
874 | andiw #0x8000,%d0 | ||
875 | orw #0x3fff,%d0 |force the exponent to +/- 1 | ||
876 | movew %d0,ETEMP_EX(%a6) |in the denorm | ||
877 | movel USER_FPCR(%a6),%d0 | ||
878 | andil #0x30,%d0 | ||
879 | fmovel %d0,%fpcr |set up users rmode and X | ||
880 | fmovex ETEMP(%a6),%fp0 | ||
881 | faddx FPTEMP(%a6),%fp0 | ||
882 | fmovel %fpsr,%d1 | ||
883 | orl %d1,USER_FPSR(%a6) |capture cc's and inex from fadd | ||
884 | leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame | ||
885 | fmovex %fp0,WBTEMP(%a6) |write result to memory | ||
886 | lsrl #4,%d0 |put rmode in lower 2 bits | ||
887 | movel USER_FPCR(%a6),%d1 | ||
888 | andil #0xc0,%d1 | ||
889 | lsrl #6,%d1 |put precision in upper word | ||
890 | swap %d1 | ||
891 | orl %d0,%d1 |set up for round call | ||
892 | clrl %d0 |force sticky to zero | ||
893 | bclrb #sign_bit,WBTEMP_EX(%a6) | ||
894 | sne WBTEMP_SGN(%a6) |use internal format for round | ||
895 | bsrl round |round result to users rmode & prec | ||
896 | bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format | ||
897 | beq frcfpnr | ||
898 | bsetb #sign_bit,WBTEMP_EX(%a6) | ||
899 | bra frcfpnr | ||
900 | | | ||
901 | | Signs are alike: | ||
902 | | | ||
903 | add_same: | ||
904 | cmpb #0x0f,DNRM_FLG(%a6) |is dest the denorm? | ||
905 | bnes add_s_srcd | ||
906 | add_s_destd: | ||
907 | leal ETEMP(%a6),%a0 | ||
908 | movel USER_FPCR(%a6),%d0 | ||
909 | andil #0x30,%d0 | ||
910 | lsrl #4,%d0 |put rmode in lower 2 bits | ||
911 | movel USER_FPCR(%a6),%d1 | ||
912 | andil #0xc0,%d1 | ||
913 | lsrl #6,%d1 |put precision in upper word | ||
914 | swap %d1 | ||
915 | orl %d0,%d1 |set up for round call | ||
916 | movel #0x20000000,%d0 |set sticky for round | ||
917 | bclrb #sign_bit,ETEMP_EX(%a6) | ||
918 | sne ETEMP_SGN(%a6) | ||
919 | bsrl round |round result to users rmode & prec | ||
920 | bfclr ETEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format | ||
921 | beqs add_s_dclr | ||
922 | bsetb #sign_bit,ETEMP_EX(%a6) | ||
923 | add_s_dclr: | ||
924 | leal WBTEMP(%a6),%a0 | ||
925 | movel ETEMP(%a6),(%a0) |write result to wbtemp | ||
926 | movel ETEMP_HI(%a6),4(%a0) | ||
927 | movel ETEMP_LO(%a6),8(%a0) | ||
928 | tstw ETEMP_EX(%a6) | ||
929 | bgt add_ckovf | ||
930 | orl #neg_mask,USER_FPSR(%a6) | ||
931 | bra add_ckovf | ||
932 | add_s_srcd: | ||
933 | leal FPTEMP(%a6),%a0 | ||
934 | movel USER_FPCR(%a6),%d0 | ||
935 | andil #0x30,%d0 | ||
936 | lsrl #4,%d0 |put rmode in lower 2 bits | ||
937 | movel USER_FPCR(%a6),%d1 | ||
938 | andil #0xc0,%d1 | ||
939 | lsrl #6,%d1 |put precision in upper word | ||
940 | swap %d1 | ||
941 | orl %d0,%d1 |set up for round call | ||
942 | movel #0x20000000,%d0 |set sticky for round | ||
943 | bclrb #sign_bit,FPTEMP_EX(%a6) | ||
944 | sne FPTEMP_SGN(%a6) | ||
945 | bsrl round |round result to users rmode & prec | ||
946 | bfclr FPTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format | ||
947 | beqs add_s_sclr | ||
948 | bsetb #sign_bit,FPTEMP_EX(%a6) | ||
949 | add_s_sclr: | ||
950 | leal WBTEMP(%a6),%a0 | ||
951 | movel FPTEMP(%a6),(%a0) |write result to wbtemp | ||
952 | movel FPTEMP_HI(%a6),4(%a0) | ||
953 | movel FPTEMP_LO(%a6),8(%a0) | ||
954 | tstw FPTEMP_EX(%a6) | ||
955 | bgt add_ckovf | ||
956 | orl #neg_mask,USER_FPSR(%a6) | ||
957 | add_ckovf: | ||
958 | movew WBTEMP_EX(%a6),%d0 | ||
959 | andiw #0x7fff,%d0 | ||
960 | cmpiw #0x7fff,%d0 | ||
961 | bne frcfpnr | ||
962 | | | ||
963 | | The result has overflowed to $7fff exponent. Set I, ovfl, | ||
964 | | and aovfl, and clr the mantissa (incorrectly set by the | ||
965 | | round routine.) | ||
966 | | | ||
967 | orl #inf_mask+ovfl_inx_mask,USER_FPSR(%a6) | ||
968 | clrl 4(%a0) | ||
969 | bra frcfpnr | ||
970 | | | ||
971 | | Inst is fsub. | ||
972 | | | ||
973 | wrap_sub: | ||
974 | cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm, | ||
975 | beq fix_stk |restore to fpu | ||
976 | | | ||
977 | | One of the ops is denormalized. Test for wrap condition | ||
978 | | and complete the instruction. | ||
979 | | | ||
980 | cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm | ||
981 | bnes sub_srcd | ||
982 | sub_destd: | ||
983 | bsrl ckinf_ns |d0 = 0 unless the source tag is inf, nan, or zero | ||
984 | bne fix_stk |inf, nan, or zero source: restore to fpu | ||
985 | bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos) | ||
986 | bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg) | ||
987 | subl %d1,%d0 |subtract dest from src | ||
988 | cmpl #0x8000,%d0 | ||
989 | blt fix_stk |if less, not wrap case | ||
990 | bra sub_wrap | ||
991 | sub_srcd: | ||
992 | bsrl ckinf_nd |d0 = 0 unless the dest tag is inf, nan, or zero | ||
993 | bne fix_stk |inf, nan, or zero dest: restore to fpu | ||
994 | bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos) | ||
995 | bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg) | ||
996 | subl %d1,%d0 |subtract src from dest | ||
997 | cmpl #0x8000,%d0 | ||
998 | blt fix_stk |if less, not wrap case | ||
999 | | | ||
1000 | | Check the signs of the operands. If they are alike, the fpu | ||
1001 | | can be used to subtract from the norm 1.0 with the sign of the | ||
1002 | | denorm and it will correctly generate the result in extended | ||
1003 | | precision. We can then call round with no sticky and the result | ||
1004 | | will be correct for the user's rounding mode and precision. If | ||
1005 | | the signs are unlike, we call round with the sticky bit set | ||
1006 | | and the result will be correct for the user's rounding mode and | ||
1007 | | precision. | ||
1008 | | | ||
1009 | sub_wrap: | ||
1010 | movew ETEMP_EX(%a6),%d0 | ||
1011 | movew FPTEMP_EX(%a6),%d1 | ||
1012 | eorw %d1,%d0 |bit 15 is set when the operand signs differ | ||
1013 | andiw #0x8000,%d0 | ||
1014 | bne sub_diff | ||
1015 | | | ||
1016 | | The signs are alike. | ||
1017 | | | ||
1018 | cmpb #0x0f,DNRM_FLG(%a6) |is dest the denorm? | ||
1019 | bnes sub_u_srcd | ||
1020 | movew FPTEMP_EX(%a6),%d0 | ||
1021 | andiw #0x8000,%d0 |keep only the sign of the dest | ||
1022 | orw #0x3fff,%d0 |force the exponent to +/- 1 | ||
1023 | movew %d0,FPTEMP_EX(%a6) |in the denorm | ||
1024 | movel USER_FPCR(%a6),%d0 | ||
1025 | andil #0x30,%d0 | ||
1026 | fmovel %d0,%fpcr |set up users rmode and X | ||
1027 | fmovex FPTEMP(%a6),%fp0 | ||
1028 | fsubx ETEMP(%a6),%fp0 |fp0 = dest - src | ||
1029 | fmovel %fpsr,%d1 | ||
1030 | orl %d1,USER_FPSR(%a6) |capture cc's and inex from fsub | ||
1031 | leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame | ||
1032 | fmovex %fp0,WBTEMP(%a6) |write result to memory | ||
1033 | lsrl #4,%d0 |put rmode in lower 2 bits | ||
1034 | movel USER_FPCR(%a6),%d1 | ||
1035 | andil #0xc0,%d1 | ||
1036 | lsrl #6,%d1 |put precision in upper word | ||
1037 | swap %d1 | ||
1038 | orl %d0,%d1 |set up for round call | ||
1039 | clrl %d0 |force sticky to zero | ||
1040 | bclrb #sign_bit,WBTEMP_EX(%a6) |use internal format for round | ||
1041 | sne WBTEMP_SGN(%a6) | ||
1042 | bsrl round |round result to users rmode & prec | ||
1043 | bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format | ||
1044 | beq frcfpnr | ||
1045 | bsetb #sign_bit,WBTEMP_EX(%a6) | ||
1046 | bra frcfpnr | ||
1047 | sub_u_srcd: | ||
1048 | movew ETEMP_EX(%a6),%d0 | ||
1049 | andiw #0x8000,%d0 |keep only the sign of the src | ||
1050 | orw #0x3fff,%d0 |force the exponent to +/- 1 | ||
1051 | movew %d0,ETEMP_EX(%a6) |in the denorm | ||
1052 | movel USER_FPCR(%a6),%d0 | ||
1053 | andil #0x30,%d0 | ||
1054 | fmovel %d0,%fpcr |set up users rmode and X | ||
1055 | fmovex FPTEMP(%a6),%fp0 | ||
1056 | fsubx ETEMP(%a6),%fp0 |fp0 = dest - src | ||
1057 | fmovel %fpsr,%d1 | ||
1058 | orl %d1,USER_FPSR(%a6) |capture cc's and inex from fsub | ||
1059 | leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame | ||
1060 | fmovex %fp0,WBTEMP(%a6) |write result to memory | ||
1061 | lsrl #4,%d0 |put rmode in lower 2 bits | ||
1062 | movel USER_FPCR(%a6),%d1 | ||
1063 | andil #0xc0,%d1 | ||
1064 | lsrl #6,%d1 |put precision in upper word | ||
1065 | swap %d1 | ||
1066 | orl %d0,%d1 |set up for round call | ||
1067 | clrl %d0 |force sticky to zero | ||
1068 | bclrb #sign_bit,WBTEMP_EX(%a6) |use internal format for round | ||
1069 | sne WBTEMP_SGN(%a6) | ||
1070 | bsrl round |round result to users rmode & prec | ||
1071 | bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format | ||
1072 | beq frcfpnr | ||
1073 | bsetb #sign_bit,WBTEMP_EX(%a6) | ||
1074 | bra frcfpnr | ||
1075 | | | ||
1076 | | Signs are unlike: | ||
1077 | | | ||
1078 | sub_diff: | ||
1079 | cmpb #0x0f,DNRM_FLG(%a6) |is dest the denorm? | ||
1080 | bnes sub_s_srcd |no - take the src-denorm path | ||
1081 | sub_s_destd: | ||
1082 | leal ETEMP(%a6),%a0 |a0 -> ETEMP, the operand handed to round | ||
1083 | movel USER_FPCR(%a6),%d0 | ||
1084 | andil #0x30,%d0 |isolate the rmode field | ||
1085 | lsrl #4,%d0 |put rmode in lower 2 bits | ||
1086 | movel USER_FPCR(%a6),%d1 | ||
1087 | andil #0xc0,%d1 |isolate the precision field | ||
1088 | lsrl #6,%d1 |put precision in upper word | ||
1089 | swap %d1 | ||
1090 | orl %d0,%d1 |set up for round call | ||
1091 | movel #0x20000000,%d0 |set sticky for round | ||
1092 | | | ||
1093 | | Since the dest is the denorm, the sign is the opposite of the | ||
1094 | | norm sign. | ||
1095 | | | ||
1096 | eoriw #0x8000,ETEMP_EX(%a6) |flip sign on result | ||
1097 | tstw ETEMP_EX(%a6) |test sign of the flipped result | ||
1098 | bgts sub_s_dwr |positive - leave N alone | ||
1099 | orl #neg_mask,USER_FPSR(%a6) |negative - set N in the user FPSR | ||
1100 | sub_s_dwr: | ||
1101 | bclrb #sign_bit,ETEMP_EX(%a6) |strip sign from exponent word ... | ||
1102 | sne ETEMP_SGN(%a6) |... and record it in the SGN byte | ||
1103 | bsrl round |round result to users rmode & prec | ||
1104 | bfclr ETEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format | ||
1105 | beqs sub_s_dclr |SGN byte was zero - result is positive | ||
1106 | bsetb #sign_bit,ETEMP_EX(%a6) |otherwise restore the sign bit | ||
1107 | sub_s_dclr: | ||
1108 | leal WBTEMP(%a6),%a0 |a0 -> WBTEMP (also used by sub_ckovf) | ||
1109 | movel ETEMP(%a6),(%a0) |write result to wbtemp | ||
1110 | movel ETEMP_HI(%a6),4(%a0) | ||
1111 | movel ETEMP_LO(%a6),8(%a0) | ||
1112 | bra sub_ckovf | ||
1113 | sub_s_srcd: | ||
1114 | leal FPTEMP(%a6),%a0 |a0 -> FPTEMP, the operand handed to round | ||
1115 | movel USER_FPCR(%a6),%d0 | ||
1116 | andil #0x30,%d0 |isolate the rmode field | ||
1117 | lsrl #4,%d0 |put rmode in lower 2 bits | ||
1118 | movel USER_FPCR(%a6),%d1 | ||
1119 | andil #0xc0,%d1 |isolate the precision field | ||
1120 | lsrl #6,%d1 |put precision in upper word | ||
1121 | swap %d1 | ||
1122 | orl %d0,%d1 |set up for round call | ||
1123 | movel #0x20000000,%d0 |set sticky for round | ||
1124 | bclrb #sign_bit,FPTEMP_EX(%a6) |strip sign from exponent word ... | ||
1125 | sne FPTEMP_SGN(%a6) |... and record it in the SGN byte | ||
1126 | bsrl round |round result to users rmode & prec | ||
1127 | bfclr FPTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format | ||
1128 | beqs sub_s_sclr |SGN byte was zero - result is positive | ||
1129 | bsetb #sign_bit,FPTEMP_EX(%a6) |otherwise restore the sign bit | ||
1130 | sub_s_sclr: | ||
1131 | leal WBTEMP(%a6),%a0 |a0 -> WBTEMP (also used by sub_ckovf) | ||
1132 | movel FPTEMP(%a6),(%a0) |write result to wbtemp | ||
1133 | movel FPTEMP_HI(%a6),4(%a0) | ||
1134 | movel FPTEMP_LO(%a6),8(%a0) | ||
1135 | tstw FPTEMP_EX(%a6) |test sign of the result | ||
1136 | bgt sub_ckovf |positive - leave N alone | ||
1137 | orl #neg_mask,USER_FPSR(%a6) |negative - set N in the user FPSR | ||
1138 | sub_ckovf: | ||
1139 | movew WBTEMP_EX(%a6),%d0 | ||
1140 | andiw #0x7fff,%d0 |drop the sign, keep the exponent | ||
1141 | cmpiw #0x7fff,%d0 |did the exponent reach $7fff? | ||
1142 | bne frcfpnr |no - go write the result | ||
1143 | | | ||
1144 | | The result has overflowed to $7fff exponent. Set I, ovfl, | ||
1145 | | and aovfl, and clr the mantissa (incorrectly set by the | ||
1146 | | round routine.) | ||
1147 | | | ||
1148 | orl #inf_mask+ovfl_inx_mask,USER_FPSR(%a6) | ||
1149 | clrl 4(%a0) |clears the longword at offset 4 (a0 -> WBTEMP on the paths above) | ||
1150 | bra frcfpnr | ||
1151 | | | ||
1152 | | Inst is fcmp. | ||
1153 | | | ||
1154 | wrap_cmp: | ||
1155 | cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm, | ||
1156 | beq fix_stk |restore to fpu | ||
1157 | | | ||
1158 | | One of the ops is denormalized. Test for wrap condition | ||
1159 | | and complete the instruction. | ||
1160 | | | ||
1161 | cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm | ||
1162 | bnes cmp_srcd |not dest - src is the denorm | ||
1163 | cmp_destd: | ||
1164 | bsrl ckinf_ns |helper defined elsewhere; NE on return ... | ||
1165 | bne fix_stk |... means hand the op back via fix_stk | ||
1166 | bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos) | ||
1167 | bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg) | ||
1168 | subl %d1,%d0 |subtract dest from src | ||
1169 | cmpl #0x8000,%d0 |exponent difference >= $8000? | ||
1170 | blt fix_stk |if less, not wrap case | ||
1171 | tstw ETEMP_EX(%a6) |set N to ~sign_of(src) | ||
1172 | bge cmp_setn |src not negative - set N | ||
1173 | rts |src negative - N is not set here | ||
1174 | cmp_srcd: | ||
1175 | bsrl ckinf_nd |helper defined elsewhere; NE on return ... | ||
1176 | bne fix_stk |... means hand the op back via fix_stk | ||
1177 | bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos) | ||
1178 | bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg) | ||
1179 | subl %d1,%d0 |subtract src from dest | ||
1180 | cmpl #0x8000,%d0 |exponent difference >= $8000? | ||
1181 | blt fix_stk |if less, not wrap case | ||
1182 | tstw FPTEMP_EX(%a6) |set N to sign_of(dest) | ||
1183 | blt cmp_setn |dest negative - set N | ||
1184 | rts |dest not negative - N is not set here | ||
1185 | cmp_setn: | ||
1186 | orl #neg_mask,USER_FPSR(%a6) |set N in the user FPSR | ||
1187 | rts | ||
1188 | |||
1189 | | | ||
1190 | | Inst is fmul. | ||
1191 | | | ||
1192 | wrap_mul: | ||
1193 | cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm, | ||
1194 | beq force_unf |force an underflow (really!) | ||
1195 | | | ||
1196 | | One of the ops is denormalized. Test for wrap condition | ||
1197 | | and complete the instruction. | ||
1198 | | | ||
1199 | cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm | ||
1200 | bnes mul_srcd |not dest - src is the denorm | ||
1201 | mul_destd: | ||
1202 | bsrl ckinf_ns |helper defined elsewhere; NE on return ... | ||
1203 | bne fix_stk |... means hand the op back via fix_stk | ||
1204 | bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos) | ||
1205 | bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg) | ||
1206 | addl %d1,%d0 |add dest exp to src exp | ||
1207 | bgt fix_stk |sum > 0 - not the forced-underflow case | ||
1208 | bra force_unf | ||
1209 | mul_srcd: | ||
1210 | bsrl ckinf_nd |helper defined elsewhere; NE on return ... | ||
1211 | bne fix_stk |... means hand the op back via fix_stk | ||
1212 | bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos) | ||
1213 | bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg) | ||
1214 | addl %d1,%d0 |add src exp to dest exp | ||
1215 | bgt fix_stk |sum > 0 - else fall through to force_unf | ||
1216 | |||
1217 | | | ||
1218 | | This code handles the case of the instruction resulting in | ||
1219 | | an underflow condition. | ||
1220 | | | ||
1221 | force_unf: | ||
1222 | bclrb #E1,E_BYTE(%a6) |clear the E1 bit in E_BYTE | ||
1223 | orl #unfinx_mask,USER_FPSR(%a6) |flag underflow/inexact in the user FPSR | ||
1224 | clrw NMNEXC(%a6) | ||
1225 | clrb WBTEMP_SGN(%a6) |assume a positive result | ||
1226 | movew ETEMP_EX(%a6),%d0 |find the sign of the result | ||
1227 | movew FPTEMP_EX(%a6),%d1 | ||
1228 | eorw %d1,%d0 |sign bits differ -> bit 15 set | ||
1229 | andiw #0x8000,%d0 |keep only the sign bit | ||
1230 | beqs frcunfcont |signs equal - result stays positive | ||
1231 | st WBTEMP_SGN(%a6) |signs differ - mark result negative | ||
1232 | frcunfcont: | ||
1233 | lea WBTEMP(%a6),%a0 |point a0 to memory location | ||
1234 | movew CMDREG1B(%a6),%d0 | ||
1235 | btstl #6,%d0 |test for forced precision | ||
1236 | beqs frcunf_fpcr |not forced - take precision from fpcr | ||
1237 | btstl #2,%d0 |check for double | ||
1238 | bnes frcunf_dbl | ||
1239 | movel #0x1,%d0 |inst is forced single | ||
1240 | bras frcunf_rnd | ||
1241 | frcunf_dbl: | ||
1242 | movel #0x2,%d0 |inst is forced double | ||
1243 | bras frcunf_rnd | ||
1244 | frcunf_fpcr: | ||
1245 | bfextu FPCR_MODE(%a6){#0:#2},%d0 |inst not forced - use fpcr prec | ||
1246 | frcunf_rnd: | ||
1247 | bsrl unf_sub |get correct result based on | ||
1248 | | ;round precision/mode. This | ||
1249 | | ;sets FPSR_CC correctly | ||
1250 | bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format | ||
1251 | beqs frcfpn |SGN byte was zero - write positive result | ||
1252 | bsetb #sign_bit,WBTEMP_EX(%a6) |otherwise restore the sign bit | ||
1253 | bra frcfpn |write WBTEMP to the destination fp register | ||
1254 | |||
1255 | | | ||
1256 | | Write the result to the user's fpn. All results must be HUGE to be | ||
1257 | | written; otherwise the results would have overflowed or underflowed. | ||
1258 | | If the rounding precision is single or double, the ovf_res routine | ||
1259 | | is needed to correctly supply the max value. | ||
1260 | | | ||
1261 | frcfpnr: | ||
1262 | movew CMDREG1B(%a6),%d0 | ||
1263 | btstl #6,%d0 |test for forced precision | ||
1264 | beqs frcfpn_fpcr |not forced - take precision from fpcr | ||
1265 | btstl #2,%d0 |check for double | ||
1266 | bnes frcfpn_dbl | ||
1267 | movel #0x1,%d0 |inst is forced single | ||
1268 | bras frcfpn_rnd | ||
1269 | frcfpn_dbl: | ||
1270 | movel #0x2,%d0 |inst is forced double | ||
1271 | bras frcfpn_rnd | ||
1272 | frcfpn_fpcr: | ||
1273 | bfextu FPCR_MODE(%a6){#0:#2},%d0 |inst not forced - use fpcr prec | ||
1274 | tstb %d0 | ||
1275 | beqs frcfpn |if extended, write what you got | ||
1276 | frcfpn_rnd: | ||
1277 | bclrb #sign_bit,WBTEMP_EX(%a6) |strip sign from exponent word ... | ||
1278 | sne WBTEMP_SGN(%a6) |... and record it in the SGN byte | ||
1279 | bsrl ovf_res |get correct result based on | ||
1280 | | ;round precision/mode. This | ||
1281 | | ;sets FPSR_CC correctly | ||
1282 | bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format | ||
1283 | beqs frcfpn_clr |SGN byte was zero - result is positive | ||
1284 | bsetb #sign_bit,WBTEMP_EX(%a6) |otherwise restore the sign bit | ||
1285 | frcfpn_clr: | ||
1286 | orl #ovfinx_mask,USER_FPSR(%a6) |flag overflow/inexact in the user FPSR | ||
1287 | | | ||
1288 | | Perform the write. | ||
1289 | | | ||
1290 | frcfpn: | ||
1291 | bfextu CMDREG1B(%a6){#6:#3},%d0 |extract fp destination register | ||
1292 | cmpib #3,%d0 | ||
1293 | bles frc0123 |check if dest is fp0-fp3 | ||
1294 | movel #7,%d1 |fp4-fp7: build a one-bit register mask | ||
1295 | subl %d0,%d1 |d1 = 7 - register number | ||
1296 | clrl %d0 | ||
1297 | bsetl %d1,%d0 |d0 = 1 << (7 - register number) | ||
1298 | fmovemx WBTEMP(%a6),%d0 |load the fp register selected by the mask in d0 | ||
1299 | rts | ||
1300 | frc0123: | ||
1301 | cmpib #0,%d0 |fp0-fp3: result is stored into the USER_FPn slots | ||
1302 | beqs frc0_dst | ||
1303 | cmpib #1,%d0 | ||
1304 | beqs frc1_dst | ||
1305 | cmpib #2,%d0 | ||
1306 | beqs frc2_dst | ||
1307 | frc3_dst: | ||
1308 | movel WBTEMP_EX(%a6),USER_FP3(%a6) |copy the 12-byte result, a longword at a time | ||
1309 | movel WBTEMP_HI(%a6),USER_FP3+4(%a6) | ||
1310 | movel WBTEMP_LO(%a6),USER_FP3+8(%a6) | ||
1311 | rts | ||
1312 | frc2_dst: | ||
1313 | movel WBTEMP_EX(%a6),USER_FP2(%a6) | ||
1314 | movel WBTEMP_HI(%a6),USER_FP2+4(%a6) | ||
1315 | movel WBTEMP_LO(%a6),USER_FP2+8(%a6) | ||
1316 | rts | ||
1317 | frc1_dst: | ||
1318 | movel WBTEMP_EX(%a6),USER_FP1(%a6) | ||
1319 | movel WBTEMP_HI(%a6),USER_FP1+4(%a6) | ||
1320 | movel WBTEMP_LO(%a6),USER_FP1+8(%a6) | ||
1321 | rts | ||
1322 | frc0_dst: | ||
1323 | movel WBTEMP_EX(%a6),USER_FP0(%a6) | ||
1324 | movel WBTEMP_HI(%a6),USER_FP0+4(%a6) | ||
1325 | movel WBTEMP_LO(%a6),USER_FP0+8(%a6) | ||
1326 | rts | ||
1327 | |||
1328 | | | ||
1329 | | Write etemp to fpn. | ||
1330 | | A check is made on enabled and signalled snan exceptions, | ||
1331 | | and the destination is not overwritten if this condition exists. | ||
1332 | | This code is designed to make fmove-ins of unsupported data types | ||
1333 | | faster. | ||
1334 | | | ||
1335 | wr_etemp: | ||
1336 | btstb #snan_bit,FPSR_EXCEPT(%a6) |if snan is set, and | ||
1337 | beqs fmoveinc |enabled, force restore | ||
1338 | btstb #snan_bit,FPCR_ENABLE(%a6) |and don't overwrite | ||
1339 | beqs fmoveinc |the dest | ||
1340 | movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for | ||
1341 | | ;snan handler | ||
1342 | tstb ETEMP(%a6) |check for negative | ||
1343 | blts snan_neg |top byte negative - go set N | ||
1344 | rts |positive snan - nothing more to do | ||
1345 | snan_neg: | ||
1346 | orl #neg_mask,USER_FPSR(%a6) |snan is negative; set N (mask, not bit index) | ||
1347 | rts | ||
1348 | fmoveinc: | ||
1349 | clrw NMNEXC(%a6) | ||
1350 | bclrb #E1,E_BYTE(%a6) |clear the E1 bit in E_BYTE | ||
1351 | moveb STAG(%a6),%d0 |check if stag is inf | ||
1352 | andib #0xe0,%d0 |keep the top three bits (the tag) | ||
1353 | cmpib #0x40,%d0 | ||
1354 | bnes fminc_cnan | ||
1355 | orl #inf_mask,USER_FPSR(%a6) |if inf, nothing yet has set I | ||
1356 | tstw LOCAL_EX(%a0) |check sign (NOTE(review): a0 is not set in this routine - confirm callers) | ||
1357 | bges fminc_con | ||
1358 | orl #neg_mask,USER_FPSR(%a6) |negative - set N | ||
1359 | bra fminc_con | ||
1360 | fminc_cnan: | ||
1361 | cmpib #0x60,%d0 |check if stag is NaN | ||
1362 | bnes fminc_czero | ||
1363 | orl #nan_mask,USER_FPSR(%a6) |if nan, nothing yet has set NaN | ||
1364 | movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for | ||
1365 | | ;snan handler | ||
1366 | tstw LOCAL_EX(%a0) |check sign | ||
1367 | bges fminc_con | ||
1368 | orl #neg_mask,USER_FPSR(%a6) |negative - set N | ||
1369 | bra fminc_con | ||
1370 | fminc_czero: | ||
1371 | cmpib #0x20,%d0 |check if zero | ||
1372 | bnes fminc_con | ||
1373 | orl #z_mask,USER_FPSR(%a6) |if zero, set Z | ||
1374 | tstw LOCAL_EX(%a0) |check sign | ||
1375 | bges fminc_con | ||
1376 | orl #neg_mask,USER_FPSR(%a6) |negative - set N | ||
1377 | fminc_con: | ||
1378 | bfextu CMDREG1B(%a6){#6:#3},%d0 |extract fp destination register | ||
1379 | cmpib #3,%d0 | ||
1380 | bles fp0123 |check if dest is fp0-fp3 | ||
1381 | movel #7,%d1 |fp4-fp7: build a one-bit register mask | ||
1382 | subl %d0,%d1 |d1 = 7 - register number | ||
1383 | clrl %d0 | ||
1384 | bsetl %d1,%d0 |d0 = 1 << (7 - register number) | ||
1385 | fmovemx ETEMP(%a6),%d0 |load the fp register selected by the mask in d0 | ||
1386 | rts | ||
1387 |||
1388 | fp0123: | ||
1389 | cmpib #0,%d0 |fp0-fp3: etemp is stored into the USER_FPn slots | ||
1390 | beqs fp0_dst | ||
1391 | cmpib #1,%d0 | ||
1392 | beqs fp1_dst | ||
1393 | cmpib #2,%d0 | ||
1394 | beqs fp2_dst | ||
1395 | fp3_dst: | ||
1396 | movel ETEMP_EX(%a6),USER_FP3(%a6) |copy the 12-byte operand, a longword at a time | ||
1397 | movel ETEMP_HI(%a6),USER_FP3+4(%a6) | ||
1398 | movel ETEMP_LO(%a6),USER_FP3+8(%a6) | ||
1399 | rts | ||
1400 | fp2_dst: | ||
1401 | movel ETEMP_EX(%a6),USER_FP2(%a6) | ||
1402 | movel ETEMP_HI(%a6),USER_FP2+4(%a6) | ||
1403 | movel ETEMP_LO(%a6),USER_FP2+8(%a6) | ||
1404 | rts | ||
1405 | fp1_dst: | ||
1406 | movel ETEMP_EX(%a6),USER_FP1(%a6) | ||
1407 | movel ETEMP_HI(%a6),USER_FP1+4(%a6) | ||
1408 | movel ETEMP_LO(%a6),USER_FP1+8(%a6) | ||
1409 | rts | ||
1410 | fp0_dst: | ||
1411 | movel ETEMP_EX(%a6),USER_FP0(%a6) | ||
1412 | movel ETEMP_HI(%a6),USER_FP0+4(%a6) | ||
1413 | movel ETEMP_LO(%a6),USER_FP0+8(%a6) | ||
1414 | rts | ||
1415 | |||
1416 | opclass3: | ||
1417 | st CU_ONLY(%a6) |set the CU_ONLY flag byte to $ff | ||
1418 | movew CMDREG1B(%a6),%d0 |check if packed moveout | ||
1419 | andiw #0x0c00,%d0 |isolate last 2 bits of size field | ||
1420 | cmpiw #0x0c00,%d0 |if size is 011 or 111, it is packed | ||
1421 | beq pack_out |else it is norm or denorm | ||
1422 | bra mv_out |non-packed sizes dispatch through mv_tbl | ||
1423 | |||
1424 | |||
1425 | | | ||
1426 | | MOVE OUT | ||
1427 | | | ||
1428 | |||
1429 | mv_tbl: | ||
1430 | .long li |0: long integer | ||
1431 | .long sgp |1: single precision | ||
1432 | .long xp |2: extended precision | ||
1433 | .long mvout_end |should never be taken | ||
1434 | .long wi |4: word integer | ||
1435 | .long dp |5: double precision | ||
1436 | .long bi |6: byte integer | ||
1437 | .long mvout_end |should never be taken | ||
1438 | mv_out: | ||
1439 | bfextu CMDREG1B(%a6){#3:#3},%d1 |put source specifier in d1 | ||
1440 | leal mv_tbl,%a0 |a0 -> jump table | ||
1441 | movel %a0@(%d1:l:4),%a0 |fetch handler address for that specifier | ||
1442 | jmp (%a0) | ||
1443 | |||
1444 | | | ||
1445 | | This exit is for move-out to memory. The aunfl bit is | ||
1446 | | set if the result is inex and unfl is signalled. | ||
1447 | | | ||
1448 | mvout_end: | ||
1449 | btstb #inex2_bit,FPSR_EXCEPT(%a6) |inex2 signalled? | ||
1450 | beqs no_aufl |no - aunfl is not set | ||
1451 | btstb #unfl_bit,FPSR_EXCEPT(%a6) |unfl signalled too? | ||
1452 | beqs no_aufl |no - aunfl is not set | ||
1453 | bsetb #aunfl_bit,FPSR_AEXCEPT(%a6) |both set - record accrued underflow | ||
1454 | no_aufl: | ||
1455 | clrw NMNEXC(%a6) | ||
1456 | bclrb #E1,E_BYTE(%a6) |clear the E1 bit in E_BYTE | ||
1457 | fmovel #0,%FPSR |clear any cc bits from res_func (writes the whole FPSR) | ||
1458 | | | ||
1459 | | Return ETEMP to extended format from internal extended format so | ||
1460 | | that gen_except will have a correctly signed value for ovfl/unfl | ||
1461 | | handlers. | ||
1462 | | | ||
1463 | bfclr ETEMP_SGN(%a6){#0:#8} |test and clear the SGN byte | ||
1464 | beqs mvout_con |it was zero - value is positive | ||
1465 | bsetb #sign_bit,ETEMP_EX(%a6) |otherwise put the sign back in the exponent word | ||
1466 | mvout_con: | ||
1467 | rts | ||
1468 | | | ||
1469 | | This exit is for move-out to int register. The aunfl bit is | ||
1470 | | not set in any case for this move. | ||
1471 | | | ||
1472 | mvouti_end: | ||
1473 | clrw NMNEXC(%a6) | ||
1474 | bclrb #E1,E_BYTE(%a6) |clear the E1 bit in E_BYTE | ||
1475 | fmovel #0,%FPSR |clear any cc bits from res_func (writes the whole FPSR) | ||
1476 | | | ||
1477 | | Return ETEMP to extended format from internal extended format so | ||
1478 | | that gen_except will have a correctly signed value for ovfl/unfl | ||
1479 | | handlers. | ||
1480 | | | ||
1481 | bfclr ETEMP_SGN(%a6){#0:#8} |test and clear the SGN byte | ||
1482 | beqs mvouti_con |it was zero - value is positive | ||
1483 | bsetb #sign_bit,ETEMP_EX(%a6) |otherwise put the sign back in the exponent word | ||
1484 | mvouti_con: | ||
1485 | rts | ||
1486 | | | ||
1487 | | li is used to handle a long integer source specifier | ||
1488 | | | ||
1489 | |||
1490 | li: | ||
1491 | moveql #4,%d0 |set byte count | ||
1492 |||
1493 | btstb #7,STAG(%a6) |check for extended denorm | ||
1494 | bne int_dnrm |if so, branch | ||
1495 |||
1496 | fmovemx ETEMP(%a6),%fp0-%fp0 |fp0 = source operand | ||
1497 | fcmpd #0x41dfffffffc00000,%fp0 |compare with 2^31 - 1 | ||
1498 | | 41dfffffffc00000 in dbl prec = 401d0000fffffffe00000000 in ext prec | ||
1499 | fbge lo_plrg |source >= largest positive long | ||
1500 | fcmpd #0xc1e0000000000000,%fp0 |compare with -2^31 | ||
1501 | | c1e0000000000000 in dbl prec = c01e00008000000000000000 in ext prec | ||
1502 | fble lo_nlrg |source <= most negative long | ||
1503 | | | ||
1504 | | at this point, the answer is between the largest pos and neg values | ||
1505 | | | ||
1506 | movel USER_FPCR(%a6),%d1 |use user's rounding mode | ||
1507 | andil #0x30,%d1 |keep only the rounding mode bits | ||
1508 | fmovel %d1,%fpcr | ||
1509 | fmovel %fp0,L_SCR1(%a6) |let the 040 perform conversion | ||
1510 | fmovel %fpsr,%d1 | ||
1511 | orl %d1,USER_FPSR(%a6) |capture inex2/ainex if set | ||
1512 | bra int_wrt | ||
1513 |||
1514 |||
1515 | lo_plrg: | ||
1516 | movel #0x7fffffff,L_SCR1(%a6) |answer is largest positive int | ||
1517 | fbeq int_wrt |exact answer (cc still from the fcmpd above) | ||
1518 | fcmpd #0x41dfffffffe00000,%fp0 |compare with 2^31 - 0.5 | ||
1519 | | 41dfffffffe00000 in dbl prec = 401d0000ffffffff00000000 in ext prec | ||
1520 | fbge int_operr |set operr | ||
1521 | bra int_inx |set inexact | ||
1522 |||
1523 | lo_nlrg: | ||
1524 | movel #0x80000000,L_SCR1(%a6) |answer is most negative int | ||
1525 | fbeq int_wrt |exact answer (cc still from the fcmpd above) | ||
1526 | fcmpd #0xc1e0000000100000,%fp0 |compare with -(2^31 + 0.5) | ||
1527 | | c1e0000000100000 in dbl prec = c01e00008000000080000000 in ext prec | ||
1528 | fblt int_operr |set operr | ||
1529 | bra int_inx |set inexact | ||
1530 | |||
1531 | | | ||
1532 | | wi is used to handle a word integer source specifier | ||
1533 | | | ||
1534 | |||
1535 | wi: | ||
1536 | moveql #2,%d0 |set byte count | ||
1537 |||
1538 | btstb #7,STAG(%a6) |check for extended denorm | ||
1539 | bne int_dnrm |branch if so | ||
1540 |||
1541 | fmovemx ETEMP(%a6),%fp0-%fp0 |fp0 = source operand | ||
1542 | fcmps #0x46fffe00,%fp0 |compare with 32767.0 | ||
1543 | | 46fffe00 in sgl prec = 400d0000fffe000000000000 in ext prec | ||
1544 | fbge wo_plrg |source >= largest positive word | ||
1545 | fcmps #0xc7000000,%fp0 |compare with -32768.0 | ||
1546 | | c7000000 in sgl prec = c00e00008000000000000000 in ext prec | ||
1547 | fble wo_nlrg |source <= most negative word | ||
1548 |||
1549 | | | ||
1550 | | at this point, the answer is between the largest pos and neg values | ||
1551 | | | ||
1552 | movel USER_FPCR(%a6),%d1 |use user's rounding mode | ||
1553 | andil #0x30,%d1 |keep only the rounding mode bits | ||
1554 | fmovel %d1,%fpcr | ||
1555 | fmovew %fp0,L_SCR1(%a6) |let the 040 perform conversion | ||
1556 | fmovel %fpsr,%d1 | ||
1557 | orl %d1,USER_FPSR(%a6) |capture inex2/ainex if set | ||
1558 | bra int_wrt | ||
1559 |||
1560 | wo_plrg: | ||
1561 | movew #0x7fff,L_SCR1(%a6) |answer is largest positive int | ||
1562 | fbeq int_wrt |exact answer (cc still from the fcmps above) | ||
1563 | fcmps #0x46ffff00,%fp0 |compare with 32767.5 | ||
1564 | | 46ffff00 in sgl prec = 400d0000ffff000000000000 in ext prec | ||
1565 | fbge int_operr |set operr | ||
1566 | bra int_inx |set inexact | ||
1567 |||
1568 | wo_nlrg: | ||
1569 | movew #0x8000,L_SCR1(%a6) |answer is most negative int | ||
1570 | fbeq int_wrt |exact answer (cc still from the fcmps above) | ||
1571 | fcmps #0xc7000080,%fp0 |compare with -32768.5 | ||
1572 | | c7000080 in sgl prec = c00e00008000800000000000 in ext prec | ||
1573 | fblt int_operr |set operr | ||
1574 | bra int_inx |set inexact | ||
1575 | |||
1576 | | | ||
1577 | | bi is used to handle a byte integer source specifier | ||
1578 | | | ||
1579 | |||
1580 | bi: | ||
1581 | moveql #1,%d0 |set byte count | ||
1582 |||
1583 | btstb #7,STAG(%a6) |check for extended denorm | ||
1584 | bne int_dnrm |branch if so | ||
1585 |||
1586 | fmovemx ETEMP(%a6),%fp0-%fp0 |fp0 = source operand | ||
1587 | fcmps #0x42fe0000,%fp0 |compare with 127.0 | ||
1588 | | 42fe0000 in sgl prec = 40050000fe00000000000000 in ext prec | ||
1589 | fbge by_plrg |source >= largest positive byte | ||
1590 | fcmps #0xc3000000,%fp0 |compare with -128.0 | ||
1591 | | c3000000 in sgl prec = c00600008000000000000000 in ext prec | ||
1592 | fble by_nlrg |source <= most negative byte | ||
1593 |||
1594 | | | ||
1595 | | at this point, the answer is between the largest pos and neg values | ||
1596 | | | ||
1597 | movel USER_FPCR(%a6),%d1 |use user's rounding mode | ||
1598 | andil #0x30,%d1 |keep only the rounding mode bits | ||
1599 | fmovel %d1,%fpcr | ||
1600 | fmoveb %fp0,L_SCR1(%a6) |let the 040 perform conversion | ||
1601 | fmovel %fpsr,%d1 | ||
1602 | orl %d1,USER_FPSR(%a6) |capture inex2/ainex if set | ||
1603 | bra int_wrt | ||
1604 |||
1605 | by_plrg: | ||
1606 | moveb #0x7f,L_SCR1(%a6) |answer is largest positive int | ||
1607 | fbeq int_wrt |exact answer (cc still from the fcmps above) | ||
1608 | fcmps #0x42ff0000,%fp0 |compare with 127.5 | ||
1609 | | 42ff0000 in sgl prec = 40050000ff00000000000000 in ext prec | ||
1610 | fbge int_operr |set operr | ||
1611 | bra int_inx |set inexact | ||
1612 |||
1613 | by_nlrg: | ||
1614 | moveb #0x80,L_SCR1(%a6) |answer is most negative int | ||
1615 | fbeq int_wrt |exact answer (cc still from the fcmps above) | ||
1616 | fcmps #0xc3008000,%fp0 |compare with -128.5 | ||
1617 | | c3008000 in sgl prec = c00600008080000000000000 in ext prec | ||
1618 | fblt int_operr |set operr | ||
1619 | bra int_inx |set inexact | ||
1620 | |||
1621 | | | ||
1622 | | Common integer routines | ||
1623 | | | ||
1624 | | int_dnrm---account for possible nonzero result for round up with positive | ||
1625 | | operand and round down for negative answer. In the first case (result = 1) | ||
1626 | | byte-width (stored in d0) of result must be honored. In the second case, | ||
1627 | | -1 in L_SCR1(a6) will cover all contingencies (FMOVE.B/W/L out). | ||
1628 |||
1629 | int_dnrm: | ||
1630 | movel #0,L_SCR1(%a6) | initialize result to 0 | ||
1631 | bfextu FPCR_MODE(%a6){#2:#2},%d1 | d1 is the rounding mode | ||
1632 | cmpb #2,%d1 | ||
1633 | bmis int_inx | if RN or RZ, done | ||
1634 | bnes int_rp | if RP, continue below | ||
1635 | tstw ETEMP(%a6) | RM: store -1 in L_SCR1 if src is negative | ||
1636 | bpls int_inx | otherwise result is 0 | ||
1637 | movel #-1,L_SCR1(%a6) | ||
1638 | bras int_inx | ||
1639 | int_rp: | ||
1640 | tstw ETEMP(%a6) | RP: store +1 of proper width in L_SCR1 if | ||
1641 | | ; source is greater than 0 | ||
1642 | bmis int_inx | otherwise, result is 0 | ||
1643 | lea L_SCR1(%a6),%a1 | a1 is address of L_SCR1 | ||
1644 | addal %d0,%a1 | offset by destination width -1 | ||
1645 | subal #1,%a1 | a1 now addresses the last byte of the d0-wide field | ||
1646 | bsetb #0,(%a1) | set low bit at a1 address | ||
1647 | int_inx: | ||
1648 | oril #inx2a_mask,USER_FPSR(%a6) |flag the result as inexact in the user FPSR | ||
1649 | bras int_wrt | ||
1650 | int_operr: | ||
1651 | fmovemx %fp0-%fp0,FPTEMP(%a6) |FPTEMP must contain the extended | ||
1652 | | ;precision source that needs to be | ||
1653 | | ;converted to integer this is required | ||
1654 | | ;if the operr exception is enabled. | ||
1655 | | ;set operr/aiop (no inex2 on int ovfl) | ||
1656 |||
1657 | oril #opaop_mask,USER_FPSR(%a6) | ||
1658 | | ;fall through to perform int_wrt | ||
1659 | int_wrt: | ||
1660 | movel EXC_EA(%a6),%a1 |load destination address | ||
1661 | tstl %a1 |check to see if it is a dest register | ||
1662 | beqs wrt_dn |write data register | ||
1663 | lea L_SCR1(%a6),%a0 |point to supervisor source address | ||
1664 | bsrl mem_write |d0 still holds the byte count set by li/wi/bi | ||
1665 | bra mvouti_end | ||
1666 | |||
1667 | wrt_dn: | ||
1668 | movel %d0,-(%sp) |d0 currently contains the size to write | ||
1669 | bsrl get_fline |get_fline returns Dn in d0 | ||
1670 | andiw #0x7,%d0 |isolate register | ||
1671 | movel (%sp)+,%d1 |get size | ||
1672 | cmpil #4,%d1 |most frequent case | ||
1673 | beqs sz_long | ||
1674 | cmpil #2,%d1 |word? | ||
1675 | bnes sz_con |neither long nor word - byte, no size bits added | ||
1676 | orl #8,%d0 |add 'word' size to register# | ||
1677 | bras sz_con | ||
1678 | sz_long: | ||
1679 | orl #0x10,%d0 |add 'long' size to register# | ||
1680 | sz_con: | ||
1681 | movel %d0,%d1 |reg_dest expects size:reg in d1 | ||
1682 | bsrl reg_dest |load proper data register | ||
1683 | bra mvouti_end | ||
1684 | xp: | ||
1685 | lea ETEMP(%a6),%a0 |a0 -> source operand | ||
1686 | bclrb #sign_bit,LOCAL_EX(%a0) |strip sign from exponent word ... | ||
1687 | sne LOCAL_SGN(%a0) |... and record it in the SGN byte | ||
1688 | btstb #7,STAG(%a6) |check for extended denorm | ||
1689 | bne xdnrm | ||
1690 | clrl %d0 |set destination format to extended (0) | ||
1691 | bras do_fp |do normal case | ||
1692 | sgp: | ||
1693 | lea ETEMP(%a6),%a0 |a0 -> source operand | ||
1694 | bclrb #sign_bit,LOCAL_EX(%a0) |strip sign from exponent word ... | ||
1695 | sne LOCAL_SGN(%a0) |... and record it in the SGN byte | ||
1696 | btstb #7,STAG(%a6) |check for extended denorm | ||
1697 | bne sp_catas |branch if so | ||
1698 | movew LOCAL_EX(%a0),%d0 |d0 = exponent word | ||
1699 | lea sp_bnds,%a1 |a1 -> bounds table (defined elsewhere) | ||
1700 | cmpw (%a1),%d0 |below the first bound? | ||
1701 | blt sp_under | ||
1702 | cmpw 2(%a1),%d0 |above the second bound? | ||
1703 | bgt sp_over | ||
1704 | movel #1,%d0 |set destination format to single | ||
1705 | bras do_fp |do normal case | ||
1706 | dp: | ||
1707 | lea ETEMP(%a6),%a0 |a0 -> source operand | ||
1708 | bclrb #sign_bit,LOCAL_EX(%a0) |strip sign from exponent word ... | ||
1709 | sne LOCAL_SGN(%a0) |... and record it in the SGN byte | ||
1710 |||
1711 | btstb #7,STAG(%a6) |check for extended denorm | ||
1712 | bne dp_catas |branch if so | ||
1713 |||
1714 | movew LOCAL_EX(%a0),%d0 |d0 = exponent word | ||
1715 | lea dp_bnds,%a1 |a1 -> bounds table (defined elsewhere) | ||
1716 |||
1717 | cmpw (%a1),%d0 |below the first bound? | ||
1718 | blt dp_under | ||
1719 | cmpw 2(%a1),%d0 |above the second bound? | ||
1720 | bgt dp_over | ||
1721 |||
1722 | movel #2,%d0 |set destination format to double | ||
1723 | | ;fall through to do_fp | ||
1724 | | | ||
1725 | do_fp: | ||
1726 | bfextu FPCR_MODE(%a6){#2:#2},%d1 |rnd mode in d1 | ||
1727 | swap %d0 |rnd prec in upper word | ||
1728 | addl %d0,%d1 |d1 has PREC/MODE info | ||
1729 |||
1730 | clrl %d0 |clear g,r,s | ||
1731 |||
1732 | bsrl round |round | ||
1733 |||
1734 | movel %a0,%a1 |a1 = rounded operand | ||
1735 | movel EXC_EA(%a6),%a0 |a0 = destination address | ||
1736 |||
1737 | bfextu CMDREG1B(%a6){#3:#3},%d1 |extract destination format | ||
1738 | | ;at this point only the dest | ||
1739 | | ;formats sgl, dbl, ext are | ||
1740 | | ;possible | ||
1741 | cmpb #2,%d1 | ||
1742 | bgts ddbl |double=5, extended=2, single=1 | ||
1743 | bnes dsgl | ||
1744 | | ;fall through to dext | ||
1745 | dext: | ||
1746 | bsrl dest_ext | ||
1747 | bra mvout_end | ||
1748 | dsgl: | ||
1749 | bsrl dest_sgl | ||
1750 | bra mvout_end | ||
1751 | ddbl: | ||
1752 | bsrl dest_dbl | ||
1753 | bra mvout_end | ||
1754 | |||
1755 | | | ||
1756 | | Handle possible denorm or catastrophic underflow cases here | ||
1757 | | | ||
1758 | xdnrm: | ||
1759 | bsr set_xop |initialize WBTEMP | ||
1760 | bsetb #wbtemp15_bit,WB_BYTE(%a6) |set wbtemp15 | ||
1761 |||
1762 | movel %a0,%a1 |a1 = operand (a0 still -> ETEMP from xp) | ||
1763 | movel EXC_EA(%a6),%a0 |a0 has the destination pointer | ||
1764 | bsrl dest_ext |store to memory | ||
1765 | bsetb #unfl_bit,FPSR_EXCEPT(%a6) |signal underflow | ||
1766 | bra mvout_end | ||
1767 | |||
1768 | sp_under: | ||
1769 | bsetb #etemp15_bit,STAG(%a6) | ||
1770 |||
1771 | cmpw 4(%a1),%d0 |compare exponent with third entry at a1 (sp_bnds) | ||
1772 | blts sp_catas |catastrophic underflow case | ||
1773 |||
1774 | movel #1,%d0 |load in round precision | ||
1775 | movel #sgl_thresh,%d1 |load in single denorm threshold | ||
1776 | bsrl dpspdnrm |expects d1 to have the proper | ||
1777 | | ;denorm threshold | ||
1778 | bsrl dest_sgl |stores value to destination | ||
1779 | bsetb #unfl_bit,FPSR_EXCEPT(%a6) |signal underflow | ||
1780 | bra mvout_end |exit | ||
1781 | |||
1782 | dp_under: | ||
1783 | bsetb #etemp15_bit,STAG(%a6) | ||
1784 |||
1785 | cmpw 4(%a1),%d0 |compare exponent with third entry at a1 (dp_bnds) | ||
1786 | blts dp_catas |catastrophic underflow case | ||
1787 |||
1788 | movel #dbl_thresh,%d1 |load in double precision threshold | ||
1789 | movel #2,%d0 |load in round precision | ||
1790 | bsrl dpspdnrm |expects d1 to have proper | ||
1791 | | ;denorm threshold | ||
1792 | | ;expects d0 to have round precision | ||
1793 | bsrl dest_dbl |store value to destination | ||
1794 | bsetb #unfl_bit,FPSR_EXCEPT(%a6) |signal underflow | ||
1795 | bra mvout_end |exit | ||
1796 | |||
1797 | | | ||
1798 | | Handle catastrophic underflow cases here | ||
1799 | | | ||
1800 | sp_catas: | ||
1801 | | Temp fix for z bit set in unf_sub | ||
1802 | movel USER_FPSR(%a6),-(%a7) |save USER_FPSR across unf_sub | ||
1803 |||
1804 | movel #1,%d0 |set round precision to sgl | ||
1805 |||
1806 | bsrl unf_sub |a0 points to result | ||
1807 |||
1808 | movel (%a7)+,USER_FPSR(%a6) |discard whatever unf_sub changed in USER_FPSR | ||
1809 |||
1810 | movel #1,%d0 | ||
1811 | subw %d0,LOCAL_EX(%a0) |account for difference between | ||
1812 | | ;denorm/norm bias | ||
1813 |||
1814 | movel %a0,%a1 |a1 has the operand input | ||
1815 | movel EXC_EA(%a6),%a0 |a0 has the destination pointer | ||
1816 |||
1817 | bsrl dest_sgl |store the result | ||
1818 | oril #unfinx_mask,USER_FPSR(%a6) |flag underflow/inexact in the user FPSR | ||
1819 | bra mvout_end | ||
1820 | |||
1821 | dp_catas: | ||
1822 | | Temp fix for z bit set in unf_sub | ||
1823 | movel USER_FPSR(%a6),-(%a7) |save USER_FPSR across unf_sub | ||
1824 |||
1825 | movel #2,%d0 |set round precision to dbl | ||
1826 | bsrl unf_sub |a0 points to result | ||
1827 |||
1828 | movel (%a7)+,USER_FPSR(%a6) |discard whatever unf_sub changed in USER_FPSR | ||
1829 |||
1830 | movel #1,%d0 | ||
1831 | subw %d0,LOCAL_EX(%a0) |account for difference between | ||
1832 | | ;denorm/norm bias | ||
1833 |||
1834 | movel %a0,%a1 |a1 has the operand input | ||
1835 | movel EXC_EA(%a6),%a0 |a0 has the destination pointer | ||
1836 |||
1837 | bsrl dest_dbl |store the result | ||
1838 | oril #unfinx_mask,USER_FPSR(%a6) |flag underflow/inexact in the user FPSR | ||
1839 | bra mvout_end | ||
1840 | |||
1841 | | | ||
1842 | | Handle catastrophic overflow cases here | ||
1843 | | | ||
1844 | sp_over: | ||
1845 | | Temp fix for z bit set in ovf_res | ||
1846 | movel USER_FPSR(%a6),-(%a7) |save USER_FPSR across ovf_res | ||
1847 |||
1848 | movel #1,%d0 |round precision = sgl | ||
1849 | leal FP_SCR1(%a6),%a0 |use FP_SCR1 for creating result | ||
1850 | movel ETEMP_EX(%a6),(%a0) |copy the operand into FP_SCR1 | ||
1851 | movel ETEMP_HI(%a6),4(%a0) | ||
1852 | movel ETEMP_LO(%a6),8(%a0) | ||
1853 | bsrl ovf_res | ||
1854 |||
1855 | movel (%a7)+,USER_FPSR(%a6) |discard whatever ovf_res changed in USER_FPSR | ||
1856 |||
1857 | movel %a0,%a1 |a1 has the operand input | ||
1858 | movel EXC_EA(%a6),%a0 |a0 has the destination pointer | ||
1859 | bsrl dest_sgl |store the result | ||
1860 | orl #ovfinx_mask,USER_FPSR(%a6) |flag overflow/inexact in the user FPSR | ||
1861 | bra mvout_end | ||
1862 | |||
1863 | dp_over: | ||
1864 | | Temp fix for z bit set in ovf_res | ||
1865 | movel USER_FPSR(%a6),-(%a7) |save USER_FPSR across ovf_res | ||
1866 |||
1867 | movel #2,%d0 |round precision = dbl | ||
1868 | leal FP_SCR1(%a6),%a0 |use FP_SCR1 for creating result | ||
1869 | movel ETEMP_EX(%a6),(%a0) |copy the operand into FP_SCR1 | ||
1870 | movel ETEMP_HI(%a6),4(%a0) | ||
1871 | movel ETEMP_LO(%a6),8(%a0) | ||
1872 | bsrl ovf_res | ||
1873 |||
1874 | movel (%a7)+,USER_FPSR(%a6) |discard whatever ovf_res changed in USER_FPSR | ||
1875 |||
1876 | movel %a0,%a1 |a1 has the operand input | ||
1877 | movel EXC_EA(%a6),%a0 |a0 has the destination pointer | ||
1878 | bsrl dest_dbl |store the result | ||
1879 | orl #ovfinx_mask,USER_FPSR(%a6) |flag overflow/inexact in the user FPSR | ||
1880 | bra mvout_end | ||
1881 | |||
1882 | | | ||
1883 | | DPSPDNRM | ||
1884 | | | ||
1885 | | This subroutine takes an extended normalized number and denormalizes | ||
1886 | | it to the given round precision. This subroutine also decrements | ||
1887 | | the input operand's exponent by 1 to account for the fact that | ||
1888 | | dest_sgl or dest_dbl expects a normalized number's bias. | ||
1889 | | | ||
1890 | | Input: a0 points to a normalized number in internal extended format | ||
1891 | | d0 is the round precision (=1 for sgl; =2 for dbl) | ||
1892 | | d1 is the single precision or double precision | ||
1893 | | denorm threshold | ||
1894 | | | ||
1895 | | Output: (In the format for dest_sgl or dest_dbl) | ||
1896 | | a0 points to the destination | ||
1897 | | a1 points to the operand | ||
1898 | | | ||
1899 | | Exceptions: Reports inexact 2 exception by setting USER_FPSR bits | ||
1900 | | | ||
1901 | dpspdnrm: | ||
1902 | movel %d0,-(%a7) |save round precision | ||
1903 | clrl %d0 |clear initial g,r,s | ||
1904 | bsrl dnrm_lp |careful with d0, it's needed by round | ||
1905 |||
1906 | bfextu FPCR_MODE(%a6){#2:#2},%d1 |get rounding mode | ||
1907 | swap %d1 |park the mode in the upper word | ||
1908 | movew 2(%a7),%d1 |set rounding precision | ||
1909 | swap %d1 |at this point d1 has PREC/MODE info | ||
1910 | bsrl round |round result, sets the inex bit in | ||
1911 | | ;USER_FPSR if needed | ||
1912 |||
1913 | movew #1,%d0 | ||
1914 | subw %d0,LOCAL_EX(%a0) |account for difference in denorm | ||
1915 | | ;vs norm bias | ||
1916 |||
1917 | movel %a0,%a1 |a1 has the operand input | ||
1918 | movel EXC_EA(%a6),%a0 |a0 has the destination pointer | ||
1919 | addw #4,%a7 |pop stack | ||
1920 | rts | ||
1921 | | | ||
1922 | | SET_XOP initializes WBTEMP with the value pointed to by a0 | ||
1923 | | input: a0 points to input operand in the internal extended format | ||
1924 | | | ||
1925 | set_xop: | ||
1926 | movel LOCAL_EX(%a0),WBTEMP_EX(%a6) |copy the operand, a longword at a time | ||
1927 | movel LOCAL_HI(%a0),WBTEMP_HI(%a6) | ||
1928 | movel LOCAL_LO(%a0),WBTEMP_LO(%a6) | ||
1929 | bfclr WBTEMP_SGN(%a6){#0:#8} |test and clear the SGN byte | ||
1930 | beqs sxop |it was zero - value is positive | ||
1931 | bsetb #sign_bit,WBTEMP_EX(%a6) |otherwise put the sign in the exponent word | ||
1932 | sxop: | ||
1933 | bfclr STAG(%a6){#5:#4} |clear wbtm66,wbtm1,wbtm0,sbit | ||
1934 | rts | ||
1935 | | | ||
1936 | | P_MOVE | ||
1937 | | | ||
1938 | p_movet: | ||
1939 | .long p_move |tag 0 | ||
1940 | .long p_movez |tag 1 (zero, per fmoveinc's tag test) | ||
1941 | .long p_movei |tag 2 (inf, per fmoveinc's tag test) | ||
1942 | .long p_moven |tag 3 (NaN, per fmoveinc's tag test) | ||
1943 | .long p_move |tag 4 | ||
1944 | p_regd: | ||
1945 | .long p_dyd0 |one reader per data register d0-d7 | ||
1946 | .long p_dyd1 | ||
1947 | .long p_dyd2 | ||
1948 | .long p_dyd3 | ||
1949 | .long p_dyd4 | ||
1950 | .long p_dyd5 | ||
1951 | .long p_dyd6 | ||
1952 | .long p_dyd7 | ||
1953 |||
1954 | pack_out: | ||
1955 | leal p_movet,%a0 |load jmp table address | ||
1956 | movew STAG(%a6),%d0 |get source tag | ||
1957 | bfextu %d0{#16:#3},%d0 |isolate source bits | ||
1958 | movel (%a0,%d0.w*4),%a0 |load a0 with routine label for tag | ||
1959 | jmp (%a0) |go to the routine | ||
1960 | |||
1961 | p_write: | ||
1962 | movel #0x0c,%d0 |get byte count | ||
1963 | movel EXC_EA(%a6),%a1 |get the destination address | ||
1964 | bsr mem_write |write the user's destination | ||
1965 | moveb #0,CU_SAVEPC(%a6) |set the cu save pc to all 0's | ||
1966 |||
1967 | | | ||
1968 | | Also note that the dtag must be set to norm here - this is because | ||
1969 | | the 040 uses the dtag to execute the correct microcode. | ||
1970 | | | ||
1971 | bfclr DTAG(%a6){#0:#3} |set dtag to norm | ||
1972 |||
1973 | rts | ||
1974 | |||
1975 | | Notes on handling of special case (zero, inf, and nan) inputs: | ||
1976 | | 1. Operr is not signalled if the k-factor is greater than 18. | ||
1977 | | 2. Per the manual, status bits are not set. | ||
1978 | | | ||
1979 | |||
1980 | p_move: | ||
1981 | movew CMDREG1B(%a6),%d0 | ||
1982 | btstl #kfact_bit,%d0 |test for dynamic k-factor | ||
1983 | beqs statick |if clear, k-factor is static | ||
1984 | dynamick: | ||
1985 | bfextu %d0{#25:#3},%d0 |isolate register for dynamic k-factor | ||
1986 | lea p_regd,%a0 |a0 -> table of per-register readers | ||
1987 | movel %a0@(%d0:l:4),%a0 |pick the reader for that register | ||
1988 | jmp (%a0) |reader loads d0 and branches to statick | ||
1989 | statick: | ||
1990 | andiw #0x007f,%d0 |get k-factor | ||
1991 | bfexts %d0{#25:#7},%d0 |sign extend d0 for bindec | ||
1992 | leal ETEMP(%a6),%a0 |a0 will point to the packed decimal | ||
1993 | bsrl bindec |perform the convert; data at a6 | ||
1994 | leal FP_SCR1(%a6),%a0 |load a0 with result address | ||
1995 | bral p_write | ||
1996 | p_movez: | ||
1997 | leal ETEMP(%a6),%a0 |a0 will point to the packed decimal | ||
1998 | clrw 2(%a0) |clear lower word of exp | ||
1999 | clrl 4(%a0) |load second lword of ZERO | ||
2000 | clrl 8(%a0) |load third lword of ZERO | ||
2001 | bra p_write |go write results | ||
2002 | p_movei: | ||
2003 | fmovel #0,%FPSR |clear aiop (writes 0 to the whole FPSR) | ||
2004 | leal ETEMP(%a6),%a0 |a0 will point to the packed decimal | ||
2005 | clrw 2(%a0) |clear lower word of exp | ||
2006 | bra p_write |go write the result | ||
2007 | p_moven: | ||
2008 | leal ETEMP(%a6),%a0 |a0 will point to the packed decimal | ||
2009 | clrw 2(%a0) |clear lower word of exp | ||
2010 | bra p_write |go write the result | ||
2011 | |||
2012 | | | ||
2013 | | Routines to read the dynamic k-factor from Dn. | ||
2014 | | | ||
2015 | p_dyd0: | ||
2016 | movel USER_D0(%a6),%d0 |d0 is read from its saved copy in the frame | ||
2017 | bras statick | ||
2018 | p_dyd1: | ||
2019 | movel USER_D1(%a6),%d0 |d1 is read from its saved copy in the frame | ||
2020 | bras statick | ||
2021 | p_dyd2: | ||
2022 | movel %d2,%d0 |d2-d7 are read from the live registers | ||
2023 | bras statick | ||
2024 | p_dyd3: | ||
2025 | movel %d3,%d0 | ||
2026 | bras statick | ||
2027 | p_dyd4: | ||
2028 | movel %d4,%d0 | ||
2029 | bras statick | ||
2030 | p_dyd5: | ||
2031 | movel %d5,%d0 | ||
2032 | bras statick | ||
2033 | p_dyd6: | ||
2034 | movel %d6,%d0 | ||
2035 | bra statick | ||
2036 | p_dyd7: | ||
2037 | movel %d7,%d0 | ||
2038 | bra statick | ||
2039 | |||
2040 | |end | ||
diff --git a/arch/m68k/fpsp040/round.S b/arch/m68k/fpsp040/round.S new file mode 100644 index 000000000000..00f98068783f --- /dev/null +++ b/arch/m68k/fpsp040/round.S | |||
@@ -0,0 +1,649 @@ | |||
1 | | | ||
2 | | round.sa 3.4 7/29/91 | ||
3 | | | ||
4 | | handle rounding and normalization tasks | ||
5 | | | ||
6 | | | ||
7 | | | ||
8 | | Copyright (C) Motorola, Inc. 1990 | ||
9 | | All Rights Reserved | ||
10 | | | ||
11 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
12 | | The copyright notice above does not evidence any | ||
13 | | actual or intended publication of such source code. | ||
14 | |||
15 | |ROUND idnt 2,1 | Motorola 040 Floating Point Software Package | ||
16 | |||
17 | |section 8 | ||
18 | |||
19 | #include "fpsp.h" | ||
20 | |||
21 | | | ||
22 | | round --- round result according to precision/mode | ||
23 | | | ||
24 | | a0 points to the input operand in the internal extended format | ||
25 | | d1(high word) contains rounding precision: | ||
26 | | ext = $0000xxxx | ||
27 | | sgl = $0001xxxx | ||
28 | | dbl = $0002xxxx | ||
29 | | d1(low word) contains rounding mode: | ||
30 | | RN = $xxxx0000 | ||
31 | | RZ = $xxxx0001 | ||
32 | | RM = $xxxx0002 | ||
33 | | RP = $xxxx0003 | ||
34 | | d0{31:29} contains the g,r,s bits (extended) | ||
35 | | | ||
36 | | On return the value pointed to by a0 is correctly rounded, | ||
37 | | a0 is preserved and the g-r-s bits in d0 are cleared. | ||
38 | | The result is not typed - the tag field is invalid. The | ||
39 | | result is still in the internal extended format. | ||
40 | | | ||
41 | | The INEX bit of USER_FPSR will be set if the rounded result was | ||
42 | | inexact (i.e. if any of the g-r-s bits were set). | ||
43 | | | ||
44 | |||
45 | .global round | ||
46 | round: |in: a0 = operand, d1 = PREC:MODE, d0 = g,r,s; exits through truncate or an add_* routine | ||
47 | | If g=r=s=0 then result is exact and round is done, else set | ||
48 | | the inex flag in status reg and continue. | ||
49 | | | ||
50 | bsrs ext_grs |this subroutine looks at the | ||
51 | | ;rounding precision and sets | ||
52 | | ;the appropriate g-r-s bits. | ||
53 | tstl %d0 |are any of g,r,s set? | ||
54 | bne rnd_cont |yes: result is inexact, go round by mode | ||
55 | |||
56 | swap %d1 |exact: set up d1.w for round prec. | ||
57 | bra truncate |force bits beyond the precision to zero | ||
58 | |||
59 | rnd_cont: | ||
60 | | | ||
61 | | Use rounding mode as an index into a jump table for these modes. | ||
62 | | | ||
63 | orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex | ||
64 | lea mode_tab,%a1 |a1 = base of the rounding-mode table | ||
65 | movel (%a1,%d1.w*4),%a1 |a1 = handler for the mode in d1.w | ||
66 | jmp (%a1) |dispatch; each handler swaps d1 itself | ||
67 | | | ||
68 | | Jump table indexed by rounding mode in d1.w. All following assumes | ||
69 | | grs != 0. | ||
70 | | | ||
71 | mode_tab: | ||
72 | .long rnd_near |mode 0: round to nearest | ||
73 | .long rnd_zero |mode 1: round toward zero | ||
74 | .long rnd_mnus |mode 2: round toward minus infinity | ||
75 | .long rnd_plus |mode 3: round toward plus infinity | ||
76 | | | ||
77 | | ROUND PLUS INFINITY | ||
78 | | | ||
79 | | If sign of fp number = 0 (positive), then add 1 to l. | ||
80 | | | ||
81 | rnd_plus: | ||
82 | swap %d1 |set up d1 for round prec. | ||
83 | tstb LOCAL_SGN(%a0) |check for sign | ||
84 | bmi truncate |if negative then truncate | ||
85 | movel #0xffffffff,%d0 |force g,r,s to be all f's (add_* then keeps the l-bit) | ||
86 | lea add_to_l,%a1 |a1 = table of add-1-to-l routines | ||
87 | movel (%a1,%d1.w*4),%a1 |pick the routine for the precision in d1.w | ||
88 | jmp (%a1) |tail-jump; that routine does the rts | ||
89 | | | ||
90 | | ROUND MINUS INFINITY | ||
91 | | | ||
92 | | If sign of fp number = 1 (negative), then add 1 to l. | ||
93 | | | ||
94 | rnd_mnus: | ||
95 | swap %d1 |set up d1 for round prec. | ||
96 | tstb LOCAL_SGN(%a0) |check for sign | ||
97 | bpl truncate |if positive then truncate | ||
98 | movel #0xffffffff,%d0 |force g,r,s to be all f's (add_* then keeps the l-bit) | ||
99 | lea add_to_l,%a1 |a1 = table of add-1-to-l routines | ||
100 | movel (%a1,%d1.w*4),%a1 |pick the routine for the precision in d1.w | ||
101 | jmp (%a1) |tail-jump; that routine does the rts | ||
102 | | | ||
103 | | ROUND ZERO | ||
104 | | | ||
105 | | Always truncate. | ||
106 | rnd_zero: | ||
107 | swap %d1 |set up d1 for round prec. | ||
108 | bra truncate |drop every bit beyond the precision, never add to l | ||
109 | | | ||
110 | | | ||
111 | | ROUND NEAREST | ||
112 | | | ||
113 | | If (g=1), then add 1 to l and if (r=s=0), then clear l | ||
114 | | Note that this will round to even in case of a tie. | ||
115 | | | ||
116 | rnd_near: | ||
117 | swap %d1 |set up d1 for round prec. | ||
118 | asll #1,%d0 |shift g-bit to c-bit; d0 now holds only r,s | ||
119 | bcc truncate |g=0: truncate; g=1: fall through and add 1 to l | ||
120 | lea add_to_l,%a1 |a1 = table of add-1-to-l routines | ||
121 | movel (%a1,%d1.w*4),%a1 |pick the routine for the precision in d1.w | ||
122 | jmp (%a1) |that routine clears l again when d0 (r,s) is zero | ||
123 | |||
124 | | | ||
125 | | ext_grs --- extract guard, round and sticky bits | ||
126 | | | ||
127 | | Input: d1 = PREC:ROUND | ||
128 | | Output: d0{31:29}= guard, round, sticky | ||
129 | | | ||
130 | | The ext_grs extract the guard/round/sticky bits according to the | ||
131 | | selected rounding precision. It is called by the round subroutine | ||
132 | | only. All registers except d0 are kept intact. d0 becomes an | ||
133 | | updated guard,round,sticky in d0{31:29} | ||
134 | | | ||
135 | | Notes: the ext_grs uses the round PREC, and therefore has to swap d1 | ||
136 | | prior to usage, and needs to restore d1 to original. | ||
137 | | | ||
138 | ext_grs: | ||
139 | swap %d1 |have d1.w point to round precision | ||
140 | cmpiw #0,%d1 |precision 0 = extended | ||
141 | bnes sgl_or_dbl |not extended: g,r,s must be rebuilt from the mantissa | ||
142 | bras end_ext_grs |extended: d0 is returned unchanged | ||
143 | |||
144 | sgl_or_dbl: | ||
145 | moveml %d2/%d3,-(%a7) |make some temp registers | ||
146 | cmpiw #1,%d1 |precision 1 = single | ||
147 | bnes grs_dbl |anything else is handled as double | ||
148 | grs_sgl: | ||
149 | bfextu LOCAL_HI(%a0){#24:#2},%d3 |sgl prec. g-r are 2 bits right | ||
150 | movel #30,%d2 |of the sgl prec. limits | ||
151 | lsll %d2,%d3 |shift g-r bits to MSB of d3 | ||
152 | movel LOCAL_HI(%a0),%d2 |get ms mantissa lword for s-bit test | ||
153 | andil #0x0000003f,%d2 |s bit is the or of all other | ||
154 | bnes st_stky |bits to the right of g-r | ||
155 | tstl LOCAL_LO(%a0) |test lower mantissa | ||
156 | bnes st_stky |if any are set, set sticky | ||
157 | tstl %d0 |test original g,r,s | ||
158 | bnes st_stky |if any are set, set sticky | ||
159 | bras end_sd |nothing below g-r is set: sticky stays clear | ||
160 | grs_dbl: | ||
161 | bfextu LOCAL_LO(%a0){#21:#2},%d3 |dbl-prec. g-r are 2 bits right | ||
162 | movel #30,%d2 |of the dbl prec. limits | ||
163 | lsll %d2,%d3 |shift g-r bits to the MSB of d3 | ||
164 | movel LOCAL_LO(%a0),%d2 |get lower mantissa for s-bit test | ||
165 | andil #0x000001ff,%d2 |s bit is the or-ing of all | ||
166 | bnes st_stky |other bits to the right of g-r | ||
167 | tstl %d0 |test original g,r,s | ||
168 | bnes st_stky |if any are set, set sticky | ||
169 | bras end_sd |if clear, exit | ||
170 | st_stky: | ||
171 | bset #rnd_stky_bit,%d3 |set the sticky bit in the rebuilt g,r,s | ||
172 | end_sd: | ||
173 | movel %d3,%d0 |return grs to d0 | ||
174 | moveml (%a7)+,%d2/%d3 |restore scratch registers | ||
175 | end_ext_grs: | ||
176 | swap %d1 |restore d1 to original | ||
177 | rts | ||
178 | |||
179 | |******************* Local Equates | ||
180 | .set ad_1_sgl,0x00000100 | constant to add 1 to l-bit in sgl prec (bit 8; added to LOCAL_HI) | ||
181 | .set ad_1_dbl,0x00000800 | constant to add 1 to l-bit in dbl prec (bit 11; added to LOCAL_LO) | ||
182 | |||
183 | |||
184 | |Jump table for adding 1 to the l-bit indexed by rnd prec | ||
185 | |||
186 | add_to_l: | ||
187 | .long add_ext |prec 0: extended | ||
188 | .long add_sgl |prec 1: single | ||
189 | .long add_dbl |prec 2: double | ||
190 | .long add_dbl |prec 3: same routine as double | ||
191 | | | ||
192 | | ADD SINGLE | ||
193 | | | ||
194 | add_sgl: | ||
195 | addl #ad_1_sgl,LOCAL_HI(%a0) |add 1 at the sgl l-bit | ||
196 | bccs scc_clr |no mantissa overflow | ||
197 | roxrw LOCAL_HI(%a0) |shift v-bit back in | ||
198 | roxrw LOCAL_HI+2(%a0) |shift v-bit back in | ||
199 | addw #0x1,LOCAL_EX(%a0) |and incr exponent | ||
200 | scc_clr: | ||
201 | tstl %d0 |test for rs = 0 | ||
202 | bnes sgl_done |r or s set: keep the l-bit | ||
203 | andiw #0xfe00,LOCAL_HI+2(%a0) |clear the l-bit | ||
204 | sgl_done: |also entered directly through the trunct table | ||
205 | andil #0xffffff00,LOCAL_HI(%a0) |truncate bits beyond sgl limit | ||
206 | clrl LOCAL_LO(%a0) |clear the ls mantissa (LOCAL_LO) | ||
207 | rts | ||
208 | |||
209 | | | ||
210 | | ADD EXTENDED | ||
211 | | | ||
212 | add_ext: | ||
213 | addql #1,LOCAL_LO(%a0) |add 1 to l-bit | ||
214 | bccs xcc_clr |test for carry out | ||
215 | addql #1,LOCAL_HI(%a0) |propagate carry | ||
216 | bccs xcc_clr |no carry out of the ms mantissa | ||
217 | roxrw LOCAL_HI(%a0) |mant is 0 so restore v-bit | ||
218 | roxrw LOCAL_HI+2(%a0) |mant is 0 so restore v-bit | ||
219 | roxrw LOCAL_LO(%a0) |continue the rotate through the ls mantissa | ||
220 | roxrw LOCAL_LO+2(%a0) |last word of the 64-bit rotate | ||
221 | addw #0x1,LOCAL_EX(%a0) |and inc exp | ||
222 | xcc_clr: | ||
223 | tstl %d0 |test rs = 0 | ||
224 | bnes add_ext_done |r or s set: keep the l-bit | ||
225 | andib #0xfe,LOCAL_LO+3(%a0) |clear the l bit | ||
226 | add_ext_done: | ||
227 | rts | ||
228 | | | ||
229 | | ADD DOUBLE | ||
230 | | | ||
231 | add_dbl: | ||
232 | addl #ad_1_dbl,LOCAL_LO(%a0) |add 1 at the dbl l-bit | ||
233 | bccs dcc_clr |no carry out of the ls mantissa | ||
234 | addql #1,LOCAL_HI(%a0) |propagate carry | ||
235 | bccs dcc_clr |no carry out of the ms mantissa | ||
236 | roxrw LOCAL_HI(%a0) |mant is 0 so restore v-bit | ||
237 | roxrw LOCAL_HI+2(%a0) |mant is 0 so restore v-bit | ||
238 | roxrw LOCAL_LO(%a0) |continue the rotate through the ls mantissa | ||
239 | roxrw LOCAL_LO+2(%a0) |last word of the 64-bit rotate | ||
240 | addw #0x1,LOCAL_EX(%a0) |incr exponent | ||
241 | dcc_clr: | ||
242 | tstl %d0 |test for rs = 0 | ||
243 | bnes dbl_done |r or s set: keep the l-bit | ||
244 | andiw #0xf000,LOCAL_LO+2(%a0) |clear the l-bit | ||
245 | |||
246 | dbl_done: |also entered directly through the trunct table | ||
247 | andil #0xfffff800,LOCAL_LO(%a0) |truncate bits beyond dbl limit | ||
248 | rts | ||
249 | |||
250 | error: |bare return; nothing in this part of the file references this label | ||
251 | rts | ||
252 | | | ||
253 | | Truncate all other bits | ||
254 | | | ||
255 | trunct: |truncation targets indexed by rnd prec in d1.w | ||
256 | .long end_rnd |prec 0: extended, nothing to clear | ||
257 | .long sgl_done |prec 1: clear low 8 bits of LOCAL_HI and all of LOCAL_LO | ||
258 | .long dbl_done |prec 2: clear low 11 bits of LOCAL_LO | ||
259 | .long dbl_done |prec 3: same target as double | ||
260 | |||
261 | truncate: | ||
262 | lea trunct,%a1 |a1 = table base | ||
263 | movel (%a1,%d1.w*4),%a1 |a1 = target for the precision in d1.w | ||
264 | jmp (%a1) |tail-jump; the target ends in rts | ||
265 | |||
266 | end_rnd: | ||
267 | rts | ||
268 | |||
269 | | | ||
270 | | NORMALIZE | ||
271 | | | ||
272 | | These routines (nrm_zero & nrm_set) normalize the unnorm. This | ||
273 | | is done by shifting the mantissa left while decrementing the | ||
274 | | exponent. | ||
275 | | | ||
276 | | NRM_SET shifts and decrements until there is a 1 set in the integer | ||
277 | | bit of the mantissa (msb in d1). | ||
278 | | | ||
279 | | NRM_ZERO shifts and decrements until there is a 1 set in the integer | ||
280 | | bit of the mantissa (msb in d1) unless this would mean the exponent | ||
281 | | would go less than 0. In that case the number becomes a denorm - the | ||
282 | | exponent (d0) is set to 0 and the mantissa (d1 & d2) is not | ||
283 | | normalized. | ||
284 | | | ||
285 | | Note that both routines have been optimized (for the worst case) and | ||
286 | | therefore do not have the easy to follow decrement/shift loop. | ||
287 | | | ||
288 | | NRM_ZERO | ||
289 | | | ||
290 | | Distance to first 1 bit in mantissa = X | ||
291 | | Distance to 0 from exponent = Y | ||
292 | | If X < Y | ||
293 | | Then | ||
294 | | nrm_set | ||
295 | | Else | ||
296 | | shift mantissa by Y | ||
297 | | set exponent = 0 | ||
298 | | | ||
299 | |input: | ||
300 | | FP_SCR1 = exponent, ms mantissa part, ls mantissa part | ||
301 | |output: | ||
302 | | L_SCR1{4} = fpte15 or ete15 bit | ||
303 | | | ||
304 | .global nrm_zero | ||
305 | nrm_zero: | ||
306 | movew LOCAL_EX(%a0),%d0 |d0.w = exponent (Y) | ||
307 | cmpw #64,%d0 |see if exp >= 64 | ||
308 | bmis d0_less |exp below 64: compare it with the shift distance | ||
309 | bsr nrm_set |exp >= 64 so a full normalize can't take it past 0 | ||
310 | rts | ||
311 | d0_less: | ||
312 | moveml %d2/%d3/%d5/%d6,-(%a7) |save scratch registers | ||
313 | movel LOCAL_HI(%a0),%d1 |d1 = ms mant | ||
314 | movel LOCAL_LO(%a0),%d2 |d2 = ls mant | ||
315 | |||
316 | bfffo %d1{#0:#32},%d3 |get the distance to the first 1 | ||
317 | | ;in ms mant | ||
318 | beqs ms_clr |branch if no bits were set | ||
319 | cmpw %d3,%d0 |if X>Y | ||
320 | bmis greater |then exp will go past 0 (neg) if | ||
321 | | ;it is just shifted | ||
322 | bsr nrm_set |else exp won't go past 0 | ||
323 | moveml (%a7)+,%d2/%d3/%d5/%d6 |restore scratch registers | ||
324 | rts | ||
325 | greater: | ||
326 | movel %d2,%d6 |save ls mant in d6 | ||
327 | lsll %d0,%d2 |shift ls mant by count | ||
328 | lsll %d0,%d1 |shift ms mant by count | ||
329 | movel #32,%d5 |NOTE(review): a count of 33..63 (ms mant = 0 path) makes 32-count wrap - ls bits look lost, confirm | ||
330 | subl %d0,%d5 |make op a denorm by shifting bits | ||
331 | lsrl %d5,%d6 |by the number in the exp, then | ||
332 | | ;set exp = 0. | ||
333 | orl %d6,%d1 |shift the ls mant bits into the ms mant | ||
334 | movel #0,%d0 |same as if decremented exp to 0 | ||
335 | | ;while shifting | ||
336 | movew %d0,LOCAL_EX(%a0) |store exp = 0 | ||
337 | movel %d1,LOCAL_HI(%a0) |store ms mant | ||
338 | movel %d2,LOCAL_LO(%a0) |store ls mant | ||
339 | moveml (%a7)+,%d2/%d3/%d5/%d6 |restore scratch registers | ||
340 | rts | ||
341 | ms_clr: | ||
342 | bfffo %d2{#0:#32},%d3 |check if any bits set in ls mant | ||
343 | beqs all_clr |branch if none set | ||
344 | addw #32,%d3 |X = 32 (empty ms mant) + offset within ls mant | ||
345 | cmpw %d3,%d0 |if X>Y | ||
346 | bmis greater |then branch | ||
347 | bsr nrm_set |else exp won't go past 0 | ||
348 | moveml (%a7)+,%d2/%d3/%d5/%d6 |restore scratch registers | ||
349 | rts | ||
350 | all_clr: | ||
351 | movew #0,LOCAL_EX(%a0) |no mantissa bits set. Set exp = 0. | ||
352 | moveml (%a7)+,%d2/%d3/%d5/%d6 |restore scratch registers | ||
353 | rts | ||
354 | | | ||
355 | | NRM_SET | ||
356 | | | ||
357 | .global nrm_set | ||
358 | nrm_set: | ||
359 | movel %d7,-(%a7) |save d7; it becomes the shift count | ||
360 | bfffo LOCAL_HI(%a0){#0:#32},%d7 |find first 1 in ms mant (offset to d7) | ||
361 | beqs lower |branch if ms mant is all 0's | ||
362 | |||
363 | movel %d6,-(%a7) |save d6 for temp use | ||
364 | |||
365 | subw %d7,LOCAL_EX(%a0) |sub exponent by count | ||
366 | movel LOCAL_HI(%a0),%d0 |d0 has ms mant | ||
367 | movel LOCAL_LO(%a0),%d1 |d1 has ls mant | ||
368 | |||
369 | lsll %d7,%d0 |shift first 1 to j bit position | ||
370 | movel %d1,%d6 |copy ls mant into d6 | ||
371 | lsll %d7,%d6 |shift ls mant by count | ||
372 | movel %d6,LOCAL_LO(%a0) |store ls mant into memory | ||
373 | moveql #32,%d6 | ||
374 | subl %d7,%d6 |continue shift | ||
375 | lsrl %d6,%d1 |shift off all bits but those that will | ||
376 | | ;be shifted into ms mant | ||
377 | orl %d1,%d0 |shift the ls mant bits into the ms mant | ||
378 | movel %d0,LOCAL_HI(%a0) |store ms mant into memory | ||
379 | moveml (%a7)+,%d7/%d6 |restore registers | ||
380 | rts | ||
381 | |||
382 | | | ||
383 | | We get here if ms mant was = 0, and we assume ls mant has bits | ||
384 | | set (otherwise this would have been tagged a zero not a denorm). | ||
385 | | | ||
386 | lower: | ||
387 | movew LOCAL_EX(%a0),%d0 |d0 has exponent | ||
388 | movel LOCAL_LO(%a0),%d1 |d1 has ls mant | ||
389 | subw #32,%d0 |account for ms mant being all zeros | ||
390 | bfffo %d1{#0:#32},%d7 |find first 1 in ls mant (offset to d7) | ||
391 | subw %d7,%d0 |subtract shift count from exp | ||
392 | lsll %d7,%d1 |shift first 1 to integer bit in ms mant | ||
393 | movew %d0,LOCAL_EX(%a0) |store exp | ||
394 | movel %d1,LOCAL_HI(%a0) |store ms mant | ||
395 | clrl LOCAL_LO(%a0) |clear ls mant | ||
396 | movel (%a7)+,%d7 |restore d7 (d6 was never pushed on this path) | ||
397 | rts | ||
398 | | | ||
399 | | denorm --- denormalize an intermediate result | ||
400 | | | ||
401 | | Used by underflow. | ||
402 | | | ||
403 | | Input: | ||
404 | | a0 points to the operand to be denormalized | ||
405 | | (in the internal extended format) | ||
406 | | | ||
407 | | d0: rounding precision | ||
408 | | Output: | ||
409 | | a0 points to the denormalized result | ||
410 | | (in the internal extended format) | ||
411 | | | ||
412 | | d0 is guard,round,sticky | ||
413 | | | ||
414 | | d0 comes into this routine with the rounding precision. It | ||
415 | | is then loaded with the denormalized exponent threshold for the | ||
416 | | rounding precision. | ||
417 | | | ||
418 | |||
419 | .global denorm | ||
420 | denorm: | ||
421 | btstb #6,LOCAL_EX(%a0) |check for exponents between $7fff-$4000 | ||
422 | beqs no_sgn_ext |bit 14 clear: exponent left as is | ||
423 | bsetb #7,LOCAL_EX(%a0) |sign extend if it is so | ||
424 | no_sgn_ext: | ||
425 | |||
426 | cmpib #0,%d0 |if 0 then extended precision | ||
427 | bnes not_ext |else branch | ||
428 | |||
429 | clrl %d1 |load d1 with ext threshold | ||
430 | clrl %d0 |clear the sticky flag | ||
431 | bsr dnrm_lp |denormalize the number | ||
432 | tstb %d1 |check for inex | ||
433 | beq no_inex |if clr, no inex | ||
434 | bras dnrm_inex |if set, set inex | ||
435 | |||
436 | not_ext: | ||
437 | cmpil #1,%d0 |if 1 then single precision; NOTE(review): long compare here, byte compare above | ||
438 | beqs load_sgl |else must be 2, double prec | ||
439 | |||
440 | load_dbl: | ||
441 | movew #dbl_thresh,%d1 |put copy of threshold in d1 | ||
442 | movel %d1,%d0 |copy d1 into d0 | ||
443 | subw LOCAL_EX(%a0),%d0 |diff = threshold - exp | ||
444 | cmpw #67,%d0 |if diff >= 67 (mant + grs bits) | ||
445 | bpls chk_stky |then branch (all bits would be | ||
446 | | ; shifted off in denorm routine) | ||
447 | clrl %d0 |else clear the sticky flag | ||
448 | bsr dnrm_lp |denormalize the number | ||
449 | tstb %d1 |check flag | ||
450 | beqs no_inex |if clr, no inex | ||
451 | bras dnrm_inex |if set, set inex | ||
452 | |||
453 | load_sgl: | ||
454 | movew #sgl_thresh,%d1 |put copy of threshold in d1 | ||
455 | movel %d1,%d0 |copy d1 into d0 | ||
456 | subw LOCAL_EX(%a0),%d0 |diff = threshold - exp | ||
457 | cmpw #67,%d0 |if diff >= 67 (mant + grs bits) | ||
458 | bpls chk_stky |then branch (all bits would be | ||
459 | | ; shifted off in denorm routine) | ||
460 | clrl %d0 |else clear the sticky flag | ||
461 | bsr dnrm_lp |denormalize the number | ||
462 | tstb %d1 |check flag | ||
463 | beqs no_inex |if clr, no inex | ||
464 | bras dnrm_inex |if set, set inex | ||
465 | |||
466 | chk_stky: | ||
467 | tstl LOCAL_HI(%a0) |check for any bits set | ||
468 | bnes set_stky | ||
469 | tstl LOCAL_LO(%a0) |check for any bits set | ||
470 | bnes set_stky | ||
471 | bras clr_mant |mantissa is zero; NOTE(review): d0 is not loaded with a g,r,s value on this path | ||
472 | set_stky: | ||
473 | orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex | ||
474 | movel #0x20000000,%d0 |set sticky bit in return value | ||
475 | clr_mant: | ||
476 | movew %d1,LOCAL_EX(%a0) |load exp with threshold | ||
477 | movel #0,LOCAL_HI(%a0) |clear ms mantissa (LOCAL_HI) | ||
478 | movel #0,LOCAL_LO(%a0) |clear ls mantissa (LOCAL_LO) | ||
479 | rts | ||
480 | dnrm_inex: | ||
481 | orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex | ||
482 | no_inex: | ||
483 | rts | ||
484 | |||
485 | | | ||
486 | | dnrm_lp --- normalize exponent/mantissa to specified threshold | ||
487 | | | ||
488 | | Input: | ||
489 | | a0 points to the operand to be denormalized | ||
490 | | d0{31:29} initial guard,round,sticky | ||
491 | | d1{15:0} denormalization threshold | ||
492 | | Output: | ||
493 | | a0 points to the denormalized operand | ||
494 | | d0{31:29} final guard,round,sticky | ||
495 | | d1.b inexact flag: all ones means inexact result | ||
496 | | | ||
497 | | The LOCAL_LO and LOCAL_GRS parts of the value are copied to FP_SCR2 | ||
498 | | so that bfext can be used to extract the new low part of the mantissa. | ||
499 | | Dnrm_lp can be called with a0 pointing to ETEMP or WBTEMP and there | ||
500 | | is no LOCAL_GRS scratch word following it on the fsave frame. | ||
501 | | | ||
502 | .global dnrm_lp | ||
503 | dnrm_lp: | ||
504 | movel %d2,-(%sp) |save d2 for temp use | ||
505 | btstb #E3,E_BYTE(%a6) |test for type E3 exception | ||
506 | beqs not_E3 |not type E3 exception | ||
507 | bfextu WBTEMP_GRS(%a6){#6:#3},%d2 |extract guard,round, sticky bit | ||
508 | movel #29,%d0 |shift count that puts 3 bits at d2{31:29} | ||
509 | lsll %d0,%d2 |shift g,r,s to their positions | ||
510 | movel %d2,%d0 |E3: these g,r,s replace the caller's d0 | ||
511 | not_E3: | ||
512 | movel (%sp)+,%d2 |restore d2 | ||
513 | movel LOCAL_LO(%a0),FP_SCR2+LOCAL_LO(%a6) |copy ls mantissa to the FP_SCR2 scratch | ||
514 | movel %d0,FP_SCR2+LOCAL_GRS(%a6) |keep the initial g,r,s next to it | ||
515 | movel %d1,%d0 |copy the denorm threshold | ||
516 | subw LOCAL_EX(%a0),%d1 |d1 = threshold - uns exponent | ||
517 | bles no_lp |d1 <= 0 | ||
518 | cmpw #32,%d1 | ||
519 | blts case_1 |0 < d1 < 32 | ||
520 | cmpw #64,%d1 | ||
521 | blts case_2 |32 <= d1 < 64 | ||
522 | bra case_3 |d1 >= 64 | ||
523 | | | ||
524 | | No normalization necessary | ||
525 | | | ||
526 | no_lp: |reached when threshold - exponent <= 0; operand is left untouched | ||
527 | clrb %d1 |set no inex2 reported | ||
528 | movel FP_SCR2+LOCAL_GRS(%a6),%d0 |restore original g,r,s | ||
529 | rts | ||
530 | | | ||
531 | | case (0<d1<32) | ||
532 | | | ||
533 | case_1: | ||
534 | movel %d2,-(%sp) |save d2 for temp use | ||
535 | movew %d0,LOCAL_EX(%a0) |exponent = denorm threshold | ||
536 | movel #32,%d0 | ||
537 | subw %d1,%d0 |d0 = 32 - d1 | ||
538 | bfextu LOCAL_EX(%a0){%d0:#32},%d2 |32 bits starting (32-d1) bits into the exp lword | ||
539 | bfextu %d2{%d1:%d0},%d2 |d2 = new LOCAL_HI | ||
540 | bfextu LOCAL_HI(%a0){%d0:#32},%d1 |d1 = new LOCAL_LO | ||
541 | bfextu FP_SCR2+LOCAL_LO(%a6){%d0:#32},%d0 |d0 = new G,R,S | ||
542 | movel %d2,LOCAL_HI(%a0) |store new LOCAL_HI | ||
543 | movel %d1,LOCAL_LO(%a0) |store new LOCAL_LO | ||
544 | clrb %d1 |start with the inexact flag clear | ||
545 | bftst %d0{#2:#30} |any of bits 29..0 (sticky position and below) set? | ||
546 | beqs c1nstky |no: leave sticky and flag alone | ||
547 | bsetl #rnd_stky_bit,%d0 |yes: set sticky | ||
548 | st %d1 |and set the inexact flag (d1.b = all ones) | ||
549 | c1nstky: | ||
550 | movel FP_SCR2+LOCAL_GRS(%a6),%d2 |restore original g,r,s | ||
551 | andil #0xe0000000,%d2 |clear all but G,R,S | ||
552 | tstl %d2 |test if original G,R,S are clear | ||
553 | beqs grs_clear | ||
554 | orl #0x20000000,%d0 |set sticky bit in d0 | ||
555 | grs_clear: | ||
556 | andil #0xe0000000,%d0 |clear all but G,R,S | ||
557 | movel (%sp)+,%d2 |restore d2 | ||
558 | rts | ||
559 | | | ||
560 | | case (32<=d1<64) | ||
561 | | | ||
562 | case_2: | ||
563 | movel %d2,-(%sp) |save d2 for temp use | ||
564 | movew %d0,LOCAL_EX(%a0) |unsigned exponent = threshold | ||
565 | subw #32,%d1 |d1 now between 0 and 32 | ||
566 | movel #32,%d0 | ||
567 | subw %d1,%d0 |d0 = 32 - d1 | ||
568 | bfextu LOCAL_EX(%a0){%d0:#32},%d2 |32 bits starting (32-d1) bits into the exp lword | ||
569 | bfextu %d2{%d1:%d0},%d2 |d2 = new LOCAL_LO | ||
570 | bfextu LOCAL_HI(%a0){%d0:#32},%d1 |d1 = new G,R,S | ||
571 | bftst %d1{#2:#30} |any of bits 29..0 (sticky position and below) set? | ||
572 | bnes c2_sstky |bra if sticky bit to be set | ||
573 | bftst FP_SCR2+LOCAL_LO(%a6){%d0:#32} |any bits set in the saved ls mantissa field? | ||
574 | bnes c2_sstky |bra if sticky bit to be set | ||
575 | movel %d1,%d0 |d0 = new G,R,S, sticky untouched | ||
576 | clrb %d1 |inexact flag clear | ||
577 | bras end_c2 | ||
578 | c2_sstky: | ||
579 | movel %d1,%d0 |d0 = new G,R,S | ||
580 | bsetl #rnd_stky_bit,%d0 |set sticky | ||
581 | st %d1 |set the inexact flag (d1.b = all ones) | ||
582 | end_c2: | ||
583 | clrl LOCAL_HI(%a0) |store LOCAL_HI = 0 | ||
584 | movel %d2,LOCAL_LO(%a0) |store LOCAL_LO | ||
585 | movel FP_SCR2+LOCAL_GRS(%a6),%d2 |restore original g,r,s | ||
586 | andil #0xe0000000,%d2 |clear all but G,R,S | ||
587 | tstl %d2 |test if original G,R,S are clear | ||
588 | beqs clear_grs | ||
589 | orl #0x20000000,%d0 |set sticky bit in d0 | ||
590 | clear_grs: | ||
591 | andil #0xe0000000,%d0 |get rid of all but G,R,S | ||
592 | movel (%sp)+,%d2 |restore d2 | ||
593 | rts | ||
594 | | | ||
595 | | d1 >= 64 Force the exponent to be the denorm threshold with the | ||
596 | | correct sign. | ||
597 | | | ||
598 | case_3: | ||
599 | movew %d0,LOCAL_EX(%a0) |exponent = denorm threshold | ||
600 | tstw LOCAL_SGN(%a0) |check the sign word | ||
601 | bges c3con |non-negative: nothing to add | ||
602 | c3neg: | ||
603 | orl #0x80000000,LOCAL_EX(%a0) |negative: set the top bit of the exponent lword | ||
604 | c3con: | ||
605 | cmpw #64,%d1 | ||
606 | beqs sixty_four |shift of exactly 64 | ||
607 | cmpw #65,%d1 | ||
608 | beqs sixty_five |shift of exactly 65 | ||
609 | | | ||
610 | | Shift value is out of range. Set d1 for inex2 flag and | ||
611 | | return a zero with the given threshold. | ||
612 | | | ||
613 | clrl LOCAL_HI(%a0) |mantissa = 0 | ||
614 | clrl LOCAL_LO(%a0) | ||
615 | movel #0x20000000,%d0 |g,r,s = sticky only | ||
616 | st %d1 |set the inexact flag (d1.b = all ones) | ||
617 | rts | ||
618 | |||
619 | sixty_four: | ||
620 | movel LOCAL_HI(%a0),%d0 |d0 = ms mantissa | ||
621 | bfextu %d0{#2:#30},%d1 |d1 = its low 30 bits (sticky candidates) | ||
622 | andil #0xc0000000,%d0 |its top two bits stay as G and R | ||
623 | bras c3com | ||
624 | |||
625 | sixty_five: | ||
626 | movel LOCAL_HI(%a0),%d0 |d0 = ms mantissa | ||
627 | bfextu %d0{#1:#31},%d1 |d1 = its low 31 bits (sticky candidates) | ||
628 | andil #0x80000000,%d0 |keep only the top bit | ||
629 | lsrl #1,%d0 |shift high bit into R bit | ||
630 | |||
631 | c3com: | ||
632 | tstl %d1 |any bits shifted past R? | ||
633 | bnes c3ssticky | ||
634 | tstl LOCAL_LO(%a0) |any bits in the ls mantissa? | ||
635 | bnes c3ssticky | ||
636 | tstb FP_SCR2+LOCAL_GRS(%a6) |any bits in the top byte of the saved g,r,s? | ||
637 | bnes c3ssticky | ||
638 | clrb %d1 |nothing lost: inexact flag clear | ||
639 | bras c3end | ||
640 | |||
641 | c3ssticky: | ||
642 | bsetl #rnd_stky_bit,%d0 |set sticky | ||
643 | st %d1 |set the inexact flag (d1.b = all ones) | ||
644 | c3end: | ||
645 | clrl LOCAL_HI(%a0) |mantissa = 0 | ||
646 | clrl LOCAL_LO(%a0) | ||
647 | rts | ||
648 | |||
649 | |end | ||
diff --git a/arch/m68k/fpsp040/sacos.S b/arch/m68k/fpsp040/sacos.S new file mode 100644 index 000000000000..83b00ab1c48f --- /dev/null +++ b/arch/m68k/fpsp040/sacos.S | |||
@@ -0,0 +1,115 @@ | |||
1 | | | ||
2 | | sacos.sa 3.3 12/19/90 | ||
3 | | | ||
4 | | Description: The entry point sAcos computes the inverse cosine of | ||
5 | | an input argument; sAcosd does the same except for denormalized | ||
6 | | input. | ||
7 | | | ||
8 | | Input: Double-extended number X in location pointed to | ||
9 | | by address register a0. | ||
10 | | | ||
11 | | Output: The value arccos(X) returned in floating-point register Fp0. | ||
12 | | | ||
13 | | Accuracy and Monotonicity: The returned result is within 3 ulps in | ||
14 | | 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the | ||
15 | | result is subsequently rounded to double precision. The | ||
16 | | result is provably monotonic in double precision. | ||
17 | | | ||
18 | | Speed: The program sACOS takes approximately 310 cycles. | ||
19 | | | ||
20 | | Algorithm: | ||
21 | | | ||
22 | | ACOS | ||
23 | | 1. If |X| >= 1, go to 3. | ||
24 | | | ||
25 | | 2. (|X| < 1) Calculate acos(X) by | ||
26 | | z := (1-X) / (1+X) | ||
27 | | acos(X) = 2 * atan( sqrt(z) ). | ||
28 | | Exit. | ||
29 | | | ||
30 | | 3. If |X| > 1, go to 5. | ||
31 | | | ||
32 | | 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. | ||
33 | | | ||
34 | | 5. (|X| > 1) Generate an invalid operation by 0 * infinity. | ||
35 | | Exit. | ||
36 | | | ||
37 | |||
38 | | Copyright (C) Motorola, Inc. 1990 | ||
39 | | All Rights Reserved | ||
40 | | | ||
41 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
42 | | The copyright notice above does not evidence any | ||
43 | | actual or intended publication of such source code. | ||
44 | |||
45 | |SACOS idnt 2,1 | Motorola 040 Floating Point Software Package | ||
46 | |||
47 | |section 8 | ||
48 | |||
49 | PI: .long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000 | ...pi, loaded with fmovex; the 4th lword is beyond the 12 bytes fmovex reads | ||
50 | PIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 | ...pi/2: same mantissa, exponent one lower | ||
51 | |||
52 | |xref t_operr | ||
53 | |xref t_frcinx | ||
54 | |xref satan | ||
55 | |||
56 | .global sacosd | ||
57 | sacosd: | ||
58 | |--ACOS(X) = PI/2 FOR DENORMALIZED X | ||
59 | fmovel %d1,%fpcr | ...load user's rounding mode/precision | ||
60 | fmovex PIBY2,%fp0 | ...fp0 = PI/2, rounded under the user's fpcr | ||
61 | bra t_frcinx | ...exit through t_frcinx | ||
62 | |||
63 | .global sacos | ||
64 | sacos: | ||
65 | fmovex (%a0),%fp0 | ...LOAD INPUT | ||
66 | |||
67 | movel (%a0),%d0 | ...pack exponent with upper 16 fraction | ||
68 | movew 4(%a0),%d0 | ...low word = top 16 bits of the mantissa | ||
69 | andil #0x7FFFFFFF,%d0 | ...drop the sign bit | ||
70 | cmpil #0x3FFF8000,%d0 | ...compare with the packed form of 1.0 | ||
71 | bges ACOSBIG | ...|X| >= 1 | ||
72 | |||
73 | |--THIS IS THE USUAL CASE, |X| < 1 | ||
74 | |--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) ) | ||
75 | |||
76 | fmoves #0x3F800000,%fp1 | ...1 (single precision 1.0) | ||
77 | faddx %fp0,%fp1 | ...1+X | ||
78 | fnegx %fp0 | ... -X | ||
79 | fadds #0x3F800000,%fp0 | ...1-X | ||
80 | fdivx %fp1,%fp0 | ...(1-X)/(1+X) | ||
81 | fsqrtx %fp0 | ...SQRT((1-X)/(1+X)) | ||
82 | fmovemx %fp0-%fp0,(%a0) | ...overwrite input | ||
83 | movel %d1,-(%sp) |save original users fpcr | ||
84 | clrl %d1 |call satan with d1 = 0 (presumably its fpcr argument - confirm) | ||
85 | bsr satan | ...ATAN(SQRT([1-X]/[1+X])) | ||
86 | fmovel (%sp)+,%fpcr |restore users exceptions | ||
87 | faddx %fp0,%fp0 | ...2 * ATAN( STUFF ) | ||
88 | bra t_frcinx | ...exit through t_frcinx | ||
89 | |||
90 | ACOSBIG: | ||
91 | fabsx %fp0 | ...|X| | ||
92 | fcmps #0x3F800000,%fp0 | ...compare |X| with 1 | ||
93 | fbgt t_operr |cause an operr exception | ||
94 | |||
95 | |--|X| = 1, ACOS(X) = 0 OR PI | ||
96 | movel (%a0),%d0 | ...pack exponent with upper 16 fraction | ||
97 | movew 4(%a0),%d0 | ...low word = top 16 bits of the mantissa | ||
98 | cmpl #0,%d0 |D0 has original exponent+fraction | ||
99 | bgts ACOSP1 | ...sign bit clear: X = +1 | ||
100 | |||
101 | |--X = -1 | ||
102 | |Returns PI and inexact exception | ||
103 | fmovex PI,%fp0 | ...fp0 = PI | ||
104 | fmovel %d1,%FPCR | ...load user's rounding mode/precision | ||
105 | fadds #0x00800000,%fp0 |cause an inexact exception to be put | ||
106 | | ;into the 040 - will not trap until next | ||
107 | | ;fp inst. | ||
108 | bra t_frcinx | ...exit through t_frcinx | ||
109 | |||
110 | ACOSP1: | ||
111 | fmovel %d1,%FPCR | ...load user's rounding mode/precision | ||
112 | fmoves #0x00000000,%fp0 | ...fp0 = +0 | ||
113 | rts |Facos ; of +1 is exact | ||
114 | |||
115 | |end | ||
diff --git a/arch/m68k/fpsp040/sasin.S b/arch/m68k/fpsp040/sasin.S new file mode 100644 index 000000000000..5647a6043903 --- /dev/null +++ b/arch/m68k/fpsp040/sasin.S | |||
@@ -0,0 +1,104 @@ | |||
1 | | | ||
2 | | sasin.sa 3.3 12/19/90 | ||
3 | | | ||
4 | | Description: The entry point sAsin computes the inverse sine of | ||
5 | | an input argument; sAsind does the same except for denormalized | ||
6 | | input. | ||
7 | | | ||
8 | | Input: Double-extended number X in location pointed to | ||
9 | | by address register a0. | ||
10 | | | ||
11 | | Output: The value arcsin(X) returned in floating-point register Fp0. | ||
12 | | | ||
13 | | Accuracy and Monotonicity: The returned result is within 3 ulps in | ||
14 | | 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the | ||
15 | | result is subsequently rounded to double precision. The | ||
16 | | result is provably monotonic in double precision. | ||
17 | | | ||
18 | | Speed: The program sASIN takes approximately 310 cycles. | ||
19 | | | ||
20 | | Algorithm: | ||
21 | | | ||
22 | | ASIN | ||
23 | | 1. If |X| >= 1, go to 3. | ||
24 | | | ||
25 | | 2. (|X| < 1) Calculate asin(X) by | ||
26 | | z := sqrt( [1-X][1+X] ) | ||
27 | | asin(X) = atan( x / z ). | ||
28 | | Exit. | ||
29 | | | ||
30 | | 3. If |X| > 1, go to 5. | ||
31 | | | ||
32 | | 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit. | ||
33 | | | ||
34 | | 5. (|X| > 1) Generate an invalid operation by 0 * infinity. | ||
35 | | Exit. | ||
36 | | | ||
37 | |||
38 | | Copyright (C) Motorola, Inc. 1990 | ||
39 | | All Rights Reserved | ||
40 | | | ||
41 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
42 | | The copyright notice above does not evidence any | ||
43 | | actual or intended publication of such source code. | ||
44 | |||
45 | |SASIN idnt 2,1 | Motorola 040 Floating Point Software Package | ||
46 | |||
47 | |section 8 | ||
48 | |||
49 | PIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 | ...pi/2, loaded with fmovex; the 4th lword is beyond the 12 bytes fmovex reads | ||
50 | |||
51 | |xref t_operr | ||
52 | |xref t_frcinx | ||
53 | |xref t_extdnrm | ||
54 | |xref satan | ||
55 | |||
56 | .global sasind | ||
57 | sasind: | ||
58 | |--ASIN(X) = X FOR DENORMALIZED X | ||
59 | |||
60 | bra t_extdnrm | ...nothing computed here; t_extdnrm produces the result | ||
61 | |||
62 | .global sasin | ||
63 | sasin: | ||
64 | fmovex (%a0),%fp0 | ...LOAD INPUT | ||
65 | |||
66 | movel (%a0),%d0 | ...sign/exponent lword | ||
67 | movew 4(%a0),%d0 | ...low word = top 16 bits of the mantissa | ||
68 | andil #0x7FFFFFFF,%d0 | ...drop the sign bit | ||
69 | cmpil #0x3FFF8000,%d0 | ...compare with the packed form of 1.0 | ||
70 | bges asinbig | ...|X| >= 1 | ||
71 | |||
72 | |--THIS IS THE USUAL CASE, |X| < 1 | ||
73 | |--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) ) | ||
74 | |||
75 | fmoves #0x3F800000,%fp1 | ...1 (single precision 1.0) | ||
76 | fsubx %fp0,%fp1 | ...1-X | ||
77 | fmovemx %fp2-%fp2,-(%a7) | ...save fp2 for temp use | ||
78 | fmoves #0x3F800000,%fp2 | ...1 | ||
79 | faddx %fp0,%fp2 | ...1+X | ||
80 | fmulx %fp2,%fp1 | ...(1+X)(1-X) | ||
81 | fmovemx (%a7)+,%fp2-%fp2 | ...restore fp2 | ||
82 | fsqrtx %fp1 | ...SQRT([1-X][1+X]) | ||
83 | fdivx %fp1,%fp0 | ...X/SQRT([1-X][1+X]) | ||
84 | fmovemx %fp0-%fp0,(%a0) | ...overwrite input with the quotient | ||
85 | bsr satan | ...ATAN(X/SQRT([1-X][1+X])); d1 is passed through unchanged | ||
86 | bra t_frcinx | ...exit through t_frcinx | ||
87 | |||
88 | asinbig: | ||
89 | fabsx %fp0 | ...|X| | ||
90 | fcmps #0x3F800000,%fp0 | ...compare |X| with 1 | ||
91 | fbgt t_operr |cause an operr exception | ||
92 | |||
93 | |--|X| = 1, ASIN(X) = +- PI/2. | ||
94 | |||
95 | fmovex PIBY2,%fp0 | ...fp0 = PI/2 | ||
96 | movel (%a0),%d0 | ...sign/exponent lword of X | ||
97 | andil #0x80000000,%d0 | ...SIGN BIT OF X | ||
98 | oril #0x3F800000,%d0 | ...+-1 IN SGL FORMAT | ||
99 | movel %d0,-(%sp) | ...push SIGN(X) IN SGL-FMT | ||
100 | fmovel %d1,%FPCR | ...load the fpcr value held in d1 | ||
101 | fmuls (%sp)+,%fp0 | ...SIGN(X) * PI/2, popping the operand | ||
102 | bra t_frcinx | ...exit through t_frcinx | ||
103 | |||
104 | |end | ||
diff --git a/arch/m68k/fpsp040/satan.S b/arch/m68k/fpsp040/satan.S new file mode 100644 index 000000000000..20dae222d51e --- /dev/null +++ b/arch/m68k/fpsp040/satan.S | |||
@@ -0,0 +1,478 @@ | |||
1 | | | ||
2 | | satan.sa 3.3 12/19/90 | ||
3 | | | ||
4 | | The entry point satan computes the arctangent of an | ||
5 | | input value. satand does the same except the input value is a | ||
6 | | denormalized number. | ||
7 | | | ||
8 | | Input: Double-extended value in memory location pointed to by address | ||
9 | | register a0. | ||
10 | | | ||
11 | | Output: Arctan(X) returned in floating-point register Fp0. | ||
12 | | | ||
13 | | Accuracy and Monotonicity: The returned result is within 2 ulps in | ||
14 | | 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the | ||
15 | | result is subsequently rounded to double precision. The | ||
16 | | result is provably monotonic in double precision. | ||
17 | | | ||
18 | | Speed: The program satan takes approximately 160 cycles for input | ||
19 | | argument X such that 1/16 < |X| < 16. For the other arguments, | ||
20 | | the program will run no worse than 10% slower. | ||
21 | | | ||
22 | | Algorithm: | ||
23 | | Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. | ||
24 | | | ||
25 | | Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3. | ||
26 | | Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits | ||
27 | | of X with a bit-1 attached at the 6-th bit position. Define u | ||
28 | | to be u = (X-F) / (1 + X*F). | ||
29 | | | ||
30 | | Step 3. Approximate arctan(u) by a polynomial poly. | ||
31 | | | ||
32 | | Step 4. Return arctan(F) + poly, arctan(F) is fetched from a table of values | ||
33 | | calculated beforehand. Exit. | ||
34 | | | ||
35 | | Step 5. If |X| >= 16, go to Step 7. | ||
36 | | | ||
37 | | Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. | ||
38 | | | ||
39 | | Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'. | ||
40 | | Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. | ||
41 | | | ||
42 | |||
43 | | Copyright (C) Motorola, Inc. 1990 | ||
44 | | All Rights Reserved | ||
45 | | | ||
46 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
47 | | The copyright notice above does not evidence any | ||
48 | | actual or intended publication of such source code. | ||
49 | |||
50 | |satan idnt 2,1 | Motorola 040 Floating Point Software Package | ||
51 | |||
52 | |section 8 | ||
53 | |||
54 | #include "fpsp.h" | ||
55 | |||
56 | BOUNDS1: .long 0x3FFB8000,0x4002FFFF | ...SAME VALUES AS THE 1/16 AND 16 COMPARE IMMEDIATES IN SATAN | ||
57 | |||
58 | ONE: .long 0x3F800000 | ...1.0 IN SGL FORMAT | ||
59 | |||
60 | .long 0x00000000 | ||
61 | |||
62 | ATANA3: .long 0xBFF6687E,0x314987D8 | ...A3..A1: DOUBLE-FORMAT COEFFICIENTS READ BY ATANMAIN | ||
63 | ATANA2: .long 0x4002AC69,0x34A26DB3 | ||
64 | |||
65 | ATANA1: .long 0xBFC2476F,0x4E1DA28E | ||
66 | ATANB6: .long 0x3FB34444,0x7F876989 | ...B6..B1: DOUBLE-FORMAT COEFFICIENTS READ BY ATANSM | ||
67 | |||
68 | ATANB5: .long 0xBFB744EE,0x7FAF45DB | ||
69 | ATANB4: .long 0x3FBC71C6,0x46940220 | ||
70 | |||
71 | ATANB3: .long 0xBFC24924,0x921872F9 | ||
72 | ATANB2: .long 0x3FC99999,0x99998FA9 | ||
73 | |||
74 | ATANB1: .long 0xBFD55555,0x55555555 | ||
75 | ATANC5: .long 0xBFB70BF3,0x98539E6A | ...C5..C1: DOUBLE-FORMAT COEFFICIENTS READ BY ATANBIG | ||
76 | |||
77 | ATANC4: .long 0x3FBC7187,0x962D1D7D | ||
78 | ATANC3: .long 0xBFC24924,0x827107B8 | ||
79 | |||
80 | ATANC2: .long 0x3FC99999,0x9996263E | ||
81 | ATANC1: .long 0xBFD55555,0x55555536 | ||
82 | |||
83 | PPIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 | ...+PI/2 IN EXTENDED FORMAT | ||
84 | NPIBY2: .long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000 | ...-PI/2 IN EXTENDED FORMAT | ||
85 | PTINY: .long 0x00010000,0x80000000,0x00000000,0x00000000 | ...+TINY, SUBTRACTED FROM +PI/2 IN ATANHUGE | ||
86 | NTINY: .long 0x80010000,0x80000000,0x00000000,0x00000000 | ...-TINY, SUBTRACTED FROM -PI/2 IN ATANHUGE | ||
87 | |||
88 | ATANTBL: | ...ATAN(|F|) TABLE, 128 ENTRIES OF 4 LONGWORDS EACH, INDEXED IN ATANMAIN | ||
89 | .long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000 | ||
90 | .long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000 | ||
91 | .long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000 | ||
92 | .long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000 | ||
93 | .long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000 | ||
94 | .long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000 | ||
95 | .long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000 | ||
96 | .long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000 | ||
97 | .long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000 | ||
98 | .long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000 | ||
99 | .long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000 | ||
100 | .long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000 | ||
101 | .long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000 | ||
102 | .long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000 | ||
103 | .long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000 | ||
104 | .long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000 | ||
105 | .long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000 | ||
106 | .long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000 | ||
107 | .long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000 | ||
108 | .long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000 | ||
109 | .long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000 | ||
110 | .long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000 | ||
111 | .long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000 | ||
112 | .long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000 | ||
113 | .long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000 | ||
114 | .long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000 | ||
115 | .long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000 | ||
116 | .long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000 | ||
117 | .long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000 | ||
118 | .long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000 | ||
119 | .long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000 | ||
120 | .long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000 | ||
121 | .long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000 | ||
122 | .long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000 | ||
123 | .long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000 | ||
124 | .long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000 | ||
125 | .long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000 | ||
126 | .long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000 | ||
127 | .long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000 | ||
128 | .long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000 | ||
129 | .long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000 | ||
130 | .long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000 | ||
131 | .long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000 | ||
132 | .long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000 | ||
133 | .long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000 | ||
134 | .long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000 | ||
135 | .long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000 | ||
136 | .long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000 | ||
137 | .long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000 | ||
138 | .long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000 | ||
139 | .long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000 | ||
140 | .long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000 | ||
141 | .long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000 | ||
142 | .long 0x3FFE0000,0x97731420,0x365E538C,0x00000000 | ||
143 | .long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000 | ||
144 | .long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000 | ||
145 | .long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000 | ||
146 | .long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000 | ||
147 | .long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000 | ||
148 | .long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000 | ||
149 | .long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000 | ||
150 | .long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000 | ||
151 | .long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000 | ||
152 | .long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000 | ||
153 | .long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000 | ||
154 | .long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000 | ||
155 | .long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000 | ||
156 | .long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000 | ||
157 | .long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000 | ||
158 | .long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000 | ||
159 | .long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000 | ||
160 | .long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000 | ||
161 | .long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000 | ||
162 | .long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000 | ||
163 | .long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000 | ||
164 | .long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000 | ||
165 | .long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000 | ||
166 | .long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000 | ||
167 | .long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000 | ||
168 | .long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000 | ||
169 | .long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000 | ||
170 | .long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000 | ||
171 | .long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000 | ||
172 | .long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000 | ||
173 | .long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000 | ||
174 | .long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000 | ||
175 | .long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000 | ||
176 | .long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000 | ||
177 | .long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000 | ||
178 | .long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000 | ||
179 | .long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000 | ||
180 | .long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000 | ||
181 | .long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000 | ||
182 | .long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000 | ||
183 | .long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000 | ||
184 | .long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000 | ||
185 | .long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000 | ||
186 | .long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000 | ||
187 | .long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000 | ||
188 | .long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000 | ||
189 | .long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000 | ||
190 | .long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000 | ||
191 | .long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000 | ||
192 | .long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000 | ||
193 | .long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000 | ||
194 | .long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000 | ||
195 | .long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000 | ||
196 | .long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000 | ||
197 | .long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000 | ||
198 | .long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000 | ||
199 | .long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000 | ||
200 | .long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000 | ||
201 | .long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000 | ||
202 | .long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000 | ||
203 | .long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000 | ||
204 | .long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000 | ||
205 | .long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000 | ||
206 | .long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000 | ||
207 | .long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000 | ||
208 | .long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000 | ||
209 | .long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000 | ||
210 | .long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000 | ||
211 | .long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000 | ||
212 | .long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000 | ||
213 | .long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000 | ||
214 | .long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000 | ||
215 | .long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000 | ||
216 | .long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000 | ||
217 | |||
218 | .set X,FP_SCR1 | ...SCRATCH COPY OF THE ARGUMENT (LATER F OR X') | ||
219 | .set XDCARE,X+2 | ...WORD AT OFFSET 2 OF X, ZEROED BEFORE X IS REUSED | ||
220 | .set XFRAC,X+4 | ...UPPER 32 MANTISSA BITS OF X | ||
221 | .set XFRACLO,X+8 | ...LOWER 32 MANTISSA BITS OF X | ||
222 | |||
223 | .set ATANF,FP_SCR2 | ...SCRATCH FOR SIGN(F)*ATAN(|F|) | ||
224 | .set ATANFHI,ATANF+4 | ...SECOND LONGWORD OF ATANF | ||
225 | .set ATANFLO,ATANF+8 | ...THIRD LONGWORD OF ATANF | ||
226 | |||
227 | |||
228 | | xref t_frcinx | ||
229 | |xref t_extdnrm | ||
230 | |||
231 | .global satand | ||
232 | satand: | ||
233 | |--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT | ||
234 | |||
235 | bra t_extdnrm | ...HAND OFF TO T_EXTDNRM (EXTERNAL, SEE XREF LIST) | ||
236 | |||
237 | .global satan | ||
238 | satan: | ||
239 | |--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT A NAN | ||
240 | |||
241 | fmovex (%a0),%fp0 | ...LOAD INPUT | ||
242 | |||
243 | movel (%a0),%d0 | ...UPPER HALF OF d0 = SIGN/EXPONENT WORD | ||
244 | movew 4(%a0),%d0 | ...LOWER HALF OF d0 = TOP 16 MANTISSA BITS | ||
245 | fmovex %fp0,X(%a6) | ...KEEP A COPY OF X IN SCRATCH | ||
246 | andil #0x7FFFFFFF,%d0 | ...DROP SIGN, d0 IS |X| IN COMPACT FORM | ||
247 | |||
248 | cmpil #0x3FFB8000,%d0 | ...|X| >= 1/16? | ||
249 | bges ATANOK1 | ||
250 | bra ATANSM | ...NO: SMALL-ARGUMENT POLYNOMIAL | ||
251 | |||
252 | ATANOK1: | ||
253 | cmpil #0x4002FFFF,%d0 | ...|X| < 16 ? | ||
254 | bles ATANMAIN | ...YES: TABLE-DRIVEN CASE | ||
255 | bra ATANBIG | ...NO: LARGE-ARGUMENT CASE | ||
256 | |||
257 | |||
258 | |--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE | ||
259 | |--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ). | ||
260 | |--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN | ||
261 | |--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE | ||
262 | |--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS | ||
263 | |--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR | ||
264 | |--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO | ||
265 | |--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDDEN UNDER THE | ||
266 | |--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL | ||
267 | |--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES | ||
268 | |--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION | ||
269 | |--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION | ||
270 | |--WILL INVOLVE A VERY LONG POLYNOMIAL. | ||
271 | |||
272 | |--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS | ||
273 | |--WE CHOSE F TO BE +-2^K * 1.BBBB1 | ||
274 | |--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE | ||
275 | |--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE | ||
276 | |--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS | ||
277 | |-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|). | ||
278 | |||
279 | ATANMAIN: | ||
280 | |||
281 | movew #0x0000,XDCARE(%a6) | ...CLEAN UP X JUST IN CASE | ||
282 | andil #0xF8000000,XFRAC(%a6) | ...FIRST 5 BITS | ||
283 | oril #0x04000000,XFRAC(%a6) | ...SET 6-TH BIT TO 1 | ||
284 | movel #0x00000000,XFRACLO(%a6) | ...LOCATION OF X IS NOW F | ||
285 | |||
286 | fmovex %fp0,%fp1 | ...FP1 IS X | ||
287 | fmulx X(%a6),%fp1 | ...FP1 IS X*F, NOTE THAT X*F > 0 | ||
288 | fsubx X(%a6),%fp0 | ...FP0 IS X-F | ||
289 | fadds #0x3F800000,%fp1 | ...FP1 IS 1 + X*F | ||
290 | fdivx %fp1,%fp0 | ...FP0 IS U = (X-F)/(1+X*F) | ||
291 | |||
292 | |--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|) | ||
293 | |--CREATE ATAN(F) AND STORE IT IN ATANF, AND | ||
294 | |--SAVE REGISTERS FP2. NOTE(REVIEW): NO FP2 SAVE IS VISIBLE BELOW -- CONFIRM CALLER PRESERVES IT | ||
295 | |||
296 | movel %d2,-(%a7) | ...SAVE d2 TEMPORARILY | ||
297 | movel %d0,%d2 | ...THE EXPO AND 16 BITS OF X | ||
298 | andil #0x00007800,%d0 | ...4 VARYING BITS OF F'S FRACTION | ||
299 | andil #0x7FFF0000,%d2 | ...EXPONENT OF F | ||
300 | subil #0x3FFB0000,%d2 | ...K+4 | ||
301 | asrl #1,%d2 | ...MOVE K+4 DOWN TO SIT JUST ABOVE THE 4 FRACTION BITS | ||
302 | addl %d2,%d0 | ...THE 7 BITS IDENTIFYING F | ||
303 | asrl #7,%d0 | ...INDEX INTO TBL OF ATAN(|F|) | ||
304 | lea ATANTBL,%a1 | ...BASE OF THE TABLE | ||
305 | addal %d0,%a1 | ...ADDRESS OF ATAN(|F|) | ||
306 | movel (%a1)+,ATANF(%a6) | ...COPY SIGN/EXPONENT LONGWORD | ||
307 | movel (%a1)+,ATANFHI(%a6) | ...COPY UPPER MANTISSA LONGWORD | ||
308 | movel (%a1)+,ATANFLO(%a6) | ...ATANF IS NOW ATAN(|F|) | ||
309 | movel X(%a6),%d0 | ...LOAD SIGN AND EXPO. AGAIN | ||
310 | andil #0x80000000,%d0 | ...SIGN(F) | ||
311 | orl %d0,ATANF(%a6) | ...ATANF IS NOW SIGN(F)*ATAN(|F|) | ||
312 | movel (%a7)+,%d2 | ...RESTORE d2 | ||
313 | |||
314 | |--THAT'S ALL I HAVE TO DO FOR NOW, | ||
315 | |--BUT ALAS, THE DIVIDE IS STILL CRANKING! | ||
316 | |||
317 | |--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS | ||
318 | |--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U | ||
319 | |--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT. | ||
320 | |--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3)) | ||
321 | |--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3. | ||
322 | |--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT | ||
323 | |--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED | ||
324 | |||
325 | |||
326 | fmovex %fp0,%fp1 | ...FP1 IS U | ||
327 | fmulx %fp1,%fp1 | ...FP1 IS V = U*U | ||
328 | fmoved ATANA3,%fp2 | ...A3 | ||
329 | faddx %fp1,%fp2 | ...A3+V | ||
330 | fmulx %fp1,%fp2 | ...V*(A3+V) | ||
331 | fmulx %fp0,%fp1 | ...U*V | ||
332 | faddd ATANA2,%fp2 | ...A2+V*(A3+V) | ||
333 | fmuld ATANA1,%fp1 | ...A1*U*V | ||
334 | fmulx %fp2,%fp1 | ...A1*U*V*(A2+V*(A3+V)) | ||
335 | |||
336 | faddx %fp1,%fp0 | ...ATAN(U), FP1 RELEASED | ||
337 | fmovel %d1,%FPCR |restore users exceptions | ||
338 | faddx ATANF(%a6),%fp0 | ...ATAN(X) | ||
339 | bra t_frcinx | ||
340 | |||
341 | ATANBORS: | ...NOTE(REVIEW): NO BRANCH TO THIS LABEL APPEARS IN THIS FILE | ||
342 | |--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED. | ||
343 | |--FP0 IS X AND |X| <= 1/16 OR |X| >= 16. | ||
344 | cmpil #0x3FFF8000,%d0 | ...COMPARE |X| WITH 1.0 TO TELL THE TWO RANGES APART | ||
345 | bgt ATANBIG | ...I.E. |X| >= 16 | ||
346 | |||
347 | ATANSM: | ||
348 | |--|X| <= 1/16 | ||
349 | |--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE | ||
350 | |--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6))))) | ||
351 | |--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] ) | ||
352 | |--WHERE Y = X*X, AND Z = Y*Y. | ||
353 | |||
354 | cmpil #0x3FD78000,%d0 | ...|X| < 2^(-40)? | ||
355 | blt ATANTINY | ||
356 | |--COMPUTE POLYNOMIAL | ||
357 | fmulx %fp0,%fp0 | ...FP0 IS Y = X*X | ||
358 | |||
359 | |||
360 | movew #0x0000,XDCARE(%a6) | ...ZERO THE WORD AT OFFSET 2 OF THE SAVED X | ||
361 | |||
362 | fmovex %fp0,%fp1 | ...FP1 IS Y | ||
363 | fmulx %fp1,%fp1 | ...FP1 IS Z = Y*Y | ||
364 | |||
365 | fmoved ATANB6,%fp2 | ...B6 | ||
366 | fmoved ATANB5,%fp3 | ...B5 | ||
367 | |||
368 | fmulx %fp1,%fp2 | ...Z*B6 | ||
369 | fmulx %fp1,%fp3 | ...Z*B5 | ||
370 | |||
371 | faddd ATANB4,%fp2 | ...B4+Z*B6 | ||
372 | faddd ATANB3,%fp3 | ...B3+Z*B5 | ||
373 | |||
374 | fmulx %fp1,%fp2 | ...Z*(B4+Z*B6) | ||
375 | fmulx %fp3,%fp1 | ...Z*(B3+Z*B5) | ||
376 | |||
377 | faddd ATANB2,%fp2 | ...B2+Z*(B4+Z*B6) | ||
378 | faddd ATANB1,%fp1 | ...B1+Z*(B3+Z*B5) | ||
379 | |||
380 | fmulx %fp0,%fp2 | ...Y*(B2+Z*(B4+Z*B6)) | ||
381 | fmulx X(%a6),%fp0 | ...X*Y | ||
382 | |||
383 | faddx %fp2,%fp1 | ...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] | ||
384 | |||
385 | |||
386 | fmulx %fp1,%fp0 | ...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) | ||
387 | |||
388 | fmovel %d1,%FPCR |restore users exceptions | ||
389 | faddx X(%a6),%fp0 | ...ADD X TO COMPLETE THE POLYNOMIAL | ||
390 | |||
391 | bra t_frcinx | ||
392 | |||
393 | ATANTINY: | ||
394 | |--|X| < 2^(-40), ATAN(X) = X | ||
395 | movew #0x0000,XDCARE(%a6) | ...ZERO THE WORD AT OFFSET 2 OF THE SAVED X | ||
396 | |||
397 | fmovel %d1,%FPCR |restore users exceptions | ||
398 | fmovex X(%a6),%fp0 |last inst - possible exception set | ||
399 | |||
400 | bra t_frcinx | ||
401 | |||
402 | ATANBIG: | ||
403 | |--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE, | ||
404 | |--RETURN SIGN(X)*PI/2 + ATAN(-1/X). | ||
405 | cmpil #0x40638000,%d0 | ...|X| > 2^(100)? | ||
406 | bgt ATANHUGE | ||
407 | |||
408 | |--APPROXIMATE ATAN(-1/X) BY | ||
409 | |--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X' | ||
410 | |--THIS CAN BE RE-WRITTEN AS | ||
411 | |--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y. | ||
412 | |||
413 | fmoves #0xBF800000,%fp1 | ...LOAD -1 | ||
414 | fdivx %fp0,%fp1 | ...FP1 IS -1/X | ||
415 | |||
416 | |||
417 | |--DIVIDE IS STILL CRANKING | ||
418 | |||
419 | fmovex %fp1,%fp0 | ...FP0 IS X' | ||
420 | fmulx %fp0,%fp0 | ...FP0 IS Y = X'*X' | ||
421 | fmovex %fp1,X(%a6) | ...X IS REALLY X' | ||
422 | |||
423 | fmovex %fp0,%fp1 | ...FP1 IS Y | ||
424 | fmulx %fp1,%fp1 | ...FP1 IS Z = Y*Y | ||
425 | |||
426 | fmoved ATANC5,%fp3 | ...C5 | ||
427 | fmoved ATANC4,%fp2 | ...C4 | ||
428 | |||
429 | fmulx %fp1,%fp3 | ...Z*C5 | ||
430 | fmulx %fp1,%fp2 | ...Z*C4 | ||
431 | |||
432 | faddd ATANC3,%fp3 | ...C3+Z*C5 | ||
433 | faddd ATANC2,%fp2 | ...C2+Z*C4 | ||
434 | |||
435 | fmulx %fp3,%fp1 | ...Z*(C3+Z*C5), FP3 RELEASED | ||
436 | fmulx %fp0,%fp2 | ...Y*(C2+Z*C4) | ||
437 | |||
438 | faddd ATANC1,%fp1 | ...C1+Z*(C3+Z*C5) | ||
439 | fmulx X(%a6),%fp0 | ...X'*Y | ||
440 | |||
441 | faddx %fp2,%fp1 | ...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] | ||
442 | |||
443 | |||
444 | fmulx %fp1,%fp0 | ...X'*Y*([C1+Z*(C3+Z*C5)] | ||
445 | | ... +[Y*(C2+Z*C4)]) | ||
446 | faddx X(%a6),%fp0 | ...ADD X' TO COMPLETE ATAN(X') | ||
447 | |||
448 | fmovel %d1,%FPCR |restore users exceptions | ||
449 | |||
450 | btstb #7,(%a0) | ...TEST SIGN BIT OF THE ORIGINAL X | ||
451 | beqs pos_big | ||
452 | |||
453 | neg_big: | ||
454 | faddx NPIBY2,%fp0 | ...-PI/2 + ATAN(X') | ||
455 | bra t_frcinx | ||
456 | |||
457 | pos_big: | ||
458 | faddx PPIBY2,%fp0 | ...+PI/2 + ATAN(X') | ||
459 | bra t_frcinx | ||
460 | |||
461 | ATANHUGE: | ||
462 | |--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY | ||
463 | btstb #7,(%a0) | ...TEST SIGN BIT OF X | ||
464 | beqs pos_huge | ||
465 | |||
466 | neg_huge: | ||
467 | fmovex NPIBY2,%fp0 | ...-PI/2 | ||
468 | fmovel %d1,%fpcr |restore users exceptions | ||
469 | fsubx NTINY,%fp0 | ...-PI/2 - (-TINY) | ||
470 | bra t_frcinx | ||
471 | |||
472 | pos_huge: | ||
473 | fmovex PPIBY2,%fp0 | ...+PI/2 | ||
474 | fmovel %d1,%fpcr |restore users exceptions | ||
475 | fsubx PTINY,%fp0 | ...+PI/2 - TINY | ||
476 | bra t_frcinx | ||
477 | |||
478 | |end | ||
diff --git a/arch/m68k/fpsp040/satanh.S b/arch/m68k/fpsp040/satanh.S new file mode 100644 index 000000000000..20f07810bcda --- /dev/null +++ b/arch/m68k/fpsp040/satanh.S | |||
@@ -0,0 +1,104 @@ | |||
1 | | | ||
2 | | satanh.sa 3.3 12/19/90 | ||
3 | | | ||
4 | | The entry point satanh computes the inverse | ||
5 | | hyperbolic tangent of | ||
6 | | an input argument; satanhd does the same except for denormalized | ||
7 | | input. | ||
8 | | | ||
9 | | Input: Double-extended number X in location pointed to | ||
10 | | by address register a0. | ||
11 | | | ||
12 | | Output: The value arctanh(X) returned in floating-point register Fp0. | ||
13 | | | ||
14 | | Accuracy and Monotonicity: The returned result is within 3 ulps in | ||
15 | | 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the | ||
16 | | result is subsequently rounded to double precision. The | ||
17 | | result is provably monotonic in double precision. | ||
18 | | | ||
19 | | Speed: The program satanh takes approximately 270 cycles. | ||
20 | | | ||
21 | | Algorithm: | ||
22 | | | ||
23 | | ATANH | ||
24 | | 1. If |X| >= 1, go to 3. | ||
25 | | | ||
26 | | 2. (|X| < 1) Calculate atanh(X) by | ||
27 | | sgn := sign(X) | ||
28 | | y := |X| | ||
29 | | z := 2y/(1-y) | ||
30 | | atanh(X) := sgn * (1/2) * logp1(z) | ||
31 | | Exit. | ||
32 | | | ||
33 | | 3. If |X| > 1, go to 5. | ||
34 | | | ||
35 | | 4. (|X| = 1) Generate infinity with an appropriate sign and | ||
36 | | divide-by-zero by | ||
37 | | sgn := sign(X) | ||
38 | | atanh(X) := sgn / (+0). | ||
39 | | Exit. | ||
40 | | | ||
41 | | 5. (|X| > 1) Generate an invalid operation by 0 * infinity. | ||
42 | | Exit. | ||
43 | | | ||
44 | |||
45 | | Copyright (C) Motorola, Inc. 1990 | ||
46 | | All Rights Reserved | ||
47 | | | ||
48 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
49 | | The copyright notice above does not evidence any | ||
50 | | actual or intended publication of such source code. | ||
51 | |||
52 | |satanh idnt 2,1 | Motorola 040 Floating Point Software Package | ||
53 | |||
54 | |section 8 | ||
55 | |||
56 | |xref t_dz | ||
57 | |xref t_operr | ||
58 | |xref t_frcinx | ||
59 | |xref t_extdnrm | ||
60 | |xref slognp1 | ||
61 | |||
62 | .global satanhd | ||
63 | satanhd: | ||
64 | |--ENTRY POINT FOR DENORMALIZED X: ATANH(X) = X, NO LOG IS EVALUATED | ||
65 | |||
66 | bra t_extdnrm | ...HAND OFF TO T_EXTDNRM (EXTERNAL, SEE XREF LIST) | ||
67 | |||
68 | .global satanh | ||
69 | satanh: | ||
70 | movel (%a0),%d0 | ...UPPER HALF OF d0 = SIGN/EXPONENT WORD | ||
71 | movew 4(%a0),%d0 | ...LOWER HALF OF d0 = TOP 16 MANTISSA BITS | ||
72 | andil #0x7FFFFFFF,%d0 | ...DROP SIGN, d0 IS |X| IN COMPACT FORM | ||
73 | cmpil #0x3FFF8000,%d0 | ...COMPARE WITH 1.0 | ||
74 | bges ATANHBIG | ...|X| >= 1 IS HANDLED SEPARATELY | ||
75 | |||
76 | |--THIS IS THE USUAL CASE, |X| < 1 | ||
77 | |--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z). | ||
78 | |||
79 | fabsx (%a0),%fp0 | ...Y = |X| | ||
80 | fmovex %fp0,%fp1 | ||
81 | fnegx %fp1 | ...-Y | ||
82 | faddx %fp0,%fp0 | ...2Y | ||
83 | fadds #0x3F800000,%fp1 | ...1-Y | ||
84 | fdivx %fp1,%fp0 | ...2Y/(1-Y) | ||
85 | movel (%a0),%d0 | ||
86 | andil #0x80000000,%d0 | ...SIGN BIT OF X | ||
87 | oril #0x3F000000,%d0 | ...SIGN(X)*HALF | ||
88 | movel %d0,-(%sp) | ...PUSH SIGN(X)*HALF IN SGL FORMAT | ||
89 | |||
90 | fmovemx %fp0-%fp0,(%a0) | ...overwrite input | ||
91 | movel %d1,-(%sp) | ...SAVE d1 ON THE STACK | ||
92 | clrl %d1 | ...SLOGNP1 IS CALLED WITH d1 = 0 | ||
93 | bsr slognp1 | ...LOG1P(Z) | ||
94 | fmovel (%sp)+,%fpcr | ...POP THE SAVED d1 VALUE INTO FPCR | ||
95 | fmuls (%sp)+,%fp0 | ...TIMES SIGN(X)*HALF, POPS THE PUSHED LONGWORD | ||
96 | bra t_frcinx | ||
97 | |||
98 | ATANHBIG: | ||
99 | fabsx (%a0),%fp0 | ...|X| | ||
100 | fcmps #0x3F800000,%fp0 | ...COMPARE |X| WITH 1.0 | ||
101 | fbgt t_operr | ...|X| > 1: OPERAND ERROR | ||
102 | bra t_dz | ...|X| = 1: DIVIDE-BY-ZERO | ||
103 | |||
104 | |end | ||
diff --git a/arch/m68k/fpsp040/scale.S b/arch/m68k/fpsp040/scale.S new file mode 100644 index 000000000000..5c9b805265f2 --- /dev/null +++ b/arch/m68k/fpsp040/scale.S | |||
@@ -0,0 +1,371 @@ | |||
1 | | | ||
2 | | scale.sa 3.3 7/30/91 | ||
3 | | | ||
4 | | The entry point sSCALE computes the destination operand | ||
5 | | scaled by the source operand. If the absolute value of | ||
6 | | the source operand is (>= 2^14) an overflow or underflow | ||
7 | | is returned. | ||
8 | | | ||
9 | | The entry point sscale is called from do_func to emulate | ||
10 | | the fscale unimplemented instruction. | ||
11 | | | ||
12 | | Input: Double-extended destination operand in FPTEMP, | ||
13 | | double-extended source operand in ETEMP. | ||
14 | | | ||
15 | | Output: The function returns scale(X,Y) to fp0. | ||
16 | | | ||
17 | | Modifies: fp0. | ||
18 | | | ||
19 | | Algorithm: | ||
20 | | | ||
21 | | Copyright (C) Motorola, Inc. 1990 | ||
22 | | All Rights Reserved | ||
23 | | | ||
24 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
25 | | The copyright notice above does not evidence any | ||
26 | | actual or intended publication of such source code. | ||
27 | |||
28 | |SCALE idnt 2,1 | Motorola 040 Floating Point Software Package | ||
29 | |||
30 | |section 8 | ||
31 | |||
32 | #include "fpsp.h" | ||
33 | |||
34 | |xref t_ovfl2 | ||
35 | |xref t_unfl | ||
36 | |xref round | ||
37 | |xref t_resdnrm | ||
38 | |||
39 | SRC_BNDS: .short 0x3fff,0x400c |exponent bounds checked by cmp2w in sscale | ||
40 | |||
41 | | | ||
42 | | This entry point is used by the unimplemented instruction exception | ||
43 | | handler. | ||
44 | | | ||
45 | | | ||
46 | | | ||
47 | | FSCALE | ||
48 | | | ||
49 | .global sscale | ||
50 | sscale: | ||
51 | fmovel #0,%fpcr |clr user enabled exc | ||
52 | clrl %d1 | ||
53 | movew FPTEMP(%a6),%d1 |get dest exponent | ||
54 | smi L_SCR1(%a6) |use L_SCR1 to hold sign | ||
55 | andil #0x7fff,%d1 |strip sign | ||
56 | movew ETEMP(%a6),%d0 |check src bounds | ||
57 | andiw #0x7fff,%d0 |clr sign bit | ||
58 | cmp2w SRC_BNDS,%d0 | ||
59 | bccs src_in | ||
60 | cmpiw #0x400c,%d0 |test for too large | ||
61 | bge src_out | ||
62 | | | ||
63 | | The source input is below 1, so we check for denormalized numbers | ||
64 | | and set unfl. | ||
65 | | | ||
66 | src_small: | ||
67 | moveb DTAG(%a6),%d0 | ||
68 | andib #0xe0,%d0 | ||
69 | tstb %d0 | ||
70 | beqs no_denorm | ||
71 | st STORE_FLG(%a6) |dest already contains result | ||
72 | orl #unfl_mask,USER_FPSR(%a6) |set UNFL | ||
73 | den_done: | ||
74 | leal FPTEMP(%a6),%a0 | ||
75 | bra t_resdnrm | ||
76 | no_denorm: | ||
77 | fmovel USER_FPCR(%a6),%FPCR | ||
78 | fmovex FPTEMP(%a6),%fp0 |simply return dest | ||
79 | rts | ||
80 | |||
81 | |||
82 | | | ||
83 | | Source is within 2^14 range. To perform the int operation, | ||
84 | | move it to d0. | ||
85 | | | ||
86 | src_in: | ||
87 | fmovex ETEMP(%a6),%fp0 |move in src for int | ||
88 | fmovel #rz_mode,%fpcr |force rz for src conversion | ||
89 | fmovel %fp0,%d0 |int src to d0 | ||
90 | fmovel #0,%FPSR |clr status from above | ||
91 | tstw ETEMP(%a6) |check src sign | ||
92 | blt src_neg | ||
93 | | | ||
94 | | Source is positive. Add the src to the dest exponent. | ||
95 | | The result can be denormalized, if src = 0, or overflow, | ||
96 | | if the result of the add sets a bit in the upper word. | ||
97 | | | ||
98 | src_pos: | ||
99 | tstw %d1 |check for denorm | ||
100 | beq dst_dnrm | ||
101 | addl %d0,%d1 |add src to dest exp | ||
102 | beqs denorm |if zero, result is denorm | ||
103 | cmpil #0x7fff,%d1 |test for overflow | ||
104 | bges ovfl | ||
105 | tstb L_SCR1(%a6) | ||
106 | beqs spos_pos | ||
107 | orw #0x8000,%d1 | ||
108 | spos_pos: | ||
109 | movew %d1,FPTEMP(%a6) |result in FPTEMP | ||
110 | fmovel USER_FPCR(%a6),%FPCR | ||
111 | fmovex FPTEMP(%a6),%fp0 |write result to fp0 | ||
112 | rts | ||
113 | ovfl: | ||
114 | tstb L_SCR1(%a6) | ||
115 | beqs sovl_pos | ||
116 | orw #0x8000,%d1 | ||
117 | sovl_pos: | ||
118 | movew FPTEMP(%a6),ETEMP(%a6) |result in ETEMP | ||
119 | movel FPTEMP_HI(%a6),ETEMP_HI(%a6) | ||
120 | movel FPTEMP_LO(%a6),ETEMP_LO(%a6) | ||
121 | bra t_ovfl2 | ||
122 | |||
123 | denorm: |reached when the summed exponent in d1 is zero | ||
124 | tstb L_SCR1(%a6) |L_SCR1 holds the dest sign | ||
125 | beqs den_pos | ||
126 | orw #0x8000,%d1 |merge sign into the exponent word | ||
127 | den_pos: | ||
128 | tstl FPTEMP_HI(%a6) |check j bit | ||
129 | blts nden_exit |if set, not denorm | ||
130 | movew %d1,ETEMP(%a6) |input expected in ETEMP | ||
131 | movel FPTEMP_HI(%a6),ETEMP_HI(%a6) | ||
132 | movel FPTEMP_LO(%a6),ETEMP_LO(%a6) | ||
133 | orl #unfl_mask,USER_FPSR(%a6) |set unfl (mask, as at src_small and not_zero; was unfl_bit) | ||
134 | leal ETEMP(%a6),%a0 |a0 points to the operand in ETEMP | ||
135 | bra t_resdnrm | ||
136 | nden_exit: | ||
137 | movew %d1,FPTEMP(%a6) |result in FPTEMP | ||
138 | fmovel USER_FPCR(%a6),%FPCR |reload fpcr from USER_FPCR | ||
139 | fmovex FPTEMP(%a6),%fp0 |write result to fp0 | ||
140 | rts | ||
141 | |||
142 | | | ||
143 | | Source is negative. Add the src to the dest exponent. | ||
144 | | (The result exponent will be reduced). The result can be | ||
145 | | denormalized. | ||
146 | | | ||
147 | src_neg: | ||
148 | addl %d0,%d1 |add src to dest | ||
149 | beqs denorm |if zero, result is denorm | ||
150 | blts fix_dnrm |if negative, result is | ||
151 | | ;needing denormalization | ||
152 | tstb L_SCR1(%a6) | ||
153 | beqs sneg_pos | ||
154 | orw #0x8000,%d1 | ||
155 | sneg_pos: | ||
156 | movew %d1,FPTEMP(%a6) |result in FPTEMP | ||
157 | fmovel USER_FPCR(%a6),%FPCR | ||
158 | fmovex FPTEMP(%a6),%fp0 |write result to fp0 | ||
159 | rts | ||
160 | |||
161 | |||
162 | | | ||
163 | | The result exponent is below denorm value. Test for catastrophic | ||
164 | | underflow and force zero if true. If not, try to shift the | ||
165 | | mantissa right until a zero exponent exists. | ||
166 | | | ||
167 | fix_dnrm: | ||
168 | cmpiw #0xffc0,%d1 |lower bound for normalization | ||
169 | blt fix_unfl |if lower, catastrophic unfl | ||
170 | movew %d1,%d0 |use d0 for exp | ||
171 | movel %d2,-(%a7) |free d2 for norm | ||
172 | movel FPTEMP_HI(%a6),%d1 | ||
173 | movel FPTEMP_LO(%a6),%d2 | ||
174 | clrl L_SCR2(%a6) | ||
175 | fix_loop: | ||
176 | addw #1,%d0 |drive d0 to 0 | ||
177 | lsrl #1,%d1 |while shifting the | ||
178 | roxrl #1,%d2 |mantissa to the right | ||
179 | bccs no_carry | ||
180 | st L_SCR2(%a6) |use L_SCR2 to capture inex | ||
181 | no_carry: | ||
182 | tstw %d0 |it is finished when | ||
183 | blts fix_loop |d0 is zero or the mantissa | ||
184 | tstb L_SCR2(%a6) | ||
185 | beqs tst_zero | ||
186 | orl #unfl_inx_mask,USER_FPSR(%a6) | ||
187 | | ;set unfl, aunfl, ainex | ||
188 | | | ||
189 | | Test for zero. If zero, simply use fmove to return +/- zero | ||
190 | | to the fpu. | ||
191 | | | ||
192 | tst_zero: | ||
193 | clrw FPTEMP_EX(%a6) | ||
194 | tstb L_SCR1(%a6) |test for sign | ||
195 | beqs tst_con | ||
196 | orw #0x8000,FPTEMP_EX(%a6) |set sign bit | ||
197 | tst_con: | ||
198 | movel %d1,FPTEMP_HI(%a6) | ||
199 | movel %d2,FPTEMP_LO(%a6) | ||
200 | movel (%a7)+,%d2 | ||
201 | tstl %d1 | ||
202 | bnes not_zero | ||
203 | tstl FPTEMP_LO(%a6) | ||
204 | bnes not_zero | ||
205 | | | ||
206 | | Result is zero. Check for rounding mode to set lsb. If the | ||
207 | | mode is rp, and the zero is positive, return smallest denorm. | ||
208 | | If the mode is rm, and the zero is negative, return smallest | ||
209 | | negative denorm. | ||
210 | | | ||
211 | btstb #5,FPCR_MODE(%a6) |test if rm or rp | ||
212 | beqs no_dir | ||
213 | btstb #4,FPCR_MODE(%a6) |check which one | ||
214 | beqs zer_rm | ||
215 | zer_rp: | ||
216 | tstb L_SCR1(%a6) |check sign | ||
217 | bnes no_dir |if set, neg op, no inc | ||
218 | movel #1,FPTEMP_LO(%a6) |set lsb | ||
219 | bras sm_dnrm | ||
220 | zer_rm: | ||
221 | tstb L_SCR1(%a6) |check sign | ||
222 | beqs no_dir |if clr, pos op, no inc | ||
223 | movel #1,FPTEMP_LO(%a6) |set lsb | ||
224 | orl #neg_mask,USER_FPSR(%a6) |set N | ||
225 | bras sm_dnrm | ||
226 | no_dir: | ||
227 | fmovel USER_FPCR(%a6),%FPCR | ||
228 | fmovex FPTEMP(%a6),%fp0 |use fmove to set cc's | ||
229 | rts | ||
230 | |||
231 | | | ||
232 | | The rounding mode changed the zero to a smallest denorm. Call | ||
233 | | t_resdnrm with exceptional operand in ETEMP. | ||
234 | | | ||
235 | sm_dnrm: | ||
236 | movel FPTEMP_EX(%a6),ETEMP_EX(%a6) | ||
237 | movel FPTEMP_HI(%a6),ETEMP_HI(%a6) | ||
238 | movel FPTEMP_LO(%a6),ETEMP_LO(%a6) | ||
239 | leal ETEMP(%a6),%a0 | ||
240 | bra t_resdnrm | ||
241 | |||
242 | | | ||
243 | | Result is still denormalized. | ||
244 | | | ||
245 | not_zero: | ||
246 | orl #unfl_mask,USER_FPSR(%a6) |set unfl | ||
247 | tstb L_SCR1(%a6) |check for sign | ||
248 | beqs fix_exit | ||
249 | orl #neg_mask,USER_FPSR(%a6) |set N | ||
250 | fix_exit: | ||
251 | bras sm_dnrm | ||
252 | |||
253 | |||
254 | | | ||
255 | | The result has underflowed to zero. Return zero and set | ||
256 | | unfl, aunfl, and ainex. | ||
257 | | | ||
258 | fix_unfl: | ||
259 | orl #unfl_inx_mask,USER_FPSR(%a6) |set unfl, aunfl, ainex | ||
260 | btstb #5,FPCR_MODE(%a6) |test if rm or rp | ||
261 | beqs no_dir2 | ||
262 | btstb #4,FPCR_MODE(%a6) |check which one | ||
263 | beqs zer_rm2 | ||
264 | zer_rp2: | ||
265 | tstb L_SCR1(%a6) |check sign | ||
266 | bnes no_dir2 |if set, neg op, no inc | ||
267 | clrl FPTEMP_EX(%a6) |zero sign/exponent long (positive) | ||
268 | clrl FPTEMP_HI(%a6) |zero high mantissa long | ||
269 | movel #1,FPTEMP_LO(%a6) |set lsb | ||
270 | bras sm_dnrm |return smallest denorm | ||
271 | zer_rm2: | ||
272 | tstb L_SCR1(%a6) |check sign | ||
273 | beqs no_dir2 |if clr, pos op, no inc | ||
274 | movew #0x8000,FPTEMP_EX(%a6) |sign bit set, exponent zero | ||
275 | clrl FPTEMP_HI(%a6) |zero high mantissa long | ||
276 | movel #1,FPTEMP_LO(%a6) |set lsb | ||
277 | orl #neg_mask,USER_FPSR(%a6) |set N | ||
278 | bra sm_dnrm |return smallest denorm | ||
279 | |||
280 | no_dir2: | ||
281 | tstb L_SCR1(%a6) |sign flag byte | ||
282 | bges pos_zero |not negative (flag clear) -> return +0 | ||
283 | neg_zero: | ||
284 | clrl FP_SCR1(%a6) |clear the exceptional operand | ||
285 | clrl FP_SCR1+4(%a6) |for gen_except. | ||
286 | clrl FP_SCR1+8(%a6) | ||
287 | fmoves #0x80000000,%fp0 |fp0 = -0.0 (single-precision encoding) | ||
288 | rts | ||
289 | pos_zero: | ||
290 | clrl FP_SCR1(%a6) |clear the exceptional operand | ||
291 | clrl FP_SCR1+4(%a6) |for gen_except. | ||
292 | clrl FP_SCR1+8(%a6) | ||
293 | fmoves #0x00000000,%fp0 |fp0 = +0.0 (single-precision encoding) | ||
294 | rts | ||
295 | |||
296 | | | ||
297 | | The destination is a denormalized number. It must be handled | ||
298 | | by first shifting the bits in the mantissa until it is normalized, | ||
299 | | then adding the remainder of the source to the exponent. | ||
300 | | | ||
301 | dst_dnrm: | ||
302 | moveml %d2/%d3,-(%a7) |save d2/d3; restored on both exits below | ||
303 | movew FPTEMP_EX(%a6),%d1 |d1 = dst sign/exponent word | ||
304 | movel FPTEMP_HI(%a6),%d2 |d2 = dst mantissa, high long | ||
305 | movel FPTEMP_LO(%a6),%d3 |d3 = dst mantissa, low long | ||
306 | dst_loop: | ||
307 | tstl %d2 |test for normalized result | ||
308 | blts dst_norm |exit loop if so | ||
309 | tstl %d0 |otherwise, test shift count | ||
310 | beqs dst_fin |if zero, shifting is done | ||
311 | subil #1,%d0 |dec src | ||
312 | lsll #1,%d3 |shift d2:d3 left one bit; | ||
313 | roxll #1,%d2 |bit shifted out of d3 enters d2 via X | ||
314 | bras dst_loop | ||
315 | | | ||
316 | | Destination became normalized. Simply add the remaining | ||
317 | | portion of the src to the exponent. | ||
318 | | | ||
319 | dst_norm: | ||
320 | addw %d0,%d1 |dst is normalized; add src | ||
321 | tstb L_SCR1(%a6) |sign flag set? | ||
322 | beqs dnrm_pos | ||
323 | orl #0x8000,%d1 |set sign bit in the exponent word | ||
324 | dnrm_pos: | ||
325 | movemw %d1,FPTEMP_EX(%a6) |write result back to FPTEMP | ||
326 | moveml %d2,FPTEMP_HI(%a6) | ||
327 | moveml %d3,FPTEMP_LO(%a6) | ||
328 | fmovel USER_FPCR(%a6),%FPCR |load FPCR from USER_FPCR | ||
329 | fmovex FPTEMP(%a6),%fp0 |return result in fp0 | ||
330 | moveml (%a7)+,%d2/%d3 |restore d2/d3 | ||
331 | rts | ||
332 | |||
333 | | | ||
334 | | Destination remained denormalized. Call t_resdnrm with | ||
335 | | exceptional operand in ETEMP. | ||
336 | | | ||
337 | dst_fin: | ||
338 | tstb L_SCR1(%a6) |check for sign | ||
339 | beqs dst_exit | ||
340 | orl #neg_mask,USER_FPSR(%a6) |set N | ||
341 | orl #0x8000,%d1 |set sign bit in the exponent word | ||
342 | dst_exit: | ||
343 | movemw %d1,ETEMP_EX(%a6) |write the still-denormalized value to ETEMP | ||
344 | moveml %d2,ETEMP_HI(%a6) | ||
345 | moveml %d3,ETEMP_LO(%a6) | ||
346 | orl #unfl_mask,USER_FPSR(%a6) |set unfl | ||
347 | moveml (%a7)+,%d2/%d3 |restore d2/d3 | ||
348 | leal ETEMP(%a6),%a0 |a0 = address of ETEMP for t_resdnrm | ||
349 | bra t_resdnrm | ||
350 | |||
351 | | | ||
352 | | Source is outside of 2^14 range. Test the sign and branch | ||
353 | | to the appropriate exception handler. | ||
354 | | | ||
355 | src_out: | ||
356 | tstb L_SCR1(%a6) |sign flag set? | ||
357 | beqs scro_pos | ||
358 | orl #0x8000,%d1 |set sign bit in d1 (d1 is set up before this chunk; presumably dst exponent word) | ||
359 | scro_pos: | ||
360 | movel FPTEMP_HI(%a6),ETEMP_HI(%a6) |copy FPTEMP mantissa over ETEMP's; | ||
361 | movel FPTEMP_LO(%a6),ETEMP_LO(%a6) |ETEMP's first word is still untouched | ||
362 | tstw ETEMP(%a6) |sign of ETEMP's first word picks the handler | ||
363 | blts res_neg |negative -> t_unfl, otherwise t_ovfl2 | ||
364 | res_pos: | ||
365 | movew %d1,ETEMP(%a6) |result in ETEMP | ||
366 | bra t_ovfl2 |NOTE(review): a0 not loaded here, unlike res_neg; confirm t_ovfl2 does not need it | ||
367 | res_neg: | ||
368 | movew %d1,ETEMP(%a6) |result in ETEMP | ||
369 | leal ETEMP(%a6),%a0 |a0 = address of ETEMP for t_unfl | ||
370 | bra t_unfl | ||
371 | |end | ||
diff --git a/arch/m68k/fpsp040/scosh.S b/arch/m68k/fpsp040/scosh.S new file mode 100644 index 000000000000..e81edbb87642 --- /dev/null +++ b/arch/m68k/fpsp040/scosh.S | |||
@@ -0,0 +1,132 @@ | |||
1 | | | ||
2 | | scosh.sa 3.1 12/10/90 | ||
3 | | | ||
4 | | The entry point sCosh computes the hyperbolic cosine of | ||
5 | | an input argument; sCoshd does the same except for denormalized | ||
6 | | input. | ||
7 | | | ||
8 | | Input: Double-extended number X in location pointed to | ||
9 | | by address register a0. | ||
10 | | | ||
11 | | Output: The value cosh(X) returned in floating-point register Fp0. | ||
12 | | | ||
13 | | Accuracy and Monotonicity: The returned result is within 3 ulps in | ||
14 | | 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the | ||
15 | | result is subsequently rounded to double precision. The | ||
16 | | result is provably monotonic in double precision. | ||
17 | | | ||
18 | | Speed: The program sCOSH takes approximately 250 cycles. | ||
19 | | | ||
20 | | Algorithm: | ||
21 | | | ||
22 | | COSH | ||
23 | | 1. If |X| > 16380 log2, go to 3. | ||
24 | | | ||
25 | | 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae | ||
26 | | Y = |X|, z = exp(Y), and | ||
27 | | cosh(X) = (1/2)*( z + 1/z ). | ||
28 | | Exit. | ||
29 | | | ||
30 | | 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5. | ||
31 | | | ||
32 | | 4. (16380 log2 < |X| <= 16480 log2) | ||
33 | | cosh(X) = exp(|X|)/2 (cosh is even, so no sign factor). | ||
34 | | However, invoking exp(|X|) may cause premature overflow. | ||
35 | | Thus, we calculate cosh(X) as follows: | ||
36 | | Y := |X| | ||
37 | | Fact := 2**(16380) | ||
38 | | Y' := Y - 16381 log2 | ||
39 | | cosh(X) := Fact * exp(Y'). | ||
40 | | Exit. | ||
41 | | | ||
42 | | 5. (|X| > 16480 log2) cosh(X) must overflow. Return | ||
43 | | Huge*Huge to generate overflow and a positive infinity | ||
44 | | (cosh(X) is never negative). Huge is the largest finite number in | ||
45 | | extended format. Exit. | ||
46 | | | ||
47 | | | ||
48 | |||
49 | | Copyright (C) Motorola, Inc. 1990 | ||
50 | | All Rights Reserved | ||
51 | | | ||
52 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
53 | | The copyright notice above does not evidence any | ||
54 | | actual or intended publication of such source code. | ||
55 | |||
56 | |SCOSH idnt 2,1 | Motorola 040 Floating Point Software Package | ||
57 | |||
58 | |section 8 | ||
59 | |||
60 | |xref t_ovfl | ||
61 | |xref t_frcinx | ||
62 | |xref setox | ||
63 | |||
64 | T1: .long 0x40C62D38,0xD3D64634 | ... 16381 LOG2 LEAD | ||
65 | T2: .long 0x3D6F90AE,0xB1E75CC7 | ... 16381 LOG2 TRAIL | ||
66 | |||
67 | TWO16380: .long 0x7FFB0000,0x80000000,0x00000000,0x00000000 | ||
68 | |||
69 | .global scoshd | ||
70 | scoshd: | ||
71 | |--COSH(X) = 1 FOR DENORMALIZED X | ||
72 | |||
73 | fmoves #0x3F800000,%fp0 | ...fp0 = 1.0 (single) | ||
74 | |||
75 | fmovel %d1,%FPCR | ...FPCR := d1 (presumably the user FPCR; confirm against caller) | ||
76 | fadds #0x00800000,%fp0 | ...1 + 2^(-126), rounded under that FPCR | ||
77 | bra t_frcinx | ||
78 | |||
79 | .global scosh | ||
80 | scosh: | ||
81 | fmovex (%a0),%fp0 | ...LOAD INPUT | ||
82 | |||
83 | movel (%a0),%d0 | ...upper word of d0 = sign and biased exponent | ||
84 | movew 4(%a0),%d0 | ...lower word of d0 = top 16 mantissa bits | ||
85 | andil #0x7FFFFFFF,%d0 | ...drop sign: compact form of |X| | ||
86 | cmpil #0x400CB167,%d0 | ...16380 LOG2 in compact form | ||
87 | bgts COSHBIG | ||
88 | |||
89 | |--THIS IS THE USUAL CASE, |X| < 16380 LOG2 | ||
90 | |--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) ) | ||
91 | |||
92 | fabsx %fp0 | ...|X| | ||
93 | |||
94 | movel %d1,-(%sp) | ...save d1 across the setox call | ||
95 | clrl %d1 | ...call setox with d1 = 0 | ||
96 | fmovemx %fp0-%fp0,(%a0) |pass parameter to setox | ||
97 | bsr setox | ...FP0 IS EXP(|X|) | ||
98 | fmuls #0x3F000000,%fp0 | ...(1/2)EXP(|X|) | ||
99 | movel (%sp)+,%d1 | ...restore saved d1 | ||
100 | |||
101 | fmoves #0x3E800000,%fp1 | ...(1/4) | ||
102 | fdivx %fp0,%fp1 | ...1/(2 EXP(|X|)) | ||
103 | |||
104 | fmovel %d1,%FPCR | ...FPCR := restored d1, so only the final add uses it | ||
105 | faddx %fp1,%fp0 | ...EXP(|X|)/2 + 1/(2 EXP(|X|)) | ||
106 | |||
107 | bra t_frcinx | ||
108 | |||
109 | COSHBIG: | ||
110 | cmpil #0x400CB2B3,%d0 | ...NOTE(review): decodes to about 16500 LOG2, header says 16480 LOG2; confirm | ||
111 | bgts COSHHUGE | ||
112 | |||
113 | fabsx %fp0 | ...|X| | ||
114 | fsubd T1(%pc),%fp0 | ...(|X|-16381LOG2_LEAD) | ||
115 | fsubd T2(%pc),%fp0 | ...|X| - 16381 LOG2, ACCURATE | ||
116 | |||
117 | movel %d1,-(%sp) | ...save d1 across the setox call | ||
118 | clrl %d1 | ...call setox with d1 = 0 | ||
119 | fmovemx %fp0-%fp0,(%a0) | ...pass reduced argument to setox | ||
120 | bsr setox | ...FP0 IS EXP(|X| - 16381 LOG2) | ||
121 | fmovel (%sp)+,%fpcr | ...pop the saved d1 value straight into FPCR | ||
122 | |||
123 | fmulx TWO16380(%pc),%fp0 | ...scale by 2^16380, giving EXP(|X|)/2 | ||
124 | bra t_frcinx | ||
125 | |||
126 | COSHHUGE: | ||
127 | fmovel #0,%fpsr |clr N bit if set by source | ||
128 | bclrb #7,(%a0) |always return positive value | ||
129 | fmovemx (%a0),%fp0-%fp0 | ...reload the now-positive operand without trapping | ||
130 | bra t_ovfl | ||
131 | |||
132 | |end | ||
diff --git a/arch/m68k/fpsp040/setox.S b/arch/m68k/fpsp040/setox.S new file mode 100644 index 000000000000..0aa75f9bf7d1 --- /dev/null +++ b/arch/m68k/fpsp040/setox.S | |||
@@ -0,0 +1,865 @@ | |||
1 | | | ||
2 | | setox.sa 3.1 12/10/90 | ||
3 | | | ||
4 | | The entry point setox computes the exponential of a value. | ||
5 | | setoxd does the same except the input value is a denormalized | ||
6 | | number. setoxm1 computes exp(X)-1, and setoxm1d computes | ||
7 | | exp(X)-1 for denormalized X. | ||
8 | | | ||
9 | | INPUT | ||
10 | | ----- | ||
11 | | Double-extended value in memory location pointed to by address | ||
12 | | register a0. | ||
13 | | | ||
14 | | OUTPUT | ||
15 | | ------ | ||
16 | | exp(X) or exp(X)-1 returned in floating-point register fp0. | ||
17 | | | ||
18 | | ACCURACY and MONOTONICITY | ||
19 | | ------------------------- | ||
20 | | The returned result is within 0.85 ulps in 64 significant bits, i.e. | ||
21 | | within 0.5001 ulp to 53 bits if the result is subsequently rounded | ||
22 | | to double precision. The result is provably monotonic in double | ||
23 | | precision. | ||
24 | | | ||
25 | | SPEED | ||
26 | | ----- | ||
27 | | Two timings are measured, both in the copy-back mode. The | ||
28 | | first one is measured when the function is invoked the first time | ||
29 | | (so the instructions and data are not in cache), and the | ||
30 | | second one is measured when the function is reinvoked at the same | ||
31 | | input argument. | ||
32 | | | ||
33 | | The program setox takes approximately 210/190 cycles for input | ||
34 | | argument X whose magnitude is less than 16380 log2, which | ||
35 | | is the usual situation. For the less common arguments, | ||
36 | | depending on their values, the program may run faster or slower -- | ||
37 | | but no worse than 10% slower even in the extreme cases. | ||
38 | | | ||
39 | | The program setoxm1 takes approximately ???/??? cycles for input | ||
40 | | argument X, 0.25 <= |X| < 70log2. For |X| < 0.25, it takes | ||
41 | | approximately ???/??? cycles. For the less common arguments, | ||
42 | | depending on their values, the program may run faster or slower -- | ||
43 | | but no worse than 10% slower even in the extreme cases. | ||
44 | | | ||
45 | | ALGORITHM and IMPLEMENTATION NOTES | ||
46 | | ---------------------------------- | ||
47 | | | ||
48 | | setoxd | ||
49 | | ------ | ||
50 | | Step 1. Set ans := 1.0 | ||
51 | | | ||
52 | | Step 2. Return ans := ans + sign(X)*2^(-126). Exit. | ||
53 | | Notes: This will always generate one exception -- inexact. | ||
54 | | | ||
55 | | | ||
56 | | setox | ||
57 | | ----- | ||
58 | | | ||
59 | | Step 1. Filter out extreme cases of input argument. | ||
60 | | 1.1 If |X| >= 2^(-65), go to Step 1.3. | ||
61 | | 1.2 Go to Step 7. | ||
62 | | 1.3 If |X| < 16380 log(2), go to Step 2. | ||
63 | | 1.4 Go to Step 8. | ||
64 | | Notes: The usual case should take the branches 1.1 -> 1.3 -> 2. | ||
65 | | To avoid the use of floating-point comparisons, a | ||
66 | | compact representation of |X| is used. This format is a | ||
67 | | 32-bit integer, the upper (more significant) 16 bits are | ||
68 | | the sign and biased exponent field of |X|; the lower 16 | ||
69 | | bits are the 16 most significant fraction (including the | ||
70 | | explicit bit) bits of |X|. Consequently, the comparisons | ||
71 | | in Steps 1.1 and 1.3 can be performed by integer comparison. | ||
72 | | Note also that the constant 16380 log(2) used in Step 1.3 | ||
73 | | is also in the compact form. Thus taking the branch | ||
74 | | to Step 2 guarantees |X| < 16380 log(2). There is no harm | ||
75 | | to have a small number of cases where |X| is less than, | ||
76 | | but close to, 16380 log(2) and the branch to Step 8 is | ||
77 | | taken. | ||
78 | | | ||
79 | | Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). | ||
80 | | 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 was taken) | ||
81 | | 2.2 N := round-to-nearest-integer( X * 64/log2 ). | ||
82 | | 2.3 Calculate J = N mod 64; so J = 0,1,2,..., or 63. | ||
83 | | 2.4 Calculate M = (N - J)/64; so N = 64M + J. | ||
84 | | 2.5 Calculate the address of the stored value of 2^(J/64). | ||
85 | | 2.6 Create the value Scale = 2^M. | ||
86 | | Notes: The calculation in 2.2 is really performed by | ||
87 | | | ||
88 | | Z := X * constant | ||
89 | | N := round-to-nearest-integer(Z) | ||
90 | | | ||
91 | | where | ||
92 | | | ||
93 | | constant := single-precision( 64/log 2 ). | ||
94 | | | ||
95 | | Using a single-precision constant avoids memory access. | ||
96 | | Another effect of using a single-precision "constant" is | ||
97 | | that the calculated value Z is | ||
98 | | | ||
99 | | Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24). | ||
100 | | | ||
101 | | This error has to be considered later in Steps 3 and 4. | ||
102 | | | ||
103 | | Step 3. Calculate X - N*log2/64. | ||
104 | | 3.1 R := X + N*L1, where L1 := single-precision(-log2/64). | ||
105 | | 3.2 R := R + N*L2, L2 := extended-precision(-log2/64 - L1). | ||
106 | | Notes: a) The way L1 and L2 are chosen ensures L1+L2 approximate | ||
107 | | the value -log2/64 to 88 bits of accuracy. | ||
108 | | b) N*L1 is exact because N is no longer than 22 bits and | ||
109 | | L1 is no longer than 24 bits. | ||
110 | | c) The calculation X+N*L1 is also exact due to cancellation. | ||
111 | | Thus, R is practically X+N(L1+L2) to full 64 bits. | ||
112 | | d) It is important to estimate how large can |R| be after | ||
113 | | Step 3.2. | ||
114 | | | ||
115 | | N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) | ||
116 | | X*64/log2 (1+eps) = N + f, |f| <= 0.5 | ||
117 | | X*64/log2 - N = f - eps*X 64/log2 | ||
118 | | X - N*log2/64 = f*log2/64 - eps*X | ||
119 | | | ||
120 | | | ||
121 | | Now |X| <= 16446 log2, thus | ||
122 | | | ||
123 | | |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 | ||
124 | | <= 0.57 log2/64. | ||
125 | | This bound will be used in Step 4. | ||
126 | | | ||
127 | | Step 4. Approximate exp(R)-1 by a polynomial | ||
128 | | p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) | ||
129 | | Notes: a) In order to reduce memory access, the coefficients are | ||
130 | | made as "short" as possible: A1 (which is 1/2), A4 and A5 | ||
131 | | are single precision; A2 and A3 are double precision. | ||
132 | | b) Even with the restrictions above, | ||
133 | | |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. | ||
134 | | Note that 0.0062 is slightly bigger than 0.57 log2/64. | ||
135 | | c) To fully utilize the pipeline, p is separated into | ||
136 | | two independent pieces of roughly equal complexities | ||
137 | | p = [ R + R*S*(A2 + S*A4) ] + | ||
138 | | [ S*(A1 + S*(A3 + S*A5)) ] | ||
139 | | where S = R*R. | ||
140 | | | ||
141 | | Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by | ||
142 | | ans := T + ( T*p + t) | ||
143 | | where T and t are the stored values for 2^(J/64). | ||
144 | | Notes: 2^(J/64) is stored as T and t where T+t approximates | ||
145 | | 2^(J/64) to roughly 85 bits; T is in extended precision | ||
146 | | and t is in single precision. Note also that T is rounded | ||
147 | | to 62 bits so that the last two bits of T are zero. The | ||
148 | | reason for such a special form is that T-1, T-2, and T-8 | ||
149 | | will all be exact --- a property that will give much | ||
150 | | more accurate computation of the function EXPM1. | ||
151 | | | ||
152 | | Step 6. Reconstruction of exp(X) | ||
153 | | exp(X) = 2^M * 2^(J/64) * exp(R). | ||
154 | | 6.1 If AdjFlag = 0, go to 6.3 | ||
155 | | 6.2 ans := ans * AdjScale | ||
156 | | 6.3 Restore the user FPCR | ||
157 | | 6.4 Return ans := ans * Scale. Exit. | ||
158 | | Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R, | ||
159 | | |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will | ||
160 | | neither overflow nor underflow. If AdjFlag = 1, that | ||
161 | | means that | ||
162 | | X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380. | ||
163 | | Hence, exp(X) may overflow or underflow or neither. | ||
164 | | When that is the case, AdjScale = 2^(M1) where M1 is | ||
165 | | approximately M. Thus 6.2 will never cause over/underflow. | ||
166 | | Possible exception in 6.4 is overflow or underflow. | ||
167 | | The inexact exception is not generated in 6.4. Although | ||
168 | | one can argue that the inexact flag should always be | ||
169 | | raised, simulating that exception costs more than the | ||
170 | | flag is worth in practical uses. | ||
171 | | | ||
172 | | Step 7. Return 1 + X. | ||
173 | | 7.1 ans := X | ||
174 | | 7.2 Restore user FPCR. | ||
175 | | 7.3 Return ans := 1 + ans. Exit | ||
176 | | Notes: For non-zero X, the inexact exception will always be | ||
177 | | raised by 7.3. That is the only exception raised by 7.3. | ||
178 | | Note also that we use the FMOVEM instruction to move X | ||
179 | | in Step 7.1 to avoid unnecessary trapping. (Although | ||
180 | | the FMOVEM may not seem relevant since X is normalized, | ||
181 | | the precaution will be useful in the library version of | ||
182 | | this code where the separate entry for denormalized inputs | ||
183 | | will be done away with.) | ||
184 | | | ||
185 | | Step 8. Handle exp(X) where |X| >= 16380log2. | ||
186 | | 8.1 If |X| > 16480 log2, go to Step 9. | ||
187 | | (mimic 2.2 - 2.6) | ||
188 | | 8.2 N := round-to-integer( X * 64/log2 ) | ||
189 | | 8.3 Calculate J = N mod 64, J = 0,1,...,63 | ||
190 | | 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, AdjFlag := 1. | ||
191 | | 8.5 Calculate the address of the stored value 2^(J/64). | ||
192 | | 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. | ||
193 | | 8.7 Go to Step 3. | ||
194 | | Notes: Refer to notes for 2.2 - 2.6. | ||
195 | | | ||
196 | | Step 9. Handle exp(X), |X| > 16480 log2. | ||
197 | | 9.1 If X < 0, go to 9.3 | ||
198 | | 9.2 ans := Huge, go to 9.4 | ||
199 | | 9.3 ans := Tiny. | ||
200 | | 9.4 Restore user FPCR. | ||
201 | | 9.5 Return ans := ans * ans. Exit. | ||
202 | | Notes: Exp(X) will surely overflow or underflow, depending on | ||
203 | | X's sign. "Huge" and "Tiny" are respectively large/tiny | ||
204 | | extended-precision numbers whose square over/underflow | ||
205 | | with an inexact result. Thus, 9.5 always raises the | ||
206 | | inexact together with either overflow or underflow. | ||
207 | | | ||
208 | | | ||
209 | | setoxm1d | ||
210 | | -------- | ||
211 | | | ||
212 | | Step 1. Set ans := 0 | ||
213 | | | ||
214 | | Step 2. Return ans := X + ans. Exit. | ||
215 | | Notes: This will return X with the appropriate rounding | ||
216 | | precision prescribed by the user FPCR. | ||
217 | | | ||
218 | | setoxm1 | ||
219 | | ------- | ||
220 | | | ||
221 | | Step 1. Check |X| | ||
222 | | 1.1 If |X| >= 1/4, go to Step 1.3. | ||
223 | | 1.2 Go to Step 7. | ||
224 | | 1.3 If |X| < 70 log(2), go to Step 2. | ||
225 | | 1.4 Go to Step 10. | ||
226 | | Notes: The usual case should take the branches 1.1 -> 1.3 -> 2. | ||
227 | | However, it is conceivable |X| can be small very often | ||
228 | | because EXPM1 is intended to evaluate exp(X)-1 accurately | ||
229 | | when |X| is small. For further details on the comparisons, | ||
230 | | see the notes on Step 1 of setox. | ||
231 | | | ||
232 | | Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). | ||
233 | | 2.1 N := round-to-nearest-integer( X * 64/log2 ). | ||
234 | | 2.2 Calculate J = N mod 64; so J = 0,1,2,..., or 63. | ||
235 | | 2.3 Calculate M = (N - J)/64; so N = 64M + J. | ||
236 | | 2.4 Calculate the address of the stored value of 2^(J/64). | ||
237 | | 2.5 Create the values Sc = 2^M and OnebySc := -2^(-M). | ||
238 | | Notes: See the notes on Step 2 of setox. | ||
239 | | | ||
240 | | Step 3. Calculate X - N*log2/64. | ||
241 | | 3.1 R := X + N*L1, where L1 := single-precision(-log2/64). | ||
242 | | 3.2 R := R + N*L2, L2 := extended-precision(-log2/64 - L1). | ||
243 | | Notes: Applying the analysis of Step 3 of setox in this case | ||
244 | | shows that |R| <= 0.0055 (note that |X| <= 70 log2 in | ||
245 | | this case). | ||
246 | | | ||
247 | | Step 4. Approximate exp(R)-1 by a polynomial | ||
248 | | p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) | ||
249 | | Notes: a) In order to reduce memory access, the coefficients are | ||
250 | | made as "short" as possible: A1 (which is 1/2), A5 and A6 | ||
251 | | are single precision; A2, A3 and A4 are double precision. | ||
252 | | b) Even with the restriction above, | ||
253 | | |p - (exp(R)-1)| < |R| * 2^(-72.7) | ||
254 | | for all |R| <= 0.0055. | ||
255 | | c) To fully utilize the pipeline, p is separated into | ||
256 | | two independent pieces of roughly equal complexity | ||
257 | | p = [ R*S*(A2 + S*(A4 + S*A6)) ] + | ||
258 | | [ R + S*(A1 + S*(A3 + S*A5)) ] | ||
259 | | where S = R*R. | ||
260 | | | ||
261 | | Step 5. Compute 2^(J/64)*p by | ||
262 | | p := T*p | ||
263 | | where T and t are the stored values for 2^(J/64). | ||
264 | | Notes: 2^(J/64) is stored as T and t where T+t approximates | ||
265 | | 2^(J/64) to roughly 85 bits; T is in extended precision | ||
266 | | and t is in single precision. Note also that T is rounded | ||
267 | | to 62 bits so that the last two bits of T are zero. The | ||
268 | | reason for such a special form is that T-1, T-2, and T-8 | ||
269 | | will all be exact --- a property that will be exploited | ||
270 | | in Step 6 below. The total relative error in p is no | ||
271 | | bigger than 2^(-67.7) compared to the final result. | ||
272 | | | ||
273 | | Step 6. Reconstruction of exp(X)-1 | ||
274 | | exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). | ||
275 | | 6.1 If M <= 63, go to Step 6.3. | ||
276 | | 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 | ||
277 | | 6.3 If M >= -3, go to 6.5. | ||
278 | | 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 | ||
279 | | 6.5 ans := (T + OnebySc) + (p + t). | ||
280 | | 6.6 Restore user FPCR. | ||
281 | | 6.7 Return ans := Sc * ans. Exit. | ||
282 | | Notes: The various arrangements of the expressions give accurate | ||
283 | | evaluations. | ||
284 | | | ||
285 | | Step 7. exp(X)-1 for |X| < 1/4. | ||
286 | | 7.1 If |X| >= 2^(-65), go to Step 9. | ||
287 | | 7.2 Go to Step 8. | ||
288 | | | ||
289 | | Step 8. Calculate exp(X)-1, |X| < 2^(-65). | ||
290 | | 8.1 If |X| < 2^(-16312), goto 8.3 | ||
291 | | 8.2 Restore FPCR; return ans := X - 2^(-16382). Exit. | ||
292 | | 8.3 ans := X * 2^(140). | ||
293 | | 8.4 Restore FPCR; ans := ans - 2^(-16382). | ||
294 | | Return ans := ans*2^(-140). Exit | ||
295 | | Notes: The idea is to return "X - tiny" under the user | ||
296 | | precision and rounding modes. To avoid unnecessary | ||
297 | | inefficiency, we stay away from denormalized numbers the | ||
298 | | best we can. For |X| >= 2^(-16312), the straightforward | ||
299 | | 8.2 generates the inexact exception as the case warrants. | ||
300 | | | ||
301 | | Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial | ||
302 | | p = X + X*X*(B1 + X*(B2 + ... + X*B12)) | ||
303 | | Notes: a) In order to reduce memory access, the coefficients are | ||
304 | | made as "short" as possible: B1 (which is 1/2), B9 to B12 | ||
305 | | are single precision; B3 to B8 are double precision; and | ||
306 | | B2 is double extended. | ||
307 | | b) Even with the restriction above, | ||
308 | | |p - (exp(X)-1)| < |X| 2^(-70.6) | ||
309 | | for all |X| <= 0.251. | ||
310 | | Note that 0.251 is slightly bigger than 1/4. | ||
311 | | c) To fully preserve accuracy, the polynomial is computed | ||
312 | | as X + ( S*B1 + Q ) where S = X*X and | ||
313 | | Q = X*S*(B2 + X*(B3 + ... + X*B12)) | ||
314 | | d) To fully utilize the pipeline, Q is separated into | ||
315 | | two independent pieces of roughly equal complexity | ||
316 | | Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + | ||
317 | | [ S*S*(B3 + S*(B5 + ... + S*B11)) ] | ||
318 | | | ||
319 | | Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. | ||
320 | | 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all practical | ||
321 | | purposes. Therefore, go to Step 1 of setox. | ||
322 | | 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical purposes. | ||
323 | | ans := -1 | ||
324 | | Restore user FPCR | ||
325 | | Return ans := ans + 2^(-126). Exit. | ||
326 | | Notes: 10.2 will always create an inexact and return -1 + tiny | ||
327 | | in the user rounding precision and mode. | ||
328 | | | ||
329 | | | ||
330 | |||
331 | | Copyright (C) Motorola, Inc. 1990 | ||
332 | | All Rights Reserved | ||
333 | | | ||
334 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
335 | | The copyright notice above does not evidence any | ||
336 | | actual or intended publication of such source code. | ||
337 | |||
338 | |setox idnt 2,1 | Motorola 040 Floating Point Software Package | ||
339 | |||
340 | |section 8 | ||
341 | |||
342 | #include "fpsp.h" | ||
343 | |||
344 | L2: .long 0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000 | ||
345 | |||
346 | EXPA3: .long 0x3FA55555,0x55554431 | ||
347 | EXPA2: .long 0x3FC55555,0x55554018 | ||
348 | |||
349 | HUGE: .long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000 | ||
350 | TINY: .long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000 | ||
351 | |||
352 | EM1A4: .long 0x3F811111,0x11174385 | ||
353 | EM1A3: .long 0x3FA55555,0x55554F5A | ||
354 | |||
355 | EM1A2: .long 0x3FC55555,0x55555555,0x00000000,0x00000000 | ||
356 | |||
357 | EM1B8: .long 0x3EC71DE3,0xA5774682 | ||
358 | EM1B7: .long 0x3EFA01A0,0x19D7CB68 | ||
359 | |||
360 | EM1B6: .long 0x3F2A01A0,0x1A019DF3 | ||
361 | EM1B5: .long 0x3F56C16C,0x16C170E2 | ||
362 | |||
363 | EM1B4: .long 0x3F811111,0x11111111 | ||
364 | EM1B3: .long 0x3FA55555,0x55555555 | ||
365 | |||
366 | EM1B2: .long 0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB | ||
367 | .long 0x00000000 | ||
368 | |||
369 | TWO140: .long 0x48B00000,0x00000000 | ||
370 | TWON140: .long 0x37300000,0x00000000 | ||
371 | |||
372 | EXPTBL: | ||
373 | .long 0x3FFF0000,0x80000000,0x00000000,0x00000000 | ||
374 | .long 0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B | ||
375 | .long 0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9 | ||
376 | .long 0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369 | ||
377 | .long 0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C | ||
378 | .long 0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F | ||
379 | .long 0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729 | ||
380 | .long 0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF | ||
381 | .long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF | ||
382 | .long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA | ||
383 | .long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051 | ||
384 | .long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029 | ||
385 | .long 0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494 | ||
386 | .long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0 | ||
387 | .long 0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D | ||
388 | .long 0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537 | ||
389 | .long 0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD | ||
390 | .long 0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087 | ||
391 | .long 0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818 | ||
392 | .long 0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D | ||
393 | .long 0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890 | ||
394 | .long 0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C | ||
395 | .long 0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05 | ||
396 | .long 0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126 | ||
397 | .long 0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140 | ||
398 | .long 0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA | ||
399 | .long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A | ||
400 | .long 0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC | ||
401 | .long 0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC | ||
402 | .long 0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610 | ||
403 | .long 0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90 | ||
404 | .long 0x3FFF0000,0xB311C412,0xA9112488,0x201F678A | ||
405 | .long 0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13 | ||
406 | .long 0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30 | ||
407 | .long 0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC | ||
408 | .long 0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6 | ||
409 | .long 0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70 | ||
410 | .long 0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518 | ||
411 | .long 0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41 | ||
412 | .long 0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B | ||
413 | .long 0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568 | ||
414 | .long 0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E | ||
415 | .long 0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03 | ||
416 | .long 0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D | ||
417 | .long 0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4 | ||
418 | .long 0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C | ||
419 | .long 0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9 | ||
420 | .long 0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21 | ||
421 | .long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F | ||
422 | .long 0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F | ||
423 | .long 0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207 | ||
424 | .long 0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175 | ||
425 | .long 0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B | ||
426 | .long 0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5 | ||
427 | .long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A | ||
428 | .long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22 | ||
429 | .long 0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945 | ||
430 | .long 0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B | ||
431 | .long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3 | ||
432 | .long 0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05 | ||
433 | .long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19 | ||
434 | .long 0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5 | ||
435 | .long 0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22 | ||
436 | .long 0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A | ||
437 | |||
438 | .set ADJFLAG,L_SCR2 | ||
439 | .set SCALE,FP_SCR1 | ||
440 | .set ADJSCALE,FP_SCR2 | ||
441 | .set SC,FP_SCR3 | ||
442 | .set ONEBYSC,FP_SCR4 | ||
443 | |||
444 | | xref t_frcinx | ||
445 | |xref t_extdnrm | ||
446 | |xref t_unfl | ||
447 | |xref t_ovfl | ||
448 | |||
449 | .global setoxd | ||
450 | setoxd: | ||
451 | |--entry point for EXP(X), X is denormalized | ||
452 | movel (%a0),%d0 | ...first long of X (sign in bit 31) | ||
453 | andil #0x80000000,%d0 | ...keep only the sign bit | ||
454 | oril #0x00800000,%d0 | ...sign(X)*2^(-126) | ||
455 | movel %d0,-(%sp) | ...push it as a single-precision operand | ||
456 | fmoves #0x3F800000,%fp0 | ...ans := 1.0 | ||
457 | fmovel %d1,%fpcr | ...FPCR := d1 (presumably the user FPCR; confirm against caller) | ||
458 | fadds (%sp)+,%fp0 | ...ans := 1 + sign(X)*2^(-126); pops the operand | ||
459 | bra t_frcinx | ||
460 | |||
461 | .global setox | ||
462 | setox: | ||
463 | |--entry point for EXP(X), here X is finite, non-zero, and not a NaN | ||
464 | |||
465 | |--Step 1. | ||
466 | movel (%a0),%d0 | ...load part of input X | ||
467 | andil #0x7FFF0000,%d0 | ...biased expo. of X | ||
468 | cmpil #0x3FBE0000,%d0 | ...2^(-65) | ||
469 | bges EXPC1 | ...normal case | ||
470 | bra EXPSM | ...|X| < 2^(-65) | ||
471 | |||
472 | EXPC1: | ||
473 | |--The case |X| >= 2^(-65) | ||
474 | movew 4(%a0),%d0 | ...expo. and partial sig. of |X| | ||
475 | cmpil #0x400CB167,%d0 | ...16380 log2 trunc. 16 bits | ||
476 | blts EXPMAIN | ...normal case | ||
477 | bra EXPBIG | ...|X| at or above the 16380 log2 threshold | ||
478 | |||
479 | EXPMAIN: | ||
480 | |--Step 2. | ||
481 | |--This is the normal branch: 2^(-65) <= |X| < 16380 log2. | ||
482 | fmovex (%a0),%fp0 | ...load input from (a0) | ||
483 | |||
484 | fmovex %fp0,%fp1 | ...fp1 keeps X | ||
485 | fmuls #0x42B8AA3B,%fp0 | ...64/log2 * X | ||
486 | fmovemx %fp2-%fp2/%fp3,-(%a7) | ...save fp2 and fp3 | ||
487 | movel #0,ADJFLAG(%a6) | ...ADJFLAG = 0: Step 6 skips ADJUST | ||
488 | fmovel %fp0,%d0 | ...N = int( X * 64/log2 ) | ||
489 | lea EXPTBL,%a1 | ...a1 = base of EXPTBL | ||
490 | fmovel %d0,%fp0 | ...convert to floating-format | ||
491 | |||
492 | movel %d0,L_SCR1(%a6) | ...save N temporarily | ||
493 | andil #0x3F,%d0 | ...D0 is J = N mod 64 | ||
494 | lsll #4,%d0 | ...J*16 (16 bytes are read per entry in Step 5) | ||
495 | addal %d0,%a1 | ...address of 2^(J/64) | ||
496 | movel L_SCR1(%a6),%d0 | ...reload N | ||
497 | asrl #6,%d0 | ...D0 is M | ||
498 | addiw #0x3FFF,%d0 | ...biased expo. of 2^(M) | ||
499 | movew L2,L_SCR1(%a6) | ...prefetch L2, no need in CB | ||
500 | |||
501 | EXPCONT1: | ||
502 | |--Step 3. | ||
503 | |--fp2,fp3 saved on the stack. fp0 is N, fp1 is X, | ||
504 | |--a1 points to 2^(J/64), D0 is biased expo. of 2^(M) | ||
505 | fmovex %fp0,%fp2 | ...fp2 is a copy of N | ||
506 | fmuls #0xBC317218,%fp0 | ...N * L1, L1 = lead(-log2/64) | ||
507 | fmulx L2,%fp2 | ...N * L2, L1+L2 = -log2/64 | ||
508 | faddx %fp1,%fp0 | ...X + N*L1 | ||
509 | faddx %fp2,%fp0 | ...fp0 is R, reduced arg. | ||
510 | | MOVE.W #$3FA5,EXPA3 ...load EXPA3 in cache | ||
511 | |||
512 | |--Step 4. | ||
513 | |--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL | ||
514 | |-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) | ||
515 | |--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R | ||
516 | |--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))] | ||
517 | |||
518 | fmovex %fp0,%fp1 | ||
519 | fmulx %fp1,%fp1 | ...fp1 IS S = R*R | ||
520 | |||
521 | fmoves #0x3AB60B70,%fp2 | ...fp2 IS A5 | ||
522 | | MOVE.W #0,2(%a1) ...load 2^(J/64) in cache | ||
523 | |||
524 | fmulx %fp1,%fp2 | ...fp2 IS S*A5 | ||
525 | fmovex %fp1,%fp3 | ||
526 | fmuls #0x3C088895,%fp3 | ...fp3 IS S*A4 | ||
527 | |||
528 | faddd EXPA3,%fp2 | ...fp2 IS A3+S*A5 | ||
529 | faddd EXPA2,%fp3 | ...fp3 IS A2+S*A4 | ||
530 | |||
531 | fmulx %fp1,%fp2 | ...fp2 IS S*(A3+S*A5) | ||
532 | movew %d0,SCALE(%a6) | ...SCALE is 2^(M) in extended | ||
533 | clrw SCALE+2(%a6) | ||
534 | movel #0x80000000,SCALE+4(%a6) | ||
535 | clrl SCALE+8(%a6) | ||
536 | |||
537 | fmulx %fp1,%fp3 | ...fp3 IS S*(A2+S*A4) | ||
538 | |||
539 | fadds #0x3F000000,%fp2 | ...fp2 IS A1+S*(A3+S*A5) | ||
540 | fmulx %fp0,%fp3 | ...fp3 IS R*S*(A2+S*A4) | ||
541 | |||
542 | fmulx %fp1,%fp2 | ...fp2 IS S*(A1+S*(A3+S*A5)) | ||
543 | faddx %fp3,%fp0 | ...fp0 IS R+R*S*(A2+S*A4), | ||
544 | | ...fp3 released | ||
545 | |||
546 | fmovex (%a1)+,%fp1 | ...fp1 is lead. pt. of 2^(J/64) | ||
547 | faddx %fp2,%fp0 | ...fp0 is EXP(R) - 1 | ||
548 | | ...fp2 released | ||
549 | |||
550 | |--Step 5 | ||
551 | |--final reconstruction process | ||
552 | |--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) ) | ||
553 | |||
554 | fmulx %fp1,%fp0 | ...2^(J/64)*(Exp(R)-1) | ||
555 | fmovemx (%a7)+,%fp2-%fp2/%fp3 | ...fp2 and fp3 restored | ||
556 | fadds (%a1),%fp0 | ...add single-prec. tail for accurate 2^(J/64) | ||
557 | |||
558 | faddx %fp1,%fp0 | ...2^(J/64) + 2^(J/64)*... | ||
559 | movel ADJFLAG(%a6),%d0 | ||
560 | |||
561 | |--Step 6 | ||
562 | tstl %d0 | ...ADJFLAG is nonzero only when set by EXPBIG | ||
563 | beqs NORMAL | ||
564 | ADJUST: | ||
565 | fmulx ADJSCALE(%a6),%fp0 | ...multiply by ADJSCALE first | ||
566 | NORMAL: | ||
567 | fmovel %d1,%FPCR | ...restore user FPCR | ||
568 | fmulx SCALE(%a6),%fp0 | ...multiply 2^(M) | ||
569 | bra t_frcinx | ||
570 | |||
571 | EXPSM: | ||
572 | |--Step 7 |X| < 2^(-65): return 1+X | ||
573 | fmovemx (%a0),%fp0-%fp0 | ...in case X is denormalized | ||
574 | fmovel %d1,%FPCR | ||
575 | fadds #0x3F800000,%fp0 | ...1+X in user mode | ||
576 | bra t_frcinx | ||
577 | |||
578 | EXPBIG: | ||
579 | |--Step 8 | ||
580 | cmpil #0x400CB27C,%d0 | ...16480 log2 | ||
581 | bgts EXP2BIG | ...above the 16480 log2 threshold | ||
582 | |--Steps 8.2 -- 8.6 | ||
583 | fmovex (%a0),%fp0 | ...load input from (a0) | ||
584 | |||
585 | fmovex %fp0,%fp1 | ...fp1 keeps X | ||
586 | fmuls #0x42B8AA3B,%fp0 | ...64/log2 * X | ||
587 | fmovemx %fp2-%fp2/%fp3,-(%a7) | ...save fp2 and fp3 | ||
588 | movel #1,ADJFLAG(%a6) | ...ADJFLAG = 1: Step 6 also applies ADJSCALE | ||
589 | fmovel %fp0,%d0 | ...N = int( X * 64/log2 ) | ||
590 | lea EXPTBL,%a1 | ||
591 | fmovel %d0,%fp0 | ...convert to floating-format | ||
592 | movel %d0,L_SCR1(%a6) | ...save N temporarily | ||
593 | andil #0x3F,%d0 | ...D0 is J = N mod 64 | ||
594 | lsll #4,%d0 | ||
595 | addal %d0,%a1 | ...address of 2^(J/64) | ||
596 | movel L_SCR1(%a6),%d0 | ||
597 | asrl #6,%d0 | ...D0 is K | ||
598 | movel %d0,L_SCR1(%a6) | ...save K temporarily | ||
599 | asrl #1,%d0 | ...D0 is M1 | ||
600 | subl %d0,L_SCR1(%a6) | ...L_SCR1 is M = K - M1 | ||
601 | addiw #0x3FFF,%d0 | ...biased expo. of 2^(M1) | ||
602 | movew %d0,ADJSCALE(%a6) | ...ADJSCALE := 2^(M1) | ||
603 | clrw ADJSCALE+2(%a6) | ||
604 | movel #0x80000000,ADJSCALE+4(%a6) | ||
605 | clrl ADJSCALE+8(%a6) | ||
606 | movel L_SCR1(%a6),%d0 | ...D0 is M | ||
607 | addiw #0x3FFF,%d0 | ...biased expo. of 2^(M) | ||
608 | bra EXPCONT1 | ...go back to Step 3 | ||
609 | |||
610 | EXP2BIG: | ||
611 | |--Step 9 |X| too large: hand off to t_unfl or t_ovfl by sign of X | ||
612 | fmovel %d1,%FPCR | ||
613 | movel (%a0),%d0 | ...d0 keeps the original sign of X | ||
614 | bclrb #sign_bit,(%a0) | ...setox always returns positive | ||
615 | cmpil #0,%d0 | ...test the saved sign | ||
616 | blt t_unfl | ...X negative | ||
617 | bra t_ovfl | ...X positive | ||
618 | |||
619 | .global setoxm1d | ||
620 | setoxm1d: | ||
621 | |--entry point for EXPM1(X), here X is denormalized | ||
622 | |--Step 0. | ||
623 | bra t_extdnrm | ...all work is done by t_extdnrm (defined elsewhere) | ||
624 | |||
625 | |||
626 | .global setoxm1 | ||
627 | setoxm1: | ||
628 | |--entry point for EXPM1(X), here X is finite, non-zero, non-NaN | ||
629 | |||
630 | |--Step 1. | ||
631 | |--Step 1.1 | ||
632 | movel (%a0),%d0 | ...load part of input X | ||
633 | andil #0x7FFF0000,%d0 | ...biased expo. of X | ||
634 | cmpil #0x3FFD0000,%d0 | ...1/4 | ||
635 | bges EM1CON1 | ...|X| >= 1/4 | ||
636 | bra EM1SM | ...|X| < 1/4 | ||
637 | |||
638 | EM1CON1: | ||
639 | |--Step 1.3 | ||
640 | |--The case |X| >= 1/4 | ||
641 | movew 4(%a0),%d0 | ...expo. and partial sig. of |X| | ||
642 | cmpil #0x4004C215,%d0 | ...70log2 rounded up to 16 bits | ||
643 | bles EM1MAIN | ...1/4 <= |X| <= 70log2 | ||
644 | bra EM1BIG | ...|X| > 70log2 | ||
645 | |||
646 | EM1MAIN: | ||
647 | |--Step 2. | ||
648 | |--This is the case: 1/4 <= |X| <= 70 log2. | ||
649 | fmovex (%a0),%fp0 | ...load input from (a0) | ||
650 | |||
651 | fmovex %fp0,%fp1 | ...fp1 keeps X | ||
652 | fmuls #0x42B8AA3B,%fp0 | ...64/log2 * X | ||
653 | fmovemx %fp2-%fp2/%fp3,-(%a7) | ...save fp2 and fp3 | ||
654 | | MOVE.W #$3F81,EM1A4 ...prefetch in CB mode | ||
655 | fmovel %fp0,%d0 | ...N = int( X * 64/log2 ) | ||
656 | lea EXPTBL,%a1 | ||
657 | fmovel %d0,%fp0 | ...convert to floating-format | ||
658 | |||
659 | movel %d0,L_SCR1(%a6) | ...save N temporarily | ||
660 | andil #0x3F,%d0 | ...D0 is J = N mod 64 | ||
661 | lsll #4,%d0 | ...J*16, byte offset into EXPTBL | ||
662 | addal %d0,%a1 | ...address of 2^(J/64) | ||
663 | movel L_SCR1(%a6),%d0 | ||
664 | asrl #6,%d0 | ...D0 is M | ||
665 | movel %d0,L_SCR1(%a6) | ...save a copy of M | ||
666 | | MOVE.W #$3FDC,L2 ...prefetch L2 in CB mode | ||
667 | |||
668 | |--Step 3. | ||
669 | |--fp2,fp3 saved on the stack. fp0 is N, fp1 is X, | ||
670 | |--a1 points to 2^(J/64), D0 and L_SCR1 both contain M | ||
671 | fmovex %fp0,%fp2 | ||
672 | fmuls #0xBC317218,%fp0 | ...N * L1, L1 = lead(-log2/64) | ||
673 | fmulx L2,%fp2 | ...N * L2, L1+L2 = -log2/64 | ||
674 | faddx %fp1,%fp0 | ...X + N*L1 | ||
675 | faddx %fp2,%fp0 | ...fp0 is R, reduced arg. | ||
676 | | MOVE.W #$3FC5,EM1A2 ...load EM1A2 in cache | ||
677 | addiw #0x3FFF,%d0 | ...D0 is biased expo. of 2^M | ||
678 | |||
679 | |--Step 4. | ||
680 | |--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL | ||
681 | |-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6))))) | ||
682 | |--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R | ||
683 | |--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))] | ||
684 | |||
685 | fmovex %fp0,%fp1 | ||
686 | fmulx %fp1,%fp1 | ...fp1 IS S = R*R | ||
687 | |||
688 | fmoves #0x3950097B,%fp2 | ...fp2 IS A6 | ||
689 | | MOVE.W #0,2(%a1) ...load 2^(J/64) in cache | ||
690 | |||
691 | fmulx %fp1,%fp2 | ...fp2 IS S*A6 | ||
692 | fmovex %fp1,%fp3 | ||
693 | fmuls #0x3AB60B6A,%fp3 | ...fp3 IS S*A5 | ||
694 | |||
695 | faddd EM1A4,%fp2 | ...fp2 IS A4+S*A6 | ||
696 | faddd EM1A3,%fp3 | ...fp3 IS A3+S*A5 | ||
697 | movew %d0,SC(%a6) | ...SC is 2^(M) in extended | ||
698 | clrw SC+2(%a6) | ||
699 | movel #0x80000000,SC+4(%a6) | ||
700 | clrl SC+8(%a6) | ||
701 | |||
702 | fmulx %fp1,%fp2 | ...fp2 IS S*(A4+S*A6) | ||
703 | movel L_SCR1(%a6),%d0 | ...D0 is M | ||
704 | negw %d0 | ...D0 is -M | ||
705 | fmulx %fp1,%fp3 | ...fp3 IS S*(A3+S*A5) | ||
706 | addiw #0x3FFF,%d0 | ...biased expo. of 2^(-M) | ||
707 | faddd EM1A2,%fp2 | ...fp2 IS A2+S*(A4+S*A6) | ||
708 | fadds #0x3F000000,%fp3 | ...fp3 IS A1+S*(A3+S*A5) | ||
709 | |||
710 | fmulx %fp1,%fp2 | ...fp2 IS S*(A2+S*(A4+S*A6)) | ||
711 | oriw #0x8000,%d0 | ...signed/expo. of -2^(-M) | ||
712 | movew %d0,ONEBYSC(%a6) | ...OnebySc is -2^(-M) | ||
713 | clrw ONEBYSC+2(%a6) | ||
714 | movel #0x80000000,ONEBYSC+4(%a6) | ||
715 | clrl ONEBYSC+8(%a6) | ||
716 | fmulx %fp3,%fp1 | ...fp1 IS S*(A1+S*(A3+S*A5)) | ||
717 | | ...fp3 released | ||
718 | |||
719 | fmulx %fp0,%fp2 | ...fp2 IS R*S*(A2+S*(A4+S*A6)) | ||
720 | faddx %fp1,%fp0 | ...fp0 IS R+S*(A1+S*(A3+S*A5)) | ||
721 | | ...fp1 released | ||
722 | |||
723 | faddx %fp2,%fp0 | ...fp0 IS EXP(R)-1 | ||
724 | | ...fp2 released | ||
725 | fmovemx (%a7)+,%fp2-%fp2/%fp3 | ...fp2 and fp3 restored | ||
726 | |||
727 | |--Step 5 | ||
728 | |--Compute 2^(J/64)*p | ||
729 | |||
730 | fmulx (%a1),%fp0 | ...2^(J/64)*(Exp(R)-1) | ||
731 | |||
732 | |--Step 6 | ||
733 | |--Step 6.1 | ||
734 | movel L_SCR1(%a6),%d0 | ...retrieve M | ||
735 | cmpil #63,%d0 | ||
736 | bles MLE63 | ...M <= 63 | ||
737 | |--Step 6.2 M >= 64 | ||
738 | fmoves 12(%a1),%fp1 | ...fp1 is t | ||
739 | faddx ONEBYSC(%a6),%fp1 | ...fp1 is t+OnebySc | ||
740 | faddx %fp1,%fp0 | ...p+(t+OnebySc), fp1 released | ||
741 | faddx (%a1),%fp0 | ...T+(p+(t+OnebySc)) | ||
742 | bras EM1SCALE | ||
743 | MLE63: | ||
744 | |--Step 6.3 M <= 63 | ||
745 | cmpil #-3,%d0 | ||
746 | bges MGEN3 | ...-3 <= M <= 63 | ||
747 | MLTN3: | ||
748 | |--Step 6.4 M <= -4 | ||
749 | fadds 12(%a1),%fp0 | ...p+t | ||
750 | faddx (%a1),%fp0 | ...T+(p+t) | ||
751 | faddx ONEBYSC(%a6),%fp0 | ...OnebySc + (T+(p+t)) | ||
752 | bras EM1SCALE | ||
753 | MGEN3: | ||
754 | |--Step 6.5 -3 <= M <= 63 | ||
755 | fmovex (%a1)+,%fp1 | ...fp1 is T | ||
756 | fadds (%a1),%fp0 | ...fp0 is p+t | ||
757 | faddx ONEBYSC(%a6),%fp1 | ...fp1 is T+OnebySc | ||
758 | faddx %fp1,%fp0 | ...(T+OnebySc)+(p+t) | ||
759 | |||
760 | EM1SCALE: | ||
761 | |--Step 6.6 | ||
762 | fmovel %d1,%FPCR | ||
763 | fmulx SC(%a6),%fp0 | ...multiply 2^(M) | ||
764 | |||
765 | bra t_frcinx | ||
766 | |||
767 | EM1SM: | ||
768 | |--Step 7 |X| < 1/4. | ||
769 | cmpil #0x3FBE0000,%d0 | ...2^(-65) | ||
770 | bges EM1POLY | ...2^(-65) <= |X| < 1/4 | ||
771 | |||
772 | EM1TINY: | ||
773 | |--Step 8 |X| < 2^(-65) | ||
774 | cmpil #0x00330000,%d0 | ...2^(-16312); NOTE(review): 0x0033 is biased expo. 51 = 2^(-16332), confirm | ||
775 | blts EM12TINY | ||
776 | |--Step 8.2 | ||
777 | movel #0x80010000,SC(%a6) | ...SC is -2^(-16382) | ||
778 | movel #0x80000000,SC+4(%a6) | ||
779 | clrl SC+8(%a6) | ||
780 | fmovex (%a0),%fp0 | ||
781 | fmovel %d1,%FPCR | ||
782 | faddx SC(%a6),%fp0 | ...X + SC | ||
783 | |||
784 | bra t_frcinx | ||
785 | |||
786 | EM12TINY: | ||
787 | |--Step 8.3 | ||
788 | fmovex (%a0),%fp0 | ||
789 | fmuld TWO140,%fp0 | ...X * TWO140 (presumably 2^140, confirm) | ||
790 | movel #0x80010000,SC(%a6) | ...SC is -2^(-16382), as in Step 8.2 | ||
791 | movel #0x80000000,SC+4(%a6) | ||
792 | clrl SC+8(%a6) | ||
793 | faddx SC(%a6),%fp0 | ||
794 | fmovel %d1,%FPCR | ||
795 | fmuld TWON140,%fp0 | ...scale back by TWON140 (presumably 2^(-140), confirm) | ||
796 | |||
797 | bra t_frcinx | ||
798 | |||
799 | EM1POLY: | ||
800 | |--Step 9 exp(X)-1 by a simple polynomial | ||
801 | fmovex (%a0),%fp0 | ...fp0 is X | ||
802 | fmulx %fp0,%fp0 | ...fp0 is S := X*X | ||
803 | fmovemx %fp2-%fp2/%fp3,-(%a7) | ...save fp2 and fp3 | ||
804 | fmoves #0x2F30CAA8,%fp1 | ...fp1 is B12 | ||
805 | fmulx %fp0,%fp1 | ...fp1 is S*B12 | ||
806 | fmoves #0x310F8290,%fp2 | ...fp2 is B11 | ||
807 | fadds #0x32D73220,%fp1 | ...fp1 is B10+S*B12 | ||
808 | |||
809 | fmulx %fp0,%fp2 | ...fp2 is S*B11 | ||
810 | fmulx %fp0,%fp1 | ...fp1 is S*(B10 + ... | ||
811 | |||
812 | fadds #0x3493F281,%fp2 | ...fp2 is B9+S*... | ||
813 | faddd EM1B8,%fp1 | ...fp1 is B8+S*... | ||
814 | |||
815 | fmulx %fp0,%fp2 | ...fp2 is S*(B9+... | ||
816 | fmulx %fp0,%fp1 | ...fp1 is S*(B8+... | ||
817 | |||
818 | faddd EM1B7,%fp2 | ...fp2 is B7+S*... | ||
819 | faddd EM1B6,%fp1 | ...fp1 is B6+S*... | ||
820 | |||
821 | fmulx %fp0,%fp2 | ...fp2 is S*(B7+... | ||
822 | fmulx %fp0,%fp1 | ...fp1 is S*(B6+... | ||
823 | |||
824 | faddd EM1B5,%fp2 | ...fp2 is B5+S*... | ||
825 | faddd EM1B4,%fp1 | ...fp1 is B4+S*... | ||
826 | |||
827 | fmulx %fp0,%fp2 | ...fp2 is S*(B5+... | ||
828 | fmulx %fp0,%fp1 | ...fp1 is S*(B4+... | ||
829 | |||
830 | faddd EM1B3,%fp2 | ...fp2 is B3+S*... | ||
831 | faddx EM1B2,%fp1 | ...fp1 is B2+S*... | ||
832 | |||
833 | fmulx %fp0,%fp2 | ...fp2 is S*(B3+... | ||
834 | fmulx %fp0,%fp1 | ...fp1 is S*(B2+... | ||
835 | |||
836 | fmulx %fp0,%fp2 | ...fp2 is S*S*(B3+...) | ||
837 | fmulx (%a0),%fp1 | ...fp1 is X*S*(B2... | ||
838 | |||
839 | fmuls #0x3F000000,%fp0 | ...fp0 is S*B1 | ||
840 | faddx %fp2,%fp1 | ...fp1 is Q | ||
841 | | ...fp2 released | ||
842 | |||
843 | fmovemx (%a7)+,%fp2-%fp2/%fp3 | ...fp2 and fp3 restored | ||
844 | |||
845 | faddx %fp1,%fp0 | ...fp0 is S*B1+Q | ||
846 | | ...fp1 released | ||
847 | |||
848 | fmovel %d1,%FPCR | ||
849 | faddx (%a0),%fp0 | ...X + (S*B1+Q), done after FPCR is loaded from d1 | ||
850 | |||
851 | bra t_frcinx | ||
852 | |||
853 | EM1BIG: | ||
854 | |--Step 10 |X| > 70 log2 | ||
855 | movel (%a0),%d0 | ...sign and exponent of X | ||
856 | cmpil #0,%d0 | ||
857 | bgt EXPC1 | ...X positive: continue in the EXP(X) code | ||
858 | |--Step 10.2 X negative: return -1 + 2^(-126) | ||
859 | fmoves #0xBF800000,%fp0 | ...fp0 is -1 | ||
860 | fmovel %d1,%FPCR | ||
861 | fadds #0x00800000,%fp0 | ...-1 + 2^(-126) | ||
862 | |||
863 | bra t_frcinx | ||
864 | |||
865 | |end | ||
diff --git a/arch/m68k/fpsp040/sgetem.S b/arch/m68k/fpsp040/sgetem.S new file mode 100644 index 000000000000..0fcbd045ba75 --- /dev/null +++ b/arch/m68k/fpsp040/sgetem.S | |||
@@ -0,0 +1,141 @@ | |||
1 | | | ||
2 | | sgetem.sa 3.1 12/10/90 | ||
3 | | | ||
4 | | The entry point sGETEXP returns the exponent portion | ||
5 | | of the input argument. The exponent bias is removed | ||
6 | | and the exponent value is returned as an extended | ||
7 | | precision number in fp0. sGETEXPD handles denormalized | ||
8 | | numbers. | ||
9 | | | ||
10 | | The entry point sGETMAN extracts the mantissa of the | ||
11 | | input argument. The mantissa is converted to an | ||
12 | | extended precision number and returned in fp0. The | ||
13 | | range of the result is [1.0 - 2.0). | ||
14 | | | ||
15 | | | ||
16 | | Input: Double-extended number X in the ETEMP space in | ||
17 | | the floating-point save stack. | ||
18 | | | ||
19 | | Output: The functions return exp(X) or man(X) in fp0. | ||
20 | | | ||
21 | | Modified: fp0. | ||
22 | | | ||
23 | | | ||
24 | | Copyright (C) Motorola, Inc. 1990 | ||
25 | | All Rights Reserved | ||
26 | | | ||
27 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
28 | | The copyright notice above does not evidence any | ||
29 | | actual or intended publication of such source code. | ||
30 | |||
31 | |SGETEM idnt 2,1 | Motorola 040 Floating Point Software Package | ||
32 | |||
33 | |section 8 | ||
34 | |||
35 | #include "fpsp.h" | ||
36 | |||
37 | |xref nrm_set | ||
38 | |||
39 | | | ||
40 | | This entry point is used by the unimplemented instruction exception | ||
41 | | handler. It points a0 to the input operand. | ||
42 | | | ||
43 | | | ||
44 | | | ||
45 | | SGETEXP | ||
46 | | | ||
47 | |||
48 | .global sgetexp | ||
49 | sgetexp: | ||
50 | movew LOCAL_EX(%a0),%d0 |get the sign and biased exponent word | ||
51 | bclrl #15,%d0 |clear the sign bit | ||
52 | subw #0x3fff,%d0 |subtract off the bias | ||
53 | fmovew %d0,%fp0 |move the exp to fp0 (word integer source) | ||
54 | rts | ||
55 | |||
56 | .global sgetexpd | ||
57 | sgetexpd: | ||
58 | bclrb #sign_bit,LOCAL_EX(%a0) |clear the sign bit of the operand in place | ||
59 | bsr nrm_set |normalize (exp will go negative) | ||
60 | movew LOCAL_EX(%a0),%d0 |load resulting exponent into d0 | ||
61 | subw #0x3fff,%d0 |subtract off the bias | ||
62 | fmovew %d0,%fp0 |move the exp to fp0 | ||
63 | rts | ||
64 | | | ||
65 | | | ||
66 | | This entry point is used by the unimplemented instruction exception | ||
67 | | handler. It points a0 to the input operand. | ||
68 | | | ||
69 | | | ||
70 | | | ||
71 | | SGETMAN | ||
72 | | | ||
73 | | | ||
74 | | For normalized numbers, leave the mantissa alone, simply load | ||
75 | | with an exponent of +/- $3fff. | ||
76 | | | ||
77 | .global sgetman | ||
78 | sgetman: | ||
79 | movel USER_FPCR(%a6),%d0 | ||
80 | andil #0xffffff00,%d0 |clear rounding precision and mode | ||
81 | fmovel %d0,%fpcr |this fpcr setting is used by the 882 | ||
82 | movew LOCAL_EX(%a0),%d0 |get the exp (really just want sign bit) | ||
83 | orw #0x7fff,%d0 |set every exp bit, sign bit untouched | ||
84 | bclrl #14,%d0 |make it the new exp +-3fff | ||
85 | movew %d0,LOCAL_EX(%a0) |move the sign & exp back to fsave stack | ||
86 | fmovex (%a0),%fp0 |put new value back in fp0 | ||
87 | rts | ||
88 | |||
89 | | | ||
90 | | For denormalized numbers, shift the mantissa until the j-bit = 1, | ||
91 | | then load the exponent with +/- $3fff. | ||
92 | | | ||
93 | .global sgetmand | ||
94 | sgetmand: | ||
95 | movel LOCAL_HI(%a0),%d0 |load ms mant in d0 | ||
96 | movel LOCAL_LO(%a0),%d1 |load ls mant in d1 | ||
97 | bsr shft |shift mantissa bits till msbit is set | ||
98 | movel %d0,LOCAL_HI(%a0) |put ms mant back on stack | ||
99 | movel %d1,LOCAL_LO(%a0) |put ls mant back on stack | ||
100 | bras sgetman |finish with the normalized-number code | ||
101 | |||
102 | | | ||
103 | | SHFT | ||
104 | | | ||
105 | | Shifts the mantissa bits until msbit is set. | ||
106 | | input: | ||
107 | | ms mantissa part in d0 | ||
108 | | ls mantissa part in d1 | ||
109 | | output: | ||
110 | | shifted bits in d0 and d1 | ||
111 | shft: | ||
112 | tstl %d0 |if any bits set in ms mant | ||
113 | bnes upper |then branch | ||
114 | | ;else no bits set in ms mant | ||
115 | tstl %d1 |test if any bits set in ls mant | ||
116 | bnes cont |if set then continue | ||
117 | bras shft_end |else return | ||
118 | cont: | ||
119 | movel %d3,-(%a7) |save d3 | ||
120 | exg %d0,%d1 |shift ls mant to ms mant (d1 becomes 0) | ||
121 | bfffo %d0{#0:#32},%d3 |d3 = offset of first 1 in d0 (old ls mant) | ||
122 | lsll %d3,%d0 |shift first 1 to integer bit in ms mant | ||
123 | movel (%a7)+,%d3 |restore d3 | ||
124 | bras shft_end | ||
125 | upper: | ||
126 | |||
127 | moveml %d3/%d5/%d6,-(%a7) |save registers | ||
128 | bfffo %d0{#0:#32},%d3 |d3 = offset of first 1 in ms mant | ||
129 | lsll %d3,%d0 |shift ms mant until j-bit is set | ||
130 | movel %d1,%d6 |save ls mant in d6 | ||
131 | lsll %d3,%d1 |shift ls mant by count | ||
132 | movel #32,%d5 | ||
133 | subl %d3,%d5 |d5 = 32 - shift count | ||
134 | lsrl %d5,%d6 |shift off all bits but those that will | ||
135 | | ;be shifted into ms mant | ||
136 | orl %d6,%d0 |shift the ls mant bits into the ms mant | ||
137 | moveml (%a7)+,%d3/%d5/%d6 |restore registers | ||
138 | shft_end: | ||
139 | rts | ||
140 | |||
141 | |end | ||
diff --git a/arch/m68k/fpsp040/sint.S b/arch/m68k/fpsp040/sint.S new file mode 100644 index 000000000000..0f9bd28e55a0 --- /dev/null +++ b/arch/m68k/fpsp040/sint.S | |||
@@ -0,0 +1,247 @@ | |||
1 | | | ||
2 | | sint.sa 3.1 12/10/90 | ||
3 | | | ||
4 | | The entry point sINT computes the rounded integer | ||
5 | | equivalent of the input argument, sINTRZ computes | ||
6 | | the integer rounded to zero of the input argument. | ||
7 | | | ||
8 | | Entry points sint and sintrz are called from do_func | ||
9 | | to emulate the fint and fintrz unimplemented instructions, | ||
10 | | respectively. Entry point sintdo is used by bindec. | ||
11 | | | ||
12 | | Input: (Entry points sint and sintrz) Double-extended | ||
13 | | number X in the ETEMP space in the floating-point | ||
14 | | save stack. | ||
15 | | (Entry point sintdo) Double-extended number X in | ||
16 | | location pointed to by the address register a0. | ||
17 | | (Entry point sintd) Double-extended denormalized | ||
18 | | number X in the ETEMP space in the floating-point | ||
19 | | save stack. | ||
20 | | | ||
21 | | Output: The function returns int(X) or intrz(X) in fp0. | ||
22 | | | ||
23 | | Modifies: fp0. | ||
24 | | | ||
25 | | Algorithm: (sint and sintrz) | ||
26 | | | ||
27 | | 1. If exp(X) >= 63, return X. | ||
28 | | If exp(X) < 0, return +/- 0 or +/- 1, according to | ||
29 | | the rounding mode. | ||
30 | | | ||
31 | | 2. (X is in range) set rsc = 63 - exp(X). Unnormalize the | ||
32 | | result to the exponent $403e. | ||
33 | | | ||
34 | | 3. Round the result in the mode given in USER_FPCR. For | ||
35 | | sintrz, force round-to-zero mode. | ||
36 | | | ||
37 | | 4. Normalize the rounded result; store in fp0. | ||
38 | | | ||
39 | | For the denormalized cases, force the correct result | ||
40 | | for the given sign and rounding mode. | ||
41 | | | ||
42 | | Sign(X) | ||
43 | | RMODE + - | ||
44 | | ----- -------- | ||
45 | | RN +0 -0 | ||
46 | | RZ +0 -0 | ||
47 | | RM +0 -1 | ||
48 | | RP +1 -0 | ||
49 | | | ||
50 | | | ||
51 | | Copyright (C) Motorola, Inc. 1990 | ||
52 | | All Rights Reserved | ||
53 | | | ||
54 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
55 | | The copyright notice above does not evidence any | ||
56 | | actual or intended publication of such source code. | ||
57 | |||
58 | |SINT idnt 2,1 | Motorola 040 Floating Point Software Package | ||
59 | |||
60 | |section 8 | ||
61 | |||
62 | #include "fpsp.h" | ||
63 | |||
64 | |xref dnrm_lp | ||
65 | |xref nrm_set | ||
66 | |xref round | ||
67 | |xref t_inx2 | ||
68 | |xref ld_pone | ||
69 | |xref ld_mone | ||
70 | |xref ld_pzero | ||
71 | |xref ld_mzero | ||
72 | |xref snzrinx | ||
73 | |||
74 | | | ||
75 | | FINT | ||
76 | | | ||
77 | .global sint | ||
78 | sint: | ||
79 | bfextu FPCR_MODE(%a6){#2:#2},%d1 |use user's mode for rounding | ||
80 | | ;implicitly has extend precision | ||
81 | | ;in upper word. | ||
82 | movel %d1,L_SCR1(%a6) |save mode bits | ||
83 | bras sintexc |real work is done in sintexc | ||
84 | |||
85 | | | ||
86 | | FINT with extended denorm inputs. | ||
87 | | | ||
88 | .global sintd | ||
89 | sintd: | ||
90 | btstb #5,FPCR_MODE(%a6) |upper bit of the rounding mode | ||
91 | beq snzrinx |if round nearest or round zero, +/- 0 | ||
92 | btstb #4,FPCR_MODE(%a6) |lower bit of the rounding mode | ||
93 | beqs rnd_mns |clear means round mns inf | ||
94 | rnd_pls: | ||
95 | btstb #sign_bit,LOCAL_EX(%a0) | ||
96 | bnes sintmz |if round plus inf and neg, answer is -0 | ||
97 | bsr ld_pone |if round plus inf and pos, answer is +1 | ||
98 | bra t_inx2 | ||
99 | rnd_mns: | ||
100 | btstb #sign_bit,LOCAL_EX(%a0) | ||
101 | beqs sintpz |if round mns inf and pos, answer is +0 | ||
102 | bsr ld_mone |if round mns inf and neg, answer is -1 | ||
103 | bra t_inx2 | ||
104 | sintpz: | ||
105 | bsr ld_pzero | ||
106 | bra t_inx2 | ||
107 | sintmz: | ||
108 | bsr ld_mzero | ||
109 | bra t_inx2 | ||
110 | |||
111 | | | ||
112 | | FINTRZ | ||
113 | | | ||
114 | .global sintrz | ||
115 | sintrz: | ||
116 | movel #1,L_SCR1(%a6) |use rz mode for rounding | ||
117 | | ;implicitly has extend precision | ||
118 | | ;in upper word. | ||
119 | bras sintexc |real work is done in sintexc | ||
120 | | | ||
121 | | SINTDO | ||
122 | | | ||
123 | | Input: a0 points to an IEEE extended format operand | ||
124 | | Output: fp0 has the result | ||
125 | | | ||
126 | | Exceptions: | ||
127 | | | ||
128 | | If the subroutine results in an inexact operation, the inx2 and | ||
129 | | ainx bits in the USER_FPSR are set. | ||
130 | | | ||
131 | | | ||
132 | .global sintdo | ||
133 | sintdo: | ||
134 | bfextu FPCR_MODE(%a6){#2:#2},%d1 |use user's mode for rounding | ||
135 | | ;implicitly has ext precision | ||
136 | | ;in upper word. | ||
137 | movel %d1,L_SCR1(%a6) |save mode bits, then fall into sintexc | ||
138 | | | ||
139 | | Real work of sint is in sintexc | ||
140 | | | ||
141 | sintexc: | ||
142 | bclrb #sign_bit,LOCAL_EX(%a0) |convert to internal extended | ||
143 | | ;format | ||
144 | sne LOCAL_SGN(%a0) |LOCAL_SGN = $ff if the sign bit was set | ||
145 | cmpw #0x403e,LOCAL_EX(%a0) |check if (unbiased) exp > 63 | ||
146 | bgts out_rnge |branch if exp > 63 | ||
147 | cmpw #0x3ffd,LOCAL_EX(%a0) |check if (unbiased) exp > -2 | ||
148 | bgt in_rnge |if 63 >= exp >= -1, do calc | ||
149 | | | ||
150 | | Input exponent is -2 or less. Check the sign and the directed | ||
151 | | rounding modes. L_SCR1 contains the rmode in the lower byte. | ||
152 | | | ||
153 | un_rnge: | ||
154 | btstb #1,L_SCR1+3(%a6) |check for rn and rz | ||
155 | beqs un_rnrz | ||
156 | tstb LOCAL_SGN(%a0) |check for sign | ||
157 | bnes un_rmrp_neg | ||
158 | | | ||
159 | | Sign is +. If rp, load +1.0, if rm, load +0.0 | ||
160 | | | ||
161 | cmpib #3,L_SCR1+3(%a6) |check for rp | ||
162 | beqs un_ldpone |if rp, load +1.0 | ||
163 | bsr ld_pzero |if rm, load +0.0 | ||
164 | bra t_inx2 | ||
165 | un_ldpone: | ||
166 | bsr ld_pone | ||
167 | bra t_inx2 | ||
168 | | | ||
169 | | Sign is -. If rm, load -1.0, if rp, load -0.0 | ||
170 | | | ||
171 | un_rmrp_neg: | ||
172 | cmpib #2,L_SCR1+3(%a6) |check for rm | ||
173 | beqs un_ldmone |if rm, load -1.0 | ||
174 | bsr ld_mzero |if rp, load -0.0 | ||
175 | bra t_inx2 | ||
176 | un_ldmone: | ||
177 | bsr ld_mone | ||
178 | bra t_inx2 | ||
179 | | | ||
180 | | Rmode is rn or rz; return signed zero | ||
181 | | | ||
182 | un_rnrz: | ||
183 | tstb LOCAL_SGN(%a0) |check for sign | ||
184 | bnes un_rnrz_neg | ||
185 | bsr ld_pzero | ||
186 | bra t_inx2 | ||
187 | un_rnrz_neg: | ||
188 | bsr ld_mzero | ||
189 | bra t_inx2 | ||
190 | |||
191 | | | ||
192 | | Input is greater than 2^63. All bits are significant. Return | ||
193 | | the input. | ||
194 | | | ||
195 | out_rnge: | ||
196 | bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format | ||
197 | beqs intps |sign byte was zero, X is positive | ||
198 | bsetb #sign_bit,LOCAL_EX(%a0) |X was negative, put the sign bit back | ||
199 | intps: | ||
200 | fmovel %fpcr,-(%sp) |save current fpcr | ||
201 | fmovel #0,%fpcr |load X with fpcr cleared | ||
202 | fmovex LOCAL_EX(%a0),%fp0 |if exp > 63 | ||
203 | | ;then return X to the user | ||
204 | | ;there are no fraction bits | ||
205 | fmovel (%sp)+,%fpcr |restore saved fpcr | ||
206 | rts | ||
207 | |||
208 | in_rnge: | ||
209 | | ;shift off fraction bits | ||
210 | clrl %d0 |clear d0 - initial g,r,s for | ||
211 | | ;dnrm_lp | ||
212 | movel #0x403e,%d1 |set threshold for dnrm_lp | ||
213 | | ;assumes a0 points to operand | ||
214 | bsr dnrm_lp | ||
215 | | ;returns unnormalized number | ||
216 | | ;pointed by a0 | ||
217 | | ;output d0 supplies g,r,s | ||
218 | | ;used by round | ||
219 | movel L_SCR1(%a6),%d1 |use selected rounding mode | ||
220 | | | ||
221 | | | ||
222 | bsr round |round the unnorm using the mode in d1 | ||
223 | | ;input a0 ptr to ext X | ||
224 | | ; d0 g,r,s bits | ||
225 | | ; d1 PREC/MODE info | ||
226 | | ;output a0 ptr to rounded result | ||
227 | | ;inexact flag set in USER_FPSR | ||
228 | | ;if initial grs set | ||
229 | | | ||
230 | | normalize the rounded result and store value in fp0 | ||
231 | | | ||
232 | bsr nrm_set |normalize the unnorm | ||
233 | | ;Input: a0 points to operand to | ||
234 | | ;be normalized | ||
235 | | ;Output: a0 points to normalized | ||
236 | | ;result | ||
237 | bfclr LOCAL_SGN(%a0){#0:#8} |clear the internal sign byte | ||
238 | beqs nrmrndp |sign byte was zero, result is positive | ||
239 | bsetb #sign_bit,LOCAL_EX(%a0) |return to IEEE extended format | ||
240 | nrmrndp: | ||
241 | fmovel %fpcr,-(%sp) |save current fpcr | ||
242 | fmovel #0,%fpcr |load the result with fpcr cleared | ||
243 | fmovex LOCAL_EX(%a0),%fp0 |move result to fp0 | ||
244 | fmovel (%sp)+,%fpcr |restore saved fpcr | ||
245 | rts | ||
246 | |||
247 | |end | ||
diff --git a/arch/m68k/fpsp040/skeleton.S b/arch/m68k/fpsp040/skeleton.S new file mode 100644 index 000000000000..dbc1255a5e99 --- /dev/null +++ b/arch/m68k/fpsp040/skeleton.S | |||
@@ -0,0 +1,516 @@ | |||
1 | | | ||
2 | | skeleton.sa 3.2 4/26/91 | ||
3 | | | ||
4 | | This file contains code that is system dependent and will | ||
5 | | need to be modified to install the FPSP. | ||
6 | | | ||
7 | | Each entry point for exception 'xxxx' begins with a 'jmp fpsp_xxxx'. | ||
8 | | Put any target system specific handling that must be done immediately | ||
9 | | before the jump instruction. If there is no handling necessary, then | ||
10 | | the 'fpsp_xxxx' handler entry point should be placed in the exception | ||
11 | | table so that the 'jmp' can be eliminated. If the FPSP determines that the | ||
12 | | exception is one that must be reported then there will be a | ||
13 | | return from the package by a 'jmp real_xxxx'. At that point | ||
14 | | the machine state will be identical to the state before | ||
15 | | the FPSP was entered. In particular, whatever condition | ||
16 | | that caused the exception will still be pending when the FPSP | ||
17 | | package returns. Thus, there will be system specific code | ||
18 | | to handle the exception. | ||
19 | | | ||
20 | | If the exception was completely handled by the package, then | ||
21 | | the return will be via a 'jmp fpsp_done'. Unless there is | ||
22 | | OS specific work to be done (such as handling a context switch or | ||
23 | | interrupt) the user program can be resumed via 'rte'. | ||
24 | | | ||
25 | | In the following skeleton code, some typical 'real_xxxx' handling | ||
26 | | code is shown. This code may need to be moved to an appropriate | ||
27 | | place in the target system, or rewritten. | ||
28 | | | ||
29 | |||
30 | | Copyright (C) Motorola, Inc. 1990 | ||
31 | | All Rights Reserved | ||
32 | | | ||
33 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
34 | | The copyright notice above does not evidence any | ||
35 | | actual or intended publication of such source code. | ||
36 | |||
37 | | | ||
38 | | Modified for Linux-1.3.x by Jes Sorensen (jds@kom.auc.dk) | ||
39 | | | ||
40 | |||
41 | #include <linux/linkage.h> | ||
42 | #include <asm/entry.h> | ||
43 | #include <asm/offsets.h> | ||
44 | |||
45 | |SKELETON idnt 2,1 | Motorola 040 Floating Point Software Package | ||
46 | |||
47 | |section 15 | ||
48 | | | ||
49 | | The following counters are used for standalone testing | ||
50 | | | ||
51 | |||
52 | |section 8 | ||
53 | |||
54 | #include "fpsp.h" | ||
55 | |||
56 | |xref b1238_fix | ||
57 | |||
58 | | | ||
59 | | Divide by Zero exception | ||
60 | | | ||
61 | | All dz exceptions are 'real', hence no fpsp_dz entry point. | ||
62 | | | ||
63 | .global dz | ||
64 | .global real_dz | ||
65 | dz: |dz and real_dz are one entry: every dz exception is reported | ||
66 | real_dz: | ||
67 | link %a6,#-LOCAL_SIZE |open a local frame of LOCAL_SIZE bytes, addressed via a6 | ||
68 | fsave -(%sp) |push the FPU state frame | ||
69 | bclrb #E1,E_BYTE(%a6) |clear the E1 exception flag | ||
70 | frestore (%sp)+ |reload the FPU state from the stack | ||
71 | unlk %a6 |release the local frame | ||
72 | |||
73 | SAVE_ALL_INT |entry.h macro, not visible here; presumably saves the registers | ||
74 | GET_CURRENT(%d0) |entry.h macro; presumably sets up the current task pointer | ||
75 | movel %sp,%sp@- | stack frame pointer argument | ||
76 | bsrl trap_c |pass the exception to trap_c (defined elsewhere) | ||
77 | addql #4,%sp |pop the argument | ||
78 | bral ret_from_exception | ||
79 | |||
80 | | | ||
81 | | Inexact exception | ||
82 | | | ||
83 | | All inexact exceptions are real, but the 'real' handler | ||
84 | | will probably want to clear the pending exception. | ||
85 | | The provided code will clear the E3 exception (if pending), | ||
86 | | otherwise clear the E1 exception. The frestore is not really | ||
87 | | necessary for E1 exceptions. | ||
88 | | | ||
89 | | Code following the 'inex' label is to handle bug #1232. In this | ||
90 | | bug, if an E1 snan, ovfl, or unfl occurred, and the process was | ||
91 | | swapped out before taking the exception, the exception taken on | ||
92 | | return was inex, rather than the correct exception. The snan, ovfl, | ||
93 | | and unfl exception to be taken must not have been enabled. The | ||
94 | | fix is to check for E1, and the existence of one of snan, ovfl, | ||
95 | | or unfl bits set in the fpsr. If any of these are set, branch | ||
96 | | to the appropriate handler for the exception in the fpsr. Note | ||
97 | | that this fix is only for d43b parts, and is skipped if the | ||
98 | | version number is not $40. | ||
99 | | | ||
100 | | | ||
101 | .global real_inex | ||
102 | .global inex | ||
103 | inex: |entry that first runs the bug 1232 check; real_inex below skips it | ||
104 | link %a6,#-LOCAL_SIZE |open a local frame of LOCAL_SIZE bytes, addressed via a6 | ||
105 | fsave -(%sp) |push the FPU state frame | ||
106 | cmpib #VER_40,(%sp) |test version number | ||
107 | bnes not_fmt40 |other versions skip the bug 1232 check; frame is kept | ||
108 | fmovel %fpsr,-(%sp) |push a copy of the fpsr for the bit tests below | ||
109 | btstb #E1,E_BYTE(%a6) |test for E1 set | ||
110 | beqs not_b1232 |no E1 pending: not the bug 1232 case | ||
111 | btstb #snan_bit,2(%sp) |test for snan | ||
112 | beq inex_ckofl | ||
113 | addl #4,%sp |drop the fpsr copy | ||
114 | frestore (%sp)+ |reload the FPU state from the stack | ||
115 | unlk %a6 | ||
116 | bra snan |stack is unwound; take the snan entry instead | ||
117 | inex_ckofl: | ||
118 | btstb #ovfl_bit,2(%sp) |test for ovfl | ||
119 | beq inex_ckufl | ||
120 | addl #4,%sp |drop the fpsr copy | ||
121 | frestore (%sp)+ | ||
122 | unlk %a6 | ||
123 | bra ovfl |stack is unwound; take the ovfl entry instead | ||
124 | inex_ckufl: | ||
125 | btstb #unfl_bit,2(%sp) |test for unfl | ||
126 | beq not_b1232 | ||
127 | addl #4,%sp |drop the fpsr copy | ||
128 | frestore (%sp)+ | ||
129 | unlk %a6 | ||
130 | bra unfl |stack is unwound; take the unfl entry instead | ||
131 | |||
132 | | | ||
133 | | We do not have the bug 1232 case. Clean up the stack and fall | ||
134 | | through into real_inex, which rebuilds the frame. | ||
135 | | | ||
136 | not_b1232: | ||
137 | addl #4,%sp |drop the fpsr copy | ||
138 | frestore (%sp)+ | ||
139 | unlk %a6 | ||
140 | |||
141 | real_inex: | ||
142 | |||
143 | link %a6,#-LOCAL_SIZE | ||
144 | fsave -(%sp) | ||
145 | not_fmt40: |reached with the link/fsave frame already on the stack | ||
146 | bclrb #E3,E_BYTE(%a6) |clear and test E3 flag | ||
147 | beqs inex_cke1 |E3 was not set: clear E1 instead | ||
148 | | | ||
149 | | Clear dirty bit on dest register in the frame before branching | ||
150 | | to b1238_fix. | ||
151 | | | ||
152 | moveml %d0/%d1,USER_DA(%a6) |save d0/d1 around the fix-up call | ||
153 | bfextu CMDREG1B(%a6){#6:#3},%d0 |get dest reg no | ||
154 | bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit | ||
155 | bsrl b1238_fix |test for bug1238 case | ||
156 | moveml USER_DA(%a6),%d0/%d1 |restore d0/d1 | ||
157 | bras inex_done | ||
158 | inex_cke1: | ||
159 | bclrb #E1,E_BYTE(%a6) |clear the E1 exception flag | ||
160 | inex_done: | ||
161 | frestore (%sp)+ |reload the FPU state from the stack | ||
162 | unlk %a6 | ||
163 | |||
164 | SAVE_ALL_INT |entry.h macro, not visible here; presumably saves the registers | ||
165 | GET_CURRENT(%d0) |entry.h macro; presumably sets up the current task pointer | ||
166 | movel %sp,%sp@- | stack frame pointer argument | ||
167 | bsrl trap_c |pass the exception to trap_c (defined elsewhere) | ||
168 | addql #4,%sp |pop the argument | ||
169 | bral ret_from_exception | ||
170 | |||
171 | | | ||
172 | | Overflow exception | ||
173 | | | ||
174 | |xref fpsp_ovfl | ||
175 | .global real_ovfl | ||
176 | .global ovfl | ||
177 | ovfl: |vector entry: the FPSP handler gets the exception first | ||
178 | jmp fpsp_ovfl | ||
179 | real_ovfl: |entered when the overflow must be reported to the system | ||
180 | |||
181 | link %a6,#-LOCAL_SIZE |open a local frame of LOCAL_SIZE bytes, addressed via a6 | ||
182 | fsave -(%sp) |push the FPU state frame | ||
183 | bclrb #E3,E_BYTE(%a6) |clear and test E3 flag | ||
184 | bnes ovfl_done |E3 was set: leave E1 untouched | ||
185 | bclrb #E1,E_BYTE(%a6) |otherwise clear the E1 exception flag | ||
186 | ovfl_done: | ||
187 | frestore (%sp)+ |reload the FPU state from the stack | ||
188 | unlk %a6 | ||
189 | |||
190 | SAVE_ALL_INT |entry.h macro, not visible here; presumably saves the registers | ||
191 | GET_CURRENT(%d0) |entry.h macro; presumably sets up the current task pointer | ||
192 | movel %sp,%sp@- | stack frame pointer argument | ||
193 | bsrl trap_c |pass the exception to trap_c (defined elsewhere) | ||
194 | addql #4,%sp |pop the argument | ||
195 | bral ret_from_exception | ||
196 | |||
197 | | | ||
198 | | Underflow exception | ||
199 | | | ||
200 | |xref fpsp_unfl | ||
201 | .global real_unfl | ||
202 | .global unfl | ||
203 | unfl: |vector entry: the FPSP handler gets the exception first | ||
204 | jmp fpsp_unfl | ||
205 | real_unfl: |entered when the underflow must be reported to the system | ||
206 | |||
207 | link %a6,#-LOCAL_SIZE |open a local frame of LOCAL_SIZE bytes, addressed via a6 | ||
208 | fsave -(%sp) |push the FPU state frame | ||
209 | bclrb #E3,E_BYTE(%a6) |clear and test E3 flag | ||
210 | bnes unfl_done |E3 was set: leave E1 untouched | ||
211 | bclrb #E1,E_BYTE(%a6) |otherwise clear the E1 exception flag | ||
212 | unfl_done: | ||
213 | frestore (%sp)+ |reload the FPU state from the stack | ||
214 | unlk %a6 | ||
215 | |||
216 | SAVE_ALL_INT |entry.h macro, not visible here; presumably saves the registers | ||
217 | GET_CURRENT(%d0) |entry.h macro; presumably sets up the current task pointer | ||
218 | movel %sp,%sp@- | stack frame pointer argument | ||
219 | bsrl trap_c |pass the exception to trap_c (defined elsewhere) | ||
220 | addql #4,%sp |pop the argument | ||
221 | bral ret_from_exception | ||
222 | |||
223 | | | ||
224 | | Signalling NAN exception | ||
225 | | | ||
226 | |xref fpsp_snan | ||
227 | .global real_snan | ||
228 | .global snan | ||
229 | snan: |vector entry: the FPSP handler gets the exception first | ||
230 | jmp fpsp_snan | ||
231 | real_snan: |entered when the snan must be reported to the system | ||
232 | link %a6,#-LOCAL_SIZE |open a local frame of LOCAL_SIZE bytes, addressed via a6 | ||
233 | fsave -(%sp) |push the FPU state frame | ||
234 | bclrb #E1,E_BYTE(%a6) |snan is always an E1 exception | ||
235 | frestore (%sp)+ |reload the FPU state from the stack | ||
236 | unlk %a6 | ||
237 | |||
238 | SAVE_ALL_INT |entry.h macro, not visible here; presumably saves the registers | ||
239 | GET_CURRENT(%d0) |entry.h macro; presumably sets up the current task pointer | ||
240 | movel %sp,%sp@- | stack frame pointer argument | ||
241 | bsrl trap_c |pass the exception to trap_c (defined elsewhere) | ||
242 | addql #4,%sp |pop the argument | ||
243 | bral ret_from_exception | ||
244 | |||
245 | | | ||
246 | | Operand Error exception | ||
247 | | | ||
248 | |xref fpsp_operr | ||
249 | .global real_operr | ||
250 | .global operr | ||
251 | operr: |vector entry: the FPSP handler gets the exception first | ||
252 | jmp fpsp_operr | ||
253 | real_operr: |entered when the operand error must be reported to the system | ||
254 | link %a6,#-LOCAL_SIZE |open a local frame of LOCAL_SIZE bytes, addressed via a6 | ||
255 | fsave -(%sp) |push the FPU state frame | ||
256 | bclrb #E1,E_BYTE(%a6) |operr is always an E1 exception | ||
257 | frestore (%sp)+ |reload the FPU state from the stack | ||
258 | unlk %a6 | ||
259 | |||
260 | SAVE_ALL_INT |entry.h macro, not visible here; presumably saves the registers | ||
261 | GET_CURRENT(%d0) |entry.h macro; presumably sets up the current task pointer | ||
262 | movel %sp,%sp@- | stack frame pointer argument | ||
263 | bsrl trap_c |pass the exception to trap_c (defined elsewhere) | ||
264 | addql #4,%sp |pop the argument | ||
265 | bral ret_from_exception | ||
266 | |||
267 | |||
268 | | | ||
269 | | BSUN exception | ||
270 | | | ||
271 | | This sample handler simply clears the nan bit in the FPSR. | ||
272 | | | ||
273 | |xref fpsp_bsun | ||
274 | .global real_bsun | ||
275 | .global bsun | ||
276 | bsun: |vector entry: the FPSP handler gets the exception first | ||
277 | jmp fpsp_bsun | ||
278 | real_bsun: |entered when the bsun must be reported to the system | ||
279 | link %a6,#-LOCAL_SIZE |open a local frame of LOCAL_SIZE bytes, addressed via a6 | ||
280 | fsave -(%sp) |push the FPU state frame | ||
281 | bclrb #E1,E_BYTE(%a6) |bsun is always an E1 exception | ||
282 | fmovel %FPSR,-(%sp) |push a copy of the fpsr | ||
283 | bclrb #nan_bit,(%sp) |clear the nan bit in the first byte of that copy | ||
284 | fmovel (%sp)+,%FPSR |write the edited value back to the fpsr | ||
285 | frestore (%sp)+ |reload the FPU state from the stack | ||
286 | unlk %a6 | ||
287 | |||
288 | SAVE_ALL_INT |entry.h macro, not visible here; presumably saves the registers | ||
289 | GET_CURRENT(%d0) |entry.h macro; presumably sets up the current task pointer | ||
290 | movel %sp,%sp@- | stack frame pointer argument | ||
291 | bsrl trap_c |pass the exception to trap_c (defined elsewhere) | ||
292 | addql #4,%sp |pop the argument | ||
293 | bral ret_from_exception | ||
294 | |||
295 | | | ||
296 | | F-line exception | ||
297 | | | ||
298 | | A 'real' F-line exception is one that the FPSP isn't supposed to | ||
299 | | handle. E.g. an instruction with a co-processor ID that is not 1. | ||
300 | | | ||
301 | | | ||
302 | |xref fpsp_fline | ||
303 | .global real_fline | ||
304 | .global fline | ||
305 | fline: |vector entry: the FPSP handler gets the exception first | ||
306 | jmp fpsp_fline | ||
307 | real_fline: |F-line case the FPSP does not handle; no FPU frame work is done here | ||
308 | |||
309 | SAVE_ALL_INT |entry.h macro, not visible here; presumably saves the registers | ||
310 | GET_CURRENT(%d0) |entry.h macro; presumably sets up the current task pointer | ||
311 | movel %sp,%sp@- | stack frame pointer argument | ||
312 | bsrl trap_c |pass the exception to trap_c (defined elsewhere) | ||
313 | addql #4,%sp |pop the argument | ||
314 | bral ret_from_exception | ||
315 | |||
316 | | | ||
317 | | Unsupported data type exception | ||
318 | | | ||
319 | |xref fpsp_unsupp | ||
320 | .global real_unsupp | ||
321 | .global unsupp | ||
322 | unsupp: |vector entry: the FPSP handler gets the exception first | ||
323 | jmp fpsp_unsupp | ||
324 | real_unsupp: |entered when the exception must be reported to the system | ||
325 | link %a6,#-LOCAL_SIZE |open a local frame of LOCAL_SIZE bytes, addressed via a6 | ||
326 | fsave -(%sp) |push the FPU state frame | ||
327 | bclrb #E1,E_BYTE(%a6) |unsupp is always an E1 exception | ||
328 | frestore (%sp)+ |reload the FPU state from the stack | ||
329 | unlk %a6 | ||
330 | |||
331 | SAVE_ALL_INT |entry.h macro, not visible here; presumably saves the registers | ||
332 | GET_CURRENT(%d0) |entry.h macro; presumably sets up the current task pointer | ||
333 | movel %sp,%sp@- | stack frame pointer argument | ||
334 | bsrl trap_c |pass the exception to trap_c (defined elsewhere) | ||
335 | addql #4,%sp |pop the argument | ||
336 | bral ret_from_exception | ||
337 | |||
338 | | | ||
339 | | Trace exception | ||
340 | | | ||
341 | .global real_trace | ||
342 | real_trace: | ||
343 | | No FPU frame work is done here: control goes straight to trap (defined elsewhere). | ||
344 | bral trap | ||
345 | |||
346 | | | ||
347 | | fpsp_fmt_error --- exit point for frame format error | ||
348 | | | ||
349 | | The fpu stack frame does not match the frames existing | ||
350 | | or planned at the time of this writing. The fpsp is | ||
351 | | unable to handle frame sizes not in the following | ||
352 | | version:size pairs: | ||
353 | | | ||
354 | | {4060, 4160} - busy frame | ||
355 | | {4028, 4130} - unimp frame | ||
356 | | {4000, 4100} - idle frame | ||
357 | | | ||
358 | | This entry point simply holds an f-line illegal value. | ||
359 | | Replace this with a call to your kernel panic code or | ||
360 | | code to handle future revisions of the fpu. | ||
361 | | | ||
362 | .global fpsp_fmt_error | ||
363 | fpsp_fmt_error: |exit point for an unrecognised FPU frame format; holds data, not a handler | ||
364 | |||
365 | .long 0xf27f0000 |f-line illegal | ||
366 | |||
367 | | | ||
368 | | fpsp_done --- FPSP exit point | ||
369 | | | ||
370 | | The exception has been handled by the package and we are ready | ||
371 | | to return to user mode, but there may be OS specific code | ||
372 | | to execute before we do. If there is, do it now. | ||
373 | | | ||
374 | | | ||
375 | |||
376 | .global fpsp_done | ||
377 | fpsp_done: |exit point used once the package has fully handled the exception | ||
378 | btst #0x5,%sp@ | supervisor bit set in saved SR? | ||
379 | beq .Lnotkern |bit clear: the exception came from user mode | ||
380 | rte |came from supervisor mode: resume at once | ||
381 | .Lnotkern: | ||
382 | SAVE_ALL_INT |entry.h macro, not visible here; presumably saves the registers | ||
383 | GET_CURRENT(%d0) |entry.h macro; presumably sets up the current task pointer | ||
384 | tstb %curptr@(TASK_NEEDRESCHED) |test the byte at offset TASK_NEEDRESCHED from curptr | ||
385 | jne ret_from_exception | deliver signals, | ||
386 | | reschedule etc.. | ||
387 | RESTORE_ALL |byte was zero: entry.h macro; presumably restores registers and returns | ||
388 | |||
389 | | | ||
390 | | mem_write --- write to user or supervisor address space | ||
391 | | | ||
392 | | Writes to memory while in supervisor mode. copyout accomplishes | ||
393 | | this via a 'moves' instruction. copyout is a UNIX SVR3 (and later) function. | ||
394 | | If you don't have copyout, use the local copy of the function below. | ||
395 | | | ||
396 | | a0 - supervisor source address | ||
397 | | a1 - user destination address | ||
398 | | d0 - number of bytes to write (maximum count is 12) | ||
399 | | | ||
400 | | The supervisor source address is guaranteed to point into the supervisor | ||
401 | | stack. The result is that a UNIX | ||
402 | | process is allowed to sleep as a consequence of a page fault during | ||
403 | | copyout. The probability of a page fault is exceedingly small because | ||
404 | | the 68040 always reads the destination address and thus the page | ||
405 | | faults should have already been handled. | ||
406 | | | ||
407 | | If the EXC_SR shows that the exception was from supervisor space, | ||
408 | | then just do a dumb (and slow) memory move. In a UNIX environment | ||
409 | | there shouldn't be any supervisor mode floating point exceptions. | ||
410 | | | ||
411 | .global mem_write | ||
412 | mem_write: |in: a0 = source, a1 = destination, d0 = byte count | ||
413 | btstb #5,EXC_SR(%a6) |check for supervisor state | ||
414 | beqs user_write |bit clear: destination is in user space | ||
415 | super_write: |plain byte copy; d0 is tested only after a byte is moved, so it must be nonzero | ||
416 | moveb (%a0)+,(%a1)+ | ||
417 | subql #1,%d0 | ||
418 | bnes super_write | ||
419 | rts | ||
420 | user_write: | ||
421 | movel %d1,-(%sp) |preserve d1 just in case | ||
422 | movel %d0,-(%sp) |third argument: count | ||
423 | movel %a1,-(%sp) |second argument: destination | ||
424 | movel %a0,-(%sp) |first argument: source | ||
425 | jsr copyout | ||
426 | addw #12,%sp |pop the three longword arguments | ||
427 | movel (%sp)+,%d1 |restore d1 | ||
428 | rts | ||
429 | | | ||
430 | | mem_read --- read from user or supervisor address space | ||
431 | | | ||
432 | | Reads from memory while in supervisor mode. copyin accomplishes | ||
433 | | this via a 'moves' instruction. copyin is a UNIX SVR3 (and later) function. | ||
434 | | If you don't have copyin, use the local copy of the function below. | ||
435 | | | ||
436 | | The FPSP calls mem_read to read the original F-line instruction in order | ||
437 | | to extract the data register number when the 'Dn' addressing mode is | ||
438 | | used. | ||
439 | | | ||
440 | |Input: | ||
441 | | a0 - user source address | ||
442 | | a1 - supervisor destination address | ||
443 | | d0 - number of bytes to read (maximum count is 12) | ||
444 | | | ||
445 | | Like mem_write, mem_read always reads with a supervisor | ||
446 | | destination address on the supervisor stack. Also like mem_write, | ||
447 | | the EXC_SR is checked and a simple memory copy is done if reading | ||
448 | | from supervisor space is indicated. | ||
449 | | | ||
450 | .global mem_read | ||
451 | mem_read: |in: a0 = source, a1 = destination, d0 = byte count | ||
452 | btstb #5,EXC_SR(%a6) |check for supervisor state | ||
453 | beqs user_read |bit clear: source is in user space | ||
454 | super_read: |plain byte copy; d0 is tested only after a byte is moved, so it must be nonzero | ||
455 | moveb (%a0)+,(%a1)+ | ||
456 | subql #1,%d0 | ||
457 | bnes super_read | ||
458 | rts | ||
459 | user_read: | ||
460 | movel %d1,-(%sp) |preserve d1 just in case | ||
461 | movel %d0,-(%sp) |third argument: count | ||
462 | movel %a1,-(%sp) |second argument: destination | ||
463 | movel %a0,-(%sp) |first argument: source | ||
464 | jsr copyin | ||
465 | addw #12,%sp |pop the three longword arguments | ||
466 | movel (%sp)+,%d1 |restore d1 | ||
467 | rts | ||
468 | |||
469 | | | ||
470 | | Use these routines if your kernel doesn't have copyout/copyin equivalents. | ||
471 | | Assumes that D0/D1/A0/A1 are scratch registers. copyout overwrites DFC, | ||
472 | | and copyin overwrites SFC. | ||
473 | | | ||
474 | copyout: |stack arguments: source, destination, count; uses a0/a1/d0/d1 | ||
475 | movel 4(%sp),%a0 | source | ||
476 | movel 8(%sp),%a1 | destination | ||
477 | movel 12(%sp),%d0 | count | ||
478 | subl #1,%d0 | dec count by 1 for dbra | ||
479 | movel #1,%d1 |value for the disabled movec below; overwritten before any use | ||
480 | |||
481 | | DFC is already set | ||
482 | | movec %d1,%DFC | set dfc for user data space | ||
483 | moreout: | ||
484 | moveb (%a0)+,%d1 | fetch supervisor byte | ||
485 | out_ea: |listed in __ex_table at the end of this file, paired with the fixup label | ||
486 | movesb %d1,(%a1)+ | write user byte | ||
487 | dbf %d0,moreout |loop until the count in d0 runs out | ||
488 | rts | ||
489 | |||
490 | copyin: |stack arguments: source, destination, count; uses a0/a1/d0/d1 | ||
491 | movel 4(%sp),%a0 | source | ||
492 | movel 8(%sp),%a1 | destination | ||
493 | movel 12(%sp),%d0 | count | ||
494 | subl #1,%d0 | dec count by 1 for dbra | ||
495 | movel #1,%d1 |value for the disabled movec below; overwritten before any use | ||
496 | | SFC is already set | ||
497 | | movec %d1,%SFC | set sfc for user space | ||
498 | morein: | ||
499 | in_ea: |listed in __ex_table at the end of this file, paired with the fixup label | ||
500 | movesb (%a0)+,%d1 | fetch user byte | ||
501 | moveb %d1,(%a1)+ | write supervisor byte | ||
502 | dbf %d0,morein |loop until the count in d0 runs out | ||
503 | rts | ||
504 | |||
505 | .section .fixup,#alloc,#execinstr | ||
506 | .even | ||
507 | 1: |fixup target named by both table entries below | ||
508 | jbra fpsp040_die |fpsp040_die is defined elsewhere | ||
509 | |||
510 | .section __ex_table,#alloc | ||
511 | .align 4 | ||
512 | |||
513 | .long in_ea,1b |pairs the moves in copyin with the fixup above | ||
514 | .long out_ea,1b |pairs the moves in copyout with the fixup above | ||
515 | |||
516 | |end | ||
diff --git a/arch/m68k/fpsp040/slog2.S b/arch/m68k/fpsp040/slog2.S new file mode 100644 index 000000000000..517fa4563246 --- /dev/null +++ b/arch/m68k/fpsp040/slog2.S | |||
@@ -0,0 +1,188 @@ | |||
1 | | | ||
2 | | slog2.sa 3.1 12/10/90 | ||
3 | | | ||
4 | | The entry point slog10 computes the base-10 | ||
5 | | logarithm of an input argument X. | ||
6 | | slog10d does the same except the input value is a | ||
7 | | denormalized number. | ||
8 | | sLog2 and sLog2d are the base-2 analogues. | ||
9 | | | ||
10 | | INPUT: Double-extended value in memory location pointed to | ||
11 | | by address register a0. | ||
12 | | | ||
13 | | OUTPUT: log_10(X) or log_2(X) returned in floating-point | ||
14 | | register fp0. | ||
15 | | | ||
16 | | ACCURACY and MONOTONICITY: The returned result is within 1.7 | ||
17 | | ulps in 64 significant bit, i.e. within 0.5003 ulp | ||
18 | | to 53 bits if the result is subsequently rounded | ||
19 | | to double precision. The result is provably monotonic | ||
20 | | in double precision. | ||
21 | | | ||
22 | | SPEED: Two timings are measured, both in the copy-back mode. | ||
23 | | The first one is measured when the function is invoked | ||
24 | | the first time (so the instructions and data are not | ||
25 | | in cache), and the second one is measured when the | ||
26 | | function is reinvoked at the same input argument. | ||
27 | | | ||
28 | | ALGORITHM and IMPLEMENTATION NOTES: | ||
29 | | | ||
30 | | slog10d: | ||
31 | | | ||
32 | | Step 0. If X < 0, create a NaN and raise the invalid operation | ||
33 | | flag. Otherwise, save FPCR in D1; set FpCR to default. | ||
34 | | Notes: Default means round-to-nearest mode, no floating-point | ||
35 | | traps, and precision control = double extended. | ||
36 | | | ||
37 | | Step 1. Call slognd to obtain Y = log(X), the natural log of X. | ||
38 | | Notes: Even if X is denormalized, log(X) is always normalized. | ||
39 | | | ||
40 | | Step 2. Compute log_10(X) = log(X) * (1/log(10)). | ||
41 | | 2.1 Restore the user FPCR | ||
42 | | 2.2 Return ans := Y * INV_L10. | ||
43 | | | ||
44 | | | ||
45 | | slog10: | ||
46 | | | ||
47 | | Step 0. If X < 0, create a NaN and raise the invalid operation | ||
48 | | flag. Otherwise, save FPCR in D1; set FpCR to default. | ||
49 | | Notes: Default means round-to-nearest mode, no floating-point | ||
50 | | traps, and precision control = double extended. | ||
51 | | | ||
52 | | Step 1. Call sLogN to obtain Y = log(X), the natural log of X. | ||
53 | | | ||
54 | | Step 2. Compute log_10(X) = log(X) * (1/log(10)). | ||
55 | | 2.1 Restore the user FPCR | ||
56 | | 2.2 Return ans := Y * INV_L10. | ||
57 | | | ||
58 | | | ||
59 | | sLog2d: | ||
60 | | | ||
61 | | Step 0. If X < 0, create a NaN and raise the invalid operation | ||
62 | | flag. Otherwise, save FPCR in D1; set FpCR to default. | ||
63 | | Notes: Default means round-to-nearest mode, no floating-point | ||
64 | | traps, and precision control = double extended. | ||
65 | | | ||
66 | | Step 1. Call slognd to obtain Y = log(X), the natural log of X. | ||
67 | | Notes: Even if X is denormalized, log(X) is always normalized. | ||
68 | | | ||
69 | | Step 2. Compute log_2(X) = log(X) * (1/log(2)). | ||
70 | | 2.1 Restore the user FPCR | ||
71 | | 2.2 Return ans := Y * INV_L2. | ||
72 | | | ||
73 | | | ||
74 | | sLog2: | ||
75 | | | ||
76 | | Step 0. If X < 0, create a NaN and raise the invalid operation | ||
77 | | flag. Otherwise, save FPCR in D1; set FpCR to default. | ||
78 | | Notes: Default means round-to-nearest mode, no floating-point | ||
79 | | traps, and precision control = double extended. | ||
80 | | | ||
81 | | Step 1. If X is not an integer power of two, i.e., X != 2^k, | ||
82 | | go to Step 3. | ||
83 | | | ||
84 | | Step 2. Return k. | ||
85 | | 2.1 Get integer k, X = 2^k. | ||
86 | | 2.2 Restore the user FPCR. | ||
87 | | 2.3 Return ans := convert-to-double-extended(k). | ||
88 | | | ||
89 | | Step 3. Call sLogN to obtain Y = log(X), the natural log of X. | ||
90 | | | ||
91 | | Step 4. Compute log_2(X) = log(X) * (1/log(2)). | ||
92 | | 4.1 Restore the user FPCR | ||
93 | | 4.2 Return ans := Y * INV_L2. | ||
94 | | | ||
95 | |||
96 | | Copyright (C) Motorola, Inc. 1990 | ||
97 | | All Rights Reserved | ||
98 | | | ||
99 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
100 | | The copyright notice above does not evidence any | ||
101 | | actual or intended publication of such source code. | ||
102 | |||
103 | |SLOG2 idnt 2,1 | Motorola 040 Floating Point Software Package | ||
104 | |||
105 | |section 8 | ||
106 | |||
107 | |xref t_frcinx | ||
108 | |xref t_operr | ||
109 | |xref slogn | ||
110 | |xref slognd | ||
111 | |||
112 | INV_L10: .long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000 |1/log(10): multiplier used by slog10 and slog10d | ||
113 | |||
114 | INV_L2: .long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000 |1/log(2): multiplier used by slog2 and slog2d | ||
115 | |||
116 | .global slog10d | ||
117 | slog10d: | ||
118 | |--entry point for Log10(X), X is denormalized | ||
119 | movel (%a0),%d0 |first longword of X; sets N when the top (sign) bit is set | ||
120 | blt invalid |X < 0: operand error | ||
121 | movel %d1,-(%sp) |save d1; it is reloaded into the fpcr below | ||
122 | clrl %d1 |d1 = 0 for the call; presumably selects the default fpcr | ||
123 | bsr slognd | ...log(X), X denorm. | ||
124 | fmovel (%sp)+,%fpcr |restore the user fpcr saved above | ||
125 | fmulx INV_L10,%fp0 |fp0 = log(X) * 1/log(10) | ||
126 | bra t_frcinx |leave through t_frcinx (defined elsewhere) | ||
127 | |||
128 | .global slog10 | ||
129 | slog10: | ||
130 | |--entry point for Log10(X), X is normalized | ||
131 | |||
132 | movel (%a0),%d0 |first longword of X; sets N when the top (sign) bit is set | ||
133 | blt invalid |X < 0: operand error | ||
134 | movel %d1,-(%sp) |save d1; it is reloaded into the fpcr below | ||
135 | clrl %d1 |d1 = 0 for the call; presumably selects the default fpcr | ||
136 | bsr slogn | ...log(X), X normal. | ||
137 | fmovel (%sp)+,%fpcr |restore the user fpcr saved above | ||
138 | fmulx INV_L10,%fp0 |fp0 = log(X) * 1/log(10) | ||
139 | bra t_frcinx |leave through t_frcinx (defined elsewhere) | ||
140 | |||
141 | |||
142 | .global slog2d | ||
143 | slog2d: | ||
144 | |--entry point for Log2(X), X is denormalized | ||
145 | |||
146 | movel (%a0),%d0 |first longword of X; sets N when the top (sign) bit is set | ||
147 | blt invalid |X < 0: operand error | ||
148 | movel %d1,-(%sp) |save d1; it is reloaded into the fpcr below | ||
149 | clrl %d1 |d1 = 0 for the call; presumably selects the default fpcr | ||
150 | bsr slognd | ...log(X), X denorm. | ||
151 | fmovel (%sp)+,%fpcr |restore the user fpcr saved above | ||
152 | fmulx INV_L2,%fp0 |fp0 = log(X) * 1/log(2) | ||
153 | bra t_frcinx |leave through t_frcinx (defined elsewhere) | ||
154 | |||
155 | .global slog2 | ||
156 | slog2: | ||
157 | |--entry point for Log2(X), X is normalized | ||
158 | movel (%a0),%d0 |first longword of X; sets N when the top (sign) bit is set | ||
159 | blt invalid |X < 0: operand error | ||
160 | |||
161 | movel 8(%a0),%d0 |third longword of X: low half of the mantissa | ||
162 | bnes continue | ...X is not 2^k | ||
163 | |||
164 | movel 4(%a0),%d0 |second longword of X: high half of the mantissa | ||
165 | andl #0x7FFFFFFF,%d0 |ignore its top bit | ||
166 | tstl %d0 | ||
167 | bnes continue |other mantissa bits set: X is not 2^k | ||
168 | |||
169 | |--X = 2^k. | ||
170 | movew (%a0),%d0 |first word of X: sign and exponent | ||
171 | andl #0x00007FFF,%d0 |keep the 15 exponent bits | ||
172 | subl #0x3FFF,%d0 |remove the bias: d0 = k | ||
173 | fmovel %d1,%fpcr |load the fpcr from d1 (the user fpcr per the file header) | ||
174 | fmovel %d0,%fp0 |fp0 = k converted from integer | ||
175 | bra t_frcinx |leave through t_frcinx (defined elsewhere) | ||
176 | |||
177 | continue: |general case: log(X) * 1/log(2) | ||
178 | movel %d1,-(%sp) |save d1; it is reloaded into the fpcr below | ||
179 | clrl %d1 |d1 = 0 for the call; presumably selects the default fpcr | ||
180 | bsr slogn | ...log(X), X normal. | ||
181 | fmovel (%sp)+,%fpcr |restore the user fpcr saved above | ||
182 | fmulx INV_L2,%fp0 |fp0 = log(X) * 1/log(2) | ||
183 | bra t_frcinx |leave through t_frcinx (defined elsewhere) | ||
184 | |||
185 | invalid: |shared exit for X < 0 from all four entry points in this file | ||
186 | bra t_operr |t_operr is defined elsewhere | ||
187 | |||
188 | |end | ||
diff --git a/arch/m68k/fpsp040/slogn.S b/arch/m68k/fpsp040/slogn.S new file mode 100644 index 000000000000..2aaa0725c035 --- /dev/null +++ b/arch/m68k/fpsp040/slogn.S | |||
@@ -0,0 +1,592 @@ | |||
1 | | | ||
2 | | slogn.sa 3.1 12/10/90 | ||
3 | | | ||
4 | | slogn computes the natural logarithm of an | ||
5 | | input value. slognd does the same except the input value is a | ||
6 | | denormalized number. slognp1 computes log(1+X), and slognp1d | ||
7 | | computes log(1+X) for denormalized X. | ||
8 | | | ||
9 | | Input: Double-extended value in memory location pointed to by address | ||
10 | | register a0. | ||
11 | | | ||
12 | | Output: log(X) or log(1+X) returned in floating-point register Fp0. | ||
13 | | | ||
14 | | Accuracy and Monotonicity: The returned result is within 2 ulps in | ||
15 | | 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the | ||
16 | | result is subsequently rounded to double precision. The | ||
17 | | result is provably monotonic in double precision. | ||
18 | | | ||
19 | | Speed: The program slogn takes approximately 190 cycles for input | ||
20 | | argument X such that |X-1| >= 1/16, which is the usual | ||
21 | | situation. For those arguments, slognp1 takes approximately | ||
22 | | 210 cycles. For the less common arguments, the program will | ||
23 | | run no worse than 10% slower. | ||
24 | | | ||
25 | | Algorithm: | ||
26 | | LOGN: | ||
27 | | Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in | ||
28 | | u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2. | ||
29 | | | ||
30 | | Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven | ||
31 | | significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base | ||
32 | | 2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7). | ||
33 | | | ||
34 | | Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u, | ||
35 | | log(1+u) = poly. | ||
36 | | | ||
37 | | Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) | ||
38 | | by k*log(2) + (log(F) + poly). The values of log(F) are calculated | ||
39 | | beforehand and stored in the program. | ||
40 | | | ||
41 | | lognp1: | ||
42 | | Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in | ||
43 | | u where u = 2X/(2+X). Otherwise, move on to Step 2. | ||
44 | | | ||
45 | | Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2 | ||
46 | | of the algorithm for LOGN and compute log(1+X) as | ||
47 | | k*log(2) + log(F) + poly where poly approximates log(1+u), | ||
48 | | u = (Y-F)/F. | ||
49 | | | ||
50 | | Implementation Notes: | ||
51 | | Note 1. There are 64 different possible values for F, thus 64 log(F)'s | ||
52 | | need to be tabulated. Moreover, the values of 1/F are also | ||
53 | | tabulated so that the division in (Y-F)/F can be performed by a | ||
54 | | multiplication. | ||
55 | | | ||
56 | | Note 2. In Step 2 of lognp1, in order to preserve accuracy, the value | ||
57 | | Y-F has to be calculated carefully when 1/2 <= X < 3/2. | ||
58 | | | ||
59 | | Note 3. To fully exploit the pipeline, polynomials are usually separated | ||
60 | | into two parts evaluated independently before being added up. | ||
61 | | | ||
62 | |||
63 | | Copyright (C) Motorola, Inc. 1990 | ||
64 | | All Rights Reserved | ||
65 | | | ||
66 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
67 | | The copyright notice above does not evidence any | ||
68 | | actual or intended publication of such source code. | ||
69 | |||
70 | |slogn idnt 2,1 | Motorola 040 Floating Point Software Package | ||
71 | |||
72 | |section 8 | ||
73 | |||
74 | #include "fpsp.h" | ||
75 | |||
76 | BOUNDS1: .long 0x3FFEF07D,0x3FFF8841 | ...cmp2l bounds on compact X (exponent word + top mantissa word), roughly [15/16, 17/16] | ||
77 | BOUNDS2: .long 0x3FFE8000,0x3FFFC000 | ...cmp2l bounds on compact X for [1/2, 3/2] | ||
78 | |||
79 | LOGOF2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 | ...log(2), extended precision; multiplied by K in LP1CONT1 | ||
80 | |||
81 | one: .long 0x3F800000 | ...1.0, single precision | ||
82 | zero: .long 0x00000000 | ...0.0, single precision | ||
83 | infty: .long 0x7F800000 | ...+infinity, single precision | ||
84 | negone: .long 0xBF800000 | ...-1.0, single precision | ||
85 | |||
86 | LOGA6: .long 0x3FC2499A,0xB5E4040B | ...LOGA1-LOGA6: double-precision coefficients A1..A6 of the LOG(1+U) polynomial (LP1CONT1) | ||
87 | LOGA5: .long 0xBFC555B5,0x848CB7DB | ||
88 | |||
89 | LOGA4: .long 0x3FC99999,0x987D8730 | ||
90 | LOGA3: .long 0xBFCFFFFF,0xFF6F7E97 | ||
91 | |||
92 | LOGA2: .long 0x3FD55555,0x555555a4 | ||
93 | LOGA1: .long 0xBFE00000,0x00000008 | ||
94 | |||
95 | LOGB5: .long 0x3F175496,0xADD7DAD6 | ...LOGB1-LOGB5: double-precision coefficients B1..B5 of the odd polynomial (LP1CONT2) | ||
96 | LOGB4: .long 0x3F3C71C2,0xFE80C7E0 | ||
97 | |||
98 | LOGB3: .long 0x3F624924,0x928BCCFF | ||
99 | LOGB2: .long 0x3F899999,0x999995EC | ||
100 | |||
101 | LOGB1: .long 0x3FB55555,0x55555555 | ||
102 | TWO: .long 0x40000000,0x00000000 | ...read with fmoves in KISNEG1, i.e. the first longword as single-precision 2.0 | ||
103 | |||
104 | LTHOLD: .long 0x3f990000,0x80000000,0x00000000,0x00000000 | ...extended-precision threshold: slognp1 returns its argument when |X| is not above it | ||
105 | |||
106 | LOGTBL: | ...64 entries of 32 bytes each: 1/F (16 bytes) followed by LOG(F) (16 bytes) | ||
107 | .long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000 | ||
108 | .long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000 | ||
109 | .long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000 | ||
110 | .long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000 | ||
111 | .long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000 | ||
112 | .long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000 | ||
113 | .long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000 | ||
114 | .long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000 | ||
115 | .long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000 | ||
116 | .long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000 | ||
117 | .long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000 | ||
118 | .long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000 | ||
119 | .long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000 | ||
120 | .long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000 | ||
121 | .long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000 | ||
122 | .long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000 | ||
123 | .long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000 | ||
124 | .long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000 | ||
125 | .long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000 | ||
126 | .long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000 | ||
127 | .long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000 | ||
128 | .long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000 | ||
129 | .long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000 | ||
130 | .long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000 | ||
131 | .long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000 | ||
132 | .long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000 | ||
133 | .long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000 | ||
134 | .long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000 | ||
135 | .long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000 | ||
136 | .long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000 | ||
137 | .long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000 | ||
138 | .long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000 | ||
139 | .long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000 | ||
140 | .long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000 | ||
141 | .long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000 | ||
142 | .long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000 | ||
143 | .long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000 | ||
144 | .long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000 | ||
145 | .long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000 | ||
146 | .long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000 | ||
147 | .long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000 | ||
148 | .long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000 | ||
149 | .long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000 | ||
150 | .long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000 | ||
151 | .long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000 | ||
152 | .long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000 | ||
153 | .long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000 | ||
154 | .long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000 | ||
155 | .long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000 | ||
156 | .long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000 | ||
157 | .long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000 | ||
158 | .long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000 | ||
159 | .long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000 | ||
160 | .long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000 | ||
161 | .long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000 | ||
162 | .long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000 | ||
163 | .long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000 | ||
164 | .long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000 | ||
165 | .long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000 | ||
166 | .long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000 | ||
167 | .long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000 | ||
168 | .long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000 | ||
169 | .long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000 | ||
170 | .long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000 | ||
171 | .long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000 | ||
172 | .long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000 | ||
173 | .long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000 | ||
174 | .long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000 | ||
175 | .long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000 | ||
176 | .long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000 | ||
177 | .long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000 | ||
178 | .long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000 | ||
179 | .long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000 | ||
180 | .long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000 | ||
181 | .long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000 | ||
182 | .long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000 | ||
183 | .long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000 | ||
184 | .long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000 | ||
185 | .long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000 | ||
186 | .long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000 | ||
187 | .long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000 | ||
188 | .long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000 | ||
189 | .long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000 | ||
190 | .long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000 | ||
191 | .long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000 | ||
192 | .long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000 | ||
193 | .long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000 | ||
194 | .long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000 | ||
195 | .long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000 | ||
196 | .long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000 | ||
197 | .long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000 | ||
198 | .long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000 | ||
199 | .long 0x3FFE0000,0x94458094,0x45809446,0x00000000 | ||
200 | .long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000 | ||
201 | .long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000 | ||
202 | .long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000 | ||
203 | .long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000 | ||
204 | .long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000 | ||
205 | .long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000 | ||
206 | .long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000 | ||
207 | .long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000 | ||
208 | .long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000 | ||
209 | .long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000 | ||
210 | .long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000 | ||
211 | .long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000 | ||
212 | .long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000 | ||
213 | .long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000 | ||
214 | .long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000 | ||
215 | .long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000 | ||
216 | .long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000 | ||
217 | .long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000 | ||
218 | .long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000 | ||
219 | .long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000 | ||
220 | .long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000 | ||
221 | .long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000 | ||
222 | .long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000 | ||
223 | .long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000 | ||
224 | .long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000 | ||
225 | .long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000 | ||
226 | .long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000 | ||
227 | .long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000 | ||
228 | .long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000 | ||
229 | .long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000 | ||
230 | .long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000 | ||
231 | .long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000 | ||
232 | .long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000 | ||
233 | .long 0x3FFE0000,0x80808080,0x80808081,0x00000000 | ||
234 | .long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000 | ||
235 | |||
236 | .set ADJK,L_SCR1 | ...exponent adjustment: input = 2^(ADJK) * FP0 (non-zero only for denormalized input) | ||
237 | |||
238 | .set X,FP_SCR1 | ...working copy of the argument | ||
239 | .set XDCARE,X+2 | ...word after the exponent; LP1REAL stores the top mantissa word here to build the compact form | ||
240 | .set XFRAC,X+4 | ...mantissa of X (high longword at XFRAC, low at XFRAC+4) | ||
241 | |||
242 | .set F,FP_SCR2 | ...F = 1.XXXXXX1, the table point near Y | ||
243 | .set FFRAC,F+4 | ...high mantissa longword of F | ||
244 | |||
245 | .set KLOG2,FP_SCR3 | ...holds K*LOG2 until the final add | ||
246 | |||
247 | .set SAVEU,FP_SCR4 | ...holds U in LP1CONT2 | ||
248 | |||
249 | | xref t_frcinx | ||
250 | |xref t_extdnrm | ||
251 | |xref t_operr | ||
252 | |xref t_dz | ||
253 | |||
254 | .global slognd | ||
255 | slognd: | ||
256 | |--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT: NORMALIZES X INTO X(%a6), RECORDS THE SHIFT IN ADJK, THEN JOINS LOGBGN | ||
257 | |||
258 | movel #-100,ADJK(%a6) | ...INPUT = 2^(ADJK) * FP0 (this value is overwritten with -k on both paths below) | ||
259 | |||
260 | |----normalize the input value by left shifting k bits (k to be determined | ||
261 | |----below), adjusting exponent and storing -k to ADJK | ||
262 | |----the value TWOTO100 is no longer needed. | ||
263 | |----Note that this code assumes the denormalized input is NON-ZERO. | ||
264 | |||
265 | moveml %d2-%d7,-(%a7) | ...save some registers | ||
266 | movel #0x00000000,%d3 | ...D3 is exponent of smallest norm. # | ||
267 | movel 4(%a0),%d4 | ...high mantissa longword | ||
268 | movel 8(%a0),%d5 | ...(D4,D5) is (Hi_X,Lo_X) | ||
269 | clrl %d2 | ...D2 used for holding K | ||
270 | |||
271 | tstl %d4 | ...is the high longword all zero? | ||
272 | bnes HiX_not0 | ||
273 | |||
274 | HiX_0: | ...high longword is zero: shift by 32 plus the leading zeros of Lo_X | ||
275 | movel %d5,%d4 | ...move Lo_X up into the high longword | ||
276 | clrl %d5 | ||
277 | movel #32,%d2 | ...k starts at 32 for the longword move | ||
278 | clrl %d6 | ||
279 | bfffo %d4{#0:#32},%d6 | ...D6 = bit offset of the first 1 in D4 | ||
280 | lsll %d6,%d4 | ...bring that 1 up to the top bit | ||
281 | addl %d6,%d2 | ...(D3,D4,D5) is normalized | ||
282 | |||
283 | movel %d3,X(%a6) | ...store exponent longword | ||
284 | movel %d4,XFRAC(%a6) | ...store high mantissa | ||
285 | movel %d5,XFRAC+4(%a6) | ...store low mantissa | ||
286 | negl %d2 | ||
287 | movel %d2,ADJK(%a6) | ...ADJK = -k | ||
288 | fmovex X(%a6),%fp0 | ...FP0 = normalized value | ||
289 | moveml (%a7)+,%d2-%d7 | ...restore registers | ||
290 | lea X(%a6),%a0 | ...A0 now points at the normalized copy | ||
291 | bras LOGBGN | ...begin regular log(X) | ||
292 | |||
293 | |||
294 | HiX_not0: | ...high longword is non-zero: shift the 64-bit mantissa left by its leading-zero count | ||
295 | clrl %d6 | ||
296 | bfffo %d4{#0:#32},%d6 | ...find first 1 | ||
297 | movel %d6,%d2 | ...get k | ||
298 | lsll %d6,%d4 | ...shift the high longword left by k | ||
299 | movel %d5,%d7 | ...a copy of D5 | ||
300 | lsll %d6,%d5 | ...shift the low longword left by k | ||
301 | negl %d6 | ||
302 | addil #32,%d6 | ...D6 = 32 - k | ||
303 | lsrl %d6,%d7 | ...D7 = the k bits shifted out of the low longword | ||
304 | orl %d7,%d4 | ...(D3,D4,D5) normalized | ||
305 | |||
306 | movel %d3,X(%a6) | ...store exponent longword | ||
307 | movel %d4,XFRAC(%a6) | ...store high mantissa | ||
308 | movel %d5,XFRAC+4(%a6) | ...store low mantissa | ||
309 | negl %d2 | ||
310 | movel %d2,ADJK(%a6) | ...ADJK = -k | ||
311 | fmovex X(%a6),%fp0 | ...FP0 = normalized value | ||
312 | moveml (%a7)+,%d2-%d7 | ...restore registers | ||
313 | lea X(%a6),%a0 | ...A0 now points at the normalized copy | ||
314 | bras LOGBGN | ...begin regular log(X) | ||
315 | |||
316 | |||
317 | .global slogn | ||
318 | slogn: | ||
319 | |--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S. RESULT IS RETURNED IN FP0 | ||
320 | |||
321 | fmovex (%a0),%fp0 | ...LOAD INPUT | ||
322 | movel #0x00000000,ADJK(%a6) | ...no exponent adjustment for normalized input | ||
323 | |||
324 | LOGBGN: | ||
325 | |--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS | ||
326 | |--A FINITE, NON-ZERO, NORMALIZED NUMBER. | ||
327 | |||
328 | movel (%a0),%d0 | ...upper word of D0 = sign/exponent word | ||
329 | movew 4(%a0),%d0 | ...lower word of D0 = top 16 mantissa bits (compact form of X) | ||
330 | |||
331 | movel (%a0),X(%a6) | ...copy the 12-byte argument into X | ||
332 | movel 4(%a0),X+4(%a6) | ||
333 | movel 8(%a0),X+8(%a6) | ||
334 | |||
335 | cmpil #0,%d0 | ...CHECK IF X IS NEGATIVE | ||
336 | blt LOGNEG | ...LOG OF NEGATIVE ARGUMENT IS INVALID | ||
337 | cmp2l BOUNDS1,%d0 | ...X IS POSITIVE, CHECK IF X IS NEAR 1 | ||
338 | bcc LOGNEAR1 | ...BOUNDS IS ROUGHLY [15/16, 17/16]; carry clear means in bounds, else fall into LOGMAIN | ||
339 | |||
340 | LOGMAIN: | ||
341 | |--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1 (ALSO ENTERED FROM LP1REAL). EXPECTS D0 = COMPACT FORM OF X AND X(%a6) = X | ||
342 | |||
343 | |--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY. | ||
344 | |--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1. | ||
345 | |--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y) | ||
346 | |-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F). | ||
347 | |--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING | ||
348 | |--LOG(1+U) CAN BE VERY EFFICIENT. | ||
349 | |--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO | ||
350 | |--DIVISION IS NEEDED TO CALCULATE (Y-F)/F. | ||
351 | |||
352 | |--GET K, Y, F, AND ADDRESS OF 1/F. | ||
353 | asrl #8,%d0 | ...two shifts of 8 make the 16-bit shift | ||
354 | asrl #8,%d0 | ...SHIFTED 16 BITS, BIASED EXPO. OF X | ||
355 | subil #0x3FFF,%d0 | ...THIS IS K | ||
356 | addl ADJK(%a6),%d0 | ...ADJUST K, ORIGINAL INPUT MAY BE DENORM. | ||
357 | lea LOGTBL,%a0 | ...BASE ADDRESS OF 1/F AND LOG(F) | ||
358 | fmovel %d0,%fp1 | ...CONVERT K TO FLOATING-POINT FORMAT | ||
359 | |||
360 | |--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F | ||
361 | movel #0x3FFF0000,X(%a6) | ...X IS NOW Y, I.E. 2^(-K)*X | ||
362 | movel XFRAC(%a6),FFRAC(%a6) | ...start F from the high mantissa of Y | ||
363 | andil #0xFE000000,FFRAC(%a6) | ...FIRST 7 BITS OF Y | ||
364 | oril #0x01000000,FFRAC(%a6) | ...GET F: ATTACH A 1 AT THE EIGHTH BIT | ||
365 | movel FFRAC(%a6),%d0 | ...READY TO GET ADDRESS OF 1/F | ||
366 | andil #0x7E000000,%d0 | ...keep the six bits below the leading 1 as the table index | ||
367 | asrl #8,%d0 | ||
368 | asrl #8,%d0 | ||
369 | asrl #4,%d0 | ...SHIFTED 20, D0 IS THE DISPLACEMENT (INDEX * 32) | ||
370 | addal %d0,%a0 | ...A0 IS THE ADDRESS FOR 1/F | ||
371 | |||
372 | fmovex X(%a6),%fp0 | ...FP0 = Y | ||
373 | movel #0x3fff0000,F(%a6) | ...exponent of F is the same as that of Y | ||
374 | clrl F+8(%a6) | ...low mantissa of F is zero | ||
375 | fsubx F(%a6),%fp0 | ...Y-F | ||
376 | fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2 AND FP3 WHILE FP0 IS NOT READY | ||
377 | |--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K | ||
378 | |--REGISTERS SAVED: FPCR, FP1, FP2 (THE FMOVEM ABOVE ALSO PUSHES FP3) | ||
379 | |||
380 | LP1CONT1: | ||
381 | |--A RE-ENTRY POINT FOR LOGNP1 (KISNEG1 AND KISZERO BRANCH HERE). EXPECTS FP0 = Y-F, FP1 = K, A0 = ADDRESS OF 1/F, FP2/FP3 PUSHED ON THE STACK | ||
382 | fmulx (%a0),%fp0 | ...FP0 IS U = (Y-F)/F | ||
383 | fmulx LOGOF2,%fp1 | ...GET K*LOG2 WHILE FP0 IS NOT READY | ||
384 | fmovex %fp0,%fp2 | ||
385 | fmulx %fp2,%fp2 | ...FP2 IS V=U*U | ||
386 | fmovex %fp1,KLOG2(%a6) | ...PUT K*LOG2 IN MEMORY, FREE FP1 | ||
387 | |||
388 | |--LOG(1+U) IS APPROXIMATED BY | ||
389 | |--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS | ||
390 | |--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))] | ||
391 | |||
392 | fmovex %fp2,%fp3 | ...FP3 keeps V for the later multiplies | ||
393 | fmovex %fp2,%fp1 | ...FP1 and FP2 each start one half of the polynomial | ||
394 | |||
395 | fmuld LOGA6,%fp1 | ...V*A6 | ||
396 | fmuld LOGA5,%fp2 | ...V*A5 | ||
397 | |||
398 | faddd LOGA4,%fp1 | ...A4+V*A6 | ||
399 | faddd LOGA3,%fp2 | ...A3+V*A5 | ||
400 | |||
401 | fmulx %fp3,%fp1 | ...V*(A4+V*A6) | ||
402 | fmulx %fp3,%fp2 | ...V*(A3+V*A5) | ||
403 | |||
404 | faddd LOGA2,%fp1 | ...A2+V*(A4+V*A6) | ||
405 | faddd LOGA1,%fp2 | ...A1+V*(A3+V*A5) | ||
406 | |||
407 | fmulx %fp3,%fp1 | ...V*(A2+V*(A4+V*A6)) | ||
408 | addal #16,%a0 | ...ADDRESS OF LOG(F) | ||
409 | fmulx %fp3,%fp2 | ...V*(A1+V*(A3+V*A5)), FP3 RELEASED | ||
410 | |||
411 | fmulx %fp0,%fp1 | ...U*V*(A2+V*(A4+V*A6)) | ||
412 | faddx %fp2,%fp0 | ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED | ||
413 | |||
414 | faddx (%a0),%fp1 | ...LOG(F)+U*V*(A2+V*(A4+V*A6)) | ||
415 | fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...RESTORE FP2 AND FP3 | ||
416 | faddx %fp1,%fp0 | ...FP0 IS LOG(F) + LOG(1+U) | ||
417 | |||
418 | fmovel %d1,%fpcr | ...LOAD FPCR FROM D1 BEFORE THE FINAL ADD | ||
419 | faddx KLOG2(%a6),%fp0 | ...FINAL ADD | ||
420 | bra t_frcinx | ||
421 | |||
422 | |||
423 | LOGNEAR1: | ...X is within BOUNDS1; sets up FP1 = 2(X-1), FP0 = X+1 and falls into LP1CONT2 | ||
424 | |--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT. | ||
425 | fmovex %fp0,%fp1 | ||
426 | fsubs one,%fp1 | ...FP1 IS X-1 | ||
427 | fadds one,%fp0 | ...FP0 IS X+1 | ||
428 | faddx %fp1,%fp1 | ...FP1 IS 2(X-1) | ||
429 | |--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL | ||
430 | |--IN U, U = 2(X-1)/(X+1) = FP1/FP0 | ||
431 | |||
432 | LP1CONT2: | ||
433 | |--THIS IS A RE-ENTRY POINT FOR LOGNP1 (LP1ONE16 BRANCHES HERE). EXPECTS FP1 = NUMERATOR AND FP0 = DENOMINATOR OF U | ||
434 | fdivx %fp0,%fp1 | ...FP1 IS U | ||
435 | fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2 AND FP3 | ||
436 | |--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3 | ||
437 | |--LET V=U*U, W=V*V, CALCULATE | ||
438 | |--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY | ||
439 | |--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] ) | ||
440 | fmovex %fp1,%fp0 | ||
441 | fmulx %fp0,%fp0 | ...FP0 IS V | ||
442 | fmovex %fp1,SAVEU(%a6) | ...STORE U IN MEMORY, FREE FP1 | ||
443 | fmovex %fp0,%fp1 | ||
444 | fmulx %fp1,%fp1 | ...FP1 IS W | ||
445 | |||
446 | fmoved LOGB5,%fp3 | ...FP3 = B5 | ||
447 | fmoved LOGB4,%fp2 | ...FP2 = B4 | ||
448 | |||
449 | fmulx %fp1,%fp3 | ...W*B5 | ||
450 | fmulx %fp1,%fp2 | ...W*B4 | ||
451 | |||
452 | faddd LOGB3,%fp3 | ...B3+W*B5 | ||
453 | faddd LOGB2,%fp2 | ...B2+W*B4 | ||
454 | |||
455 | fmulx %fp3,%fp1 | ...W*(B3+W*B5), FP3 RELEASED | ||
456 | |||
457 | fmulx %fp0,%fp2 | ...V*(B2+W*B4) | ||
458 | |||
459 | faddd LOGB1,%fp1 | ...B1+W*(B3+W*B5) | ||
460 | fmulx SAVEU(%a6),%fp0 | ...FP0 IS U*V | ||
461 | |||
462 | faddx %fp2,%fp1 | ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED | ||
463 | fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...FP2 AND FP3 RESTORED | ||
464 | |||
465 | fmulx %fp1,%fp0 | ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] ) | ||
466 | |||
467 | fmovel %d1,%fpcr | ...LOAD FPCR FROM D1 BEFORE THE FINAL ADD | ||
468 | faddx SAVEU(%a6),%fp0 | ...FINAL ADD: U + U*V*(...) | ||
469 | bra t_frcinx | ||
470 | rts | ...NOTE(review): unreachable, it follows an unconditional bra and has no label | ||
471 | |||
472 | LOGNEG: | ...reached from LOGBGN when the compact form of X is negative | ||
473 | |--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID | ||
474 | bra t_operr | ...hand over to the operand-error handler (defined elsewhere) | ||
475 | |||
476 | .global slognp1d | ||
477 | slognp1d: | ||
478 | |--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT | ||
479 | | Simply return the denorm; no computation is done here | ||
480 | |||
481 | bra t_extdnrm | ...handler defined elsewhere; presumably returns the denormalized operand - verify in util | ||
482 | |||
483 | .global slognp1 | ||
484 | slognp1: | ||
485 | |--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S. WHEN |X| IS NOT ABOVE LTHOLD THE ARGUMENT ITSELF IS RETURNED | ||
486 | |||
487 | fmovex (%a0),%fp0 | ...LOAD INPUT | ||
488 | fabsx %fp0 |test magnitude | ||
489 | fcmpx LTHOLD,%fp0 |compare with min threshold | ||
490 | fbgt LP1REAL |if greater, continue | ||
491 | fmovel #0,%fpsr |clr N flag from compare | ||
492 | fmovel %d1,%fpcr |load FPCR from D1 before the final move | ||
493 | fmovex (%a0),%fp0 |return signed argument | ||
494 | bra t_frcinx | ||
495 | |||
496 | LP1REAL: | ...|Z| is above LTHOLD: form X = 1+Z and pick the evaluation path | ||
497 | fmovex (%a0),%fp0 | ...LOAD INPUT | ||
498 | movel #0x00000000,ADJK(%a6) | ...no exponent adjustment | ||
499 | fmovex %fp0,%fp1 | ...FP1 IS INPUT Z | ||
500 | fadds one,%fp0 | ...X := ROUND(1+Z) | ||
501 | fmovex %fp0,X(%a6) | ...store X so its exponent and mantissa can be read as integers | ||
502 | movew XFRAC(%a6),XDCARE(%a6) | ...copy the top mantissa word next to the exponent word | ||
503 | movel X(%a6),%d0 | ...D0 = compact form of X | ||
504 | cmpil #0,%d0 | ||
505 | ble LP1NEG0 | ...LOG OF ZERO OR -VE | ||
506 | cmp2l BOUNDS2,%d0 | ||
507 | bcs LOGMAIN | ...BOUNDS2 IS [1/2,3/2]; carry set means X is outside it | ||
508 | |--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z, | ||
509 | |--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE, | ||
510 | |--SIMPLY INVOKE LOG(X) FOR LOG(1+Z). | ||
511 | |||
512 | LP1NEAR1: | ...here X is within BOUNDS2 | ||
513 | |--NEXT SEE IF EXP(-1/16) < X < EXP(1/16) | ||
514 | cmp2l BOUNDS1,%d0 | ...compare compact X against BOUNDS1 | ||
515 | bcss LP1CARE | ...outside BOUNDS1: table-driven path; otherwise fall into LP1ONE16 | ||
516 | |||
517 | LP1ONE16: | ...sets up FP1 = 2Z and FP0 = 1+X for the division in LP1CONT2 | ||
518 | |--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2) | ||
519 | |--WHERE U = 2Z/(2+Z) = 2Z/(1+X). | ||
520 | faddx %fp1,%fp1 | ...FP1 IS 2Z | ||
521 | fadds one,%fp0 | ...FP0 IS 1+X | ||
522 | |--U = FP1/FP0 | ||
523 | bra LP1CONT2 | ...continue with the odd-polynomial evaluation | ||
524 | |||
525 | LP1CARE: | ...builds F from X, then chooses K = -1 (KISNEG1) or K = 0 (KISZERO) | ||
526 | |--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE | ||
527 | |--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST | ||
528 | |--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2], | ||
529 | |--THERE ARE ONLY TWO CASES. | ||
530 | |--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z | ||
531 | |--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z | ||
532 | |--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF | ||
533 | |--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED. | ||
534 | |||
535 | movel XFRAC(%a6),FFRAC(%a6) | ...start F from the high mantissa of X | ||
536 | andil #0xFE000000,FFRAC(%a6) | ...keep the first 7 bits | ||
537 | oril #0x01000000,FFRAC(%a6) | ...F OBTAINED | ||
538 | cmpil #0x3FFF8000,%d0 | ...SEE IF 1+Z > 1 | ||
539 | bges KISZERO | ...compact X >= compact 1.0: K = 0; otherwise fall into KISNEG1 | ||
540 | |||
541 | KISNEG1: | ...case 1+Z < 1: K = -1 and Y-F = (2-F) + 2Z | ||
542 | fmoves TWO,%fp0 | ...FP0 = 2.0 | ||
543 | movel #0x3fff0000,F(%a6) | ...exponent of F: F is in [1,2) | ||
544 | clrl F+8(%a6) | ...low mantissa of F is zero | ||
545 | fsubx F(%a6),%fp0 | ...2-F | ||
546 | movel FFRAC(%a6),%d0 | ||
547 | andil #0x7E000000,%d0 | ...keep the six table-index bits | ||
548 | asrl #8,%d0 | ||
549 | asrl #8,%d0 | ||
550 | asrl #4,%d0 | ...D0 CONTAINS DISPLACEMENT FOR 1/F | ||
551 | faddx %fp1,%fp1 | ...GET 2Z | ||
552 | fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2 AND FP3 | ||
553 | faddx %fp1,%fp0 | ...FP0 IS Y-F = (2-F)+2Z | ||
554 | lea LOGTBL,%a0 | ...A0 IS ADDRESS OF 1/F | ||
555 | addal %d0,%a0 | ||
556 | fmoves negone,%fp1 | ...FP1 IS K = -1 | ||
557 | bra LP1CONT1 | ||
558 | |||
559 | KISZERO: | ...case 1+Z >= 1: K = 0 and Y-F = (1-F) + Z | ||
560 | fmoves one,%fp0 | ...FP0 = 1.0 | ||
561 | movel #0x3fff0000,F(%a6) | ...exponent of F: F is in [1,2) | ||
562 | clrl F+8(%a6) | ...low mantissa of F is zero | ||
563 | fsubx F(%a6),%fp0 | ...1-F | ||
564 | movel FFRAC(%a6),%d0 | ||
565 | andil #0x7E000000,%d0 | ...keep the six table-index bits | ||
566 | asrl #8,%d0 | ||
567 | asrl #8,%d0 | ||
568 | asrl #4,%d0 | ...D0 is the displacement for 1/F | ||
569 | faddx %fp1,%fp0 | ...FP0 IS Y-F | ||
570 | fmovemx %fp2-%fp2/%fp3,-(%sp) | ...FP2 AND FP3 SAVED | ||
571 | lea LOGTBL,%a0 | ||
572 | addal %d0,%a0 | ...A0 IS ADDRESS OF 1/F | ||
573 | fmoves zero,%fp1 | ...FP1 IS K = 0 | ||
574 | bra LP1CONT1 | ||
575 | |||
576 | LP1NEG0: | ...reached from LP1REAL when compact X = 1+Z is zero or negative | ||
577 | |--FPCR SAVED. D0 IS X IN COMPACT FORM. | ||
578 | cmpil #0,%d0 | ||
579 | blts LP1NEG | ...negative: operand error; zero falls into LP1ZERO | ||
580 | LP1ZERO: | ...1+Z is zero | ||
581 | fmoves negone,%fp0 | ...FP0 = -1.0 (presumably gives t_dz the sign of the result - verify against t_dz) | ||
582 | |||
583 | fmovel %d1,%fpcr | ...load FPCR from D1 | ||
584 | bra t_dz | ...divide-by-zero handler (defined elsewhere) | ||
585 | |||
586 | LP1NEG: | ...1+Z is negative: LOG is invalid | ||
587 | fmoves zero,%fp0 | ...FP0 = 0.0 | ||
588 | |||
589 | fmovel %d1,%fpcr | ...load FPCR from D1 | ||
590 | bra t_operr | ...operand-error handler (defined elsewhere) | ||
591 | |||
592 | |end | ||
diff --git a/arch/m68k/fpsp040/smovecr.S b/arch/m68k/fpsp040/smovecr.S new file mode 100644 index 000000000000..a0127fa55e9c --- /dev/null +++ b/arch/m68k/fpsp040/smovecr.S | |||
@@ -0,0 +1,162 @@ | |||
1 | | | ||
2 | | smovecr.sa 3.1 12/10/90 | ||
3 | | | ||
4 | | The entry point sMOVECR returns the constant at the | ||
5 | | offset given in the instruction field. | ||
6 | | | ||
7 | | Input: An offset in the instruction word. | ||
8 | | | ||
9 | | Output: The constant rounded to the user's rounding | ||
10 | | mode unchecked for overflow. | ||
11 | | | ||
12 | | Modified: fp0. | ||
13 | | | ||
14 | | | ||
15 | | Copyright (C) Motorola, Inc. 1990 | ||
16 | | All Rights Reserved | ||
17 | | | ||
18 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
19 | | The copyright notice above does not evidence any | ||
20 | | actual or intended publication of such source code. | ||
21 | |||
22 | |SMOVECR idnt 2,1 | Motorola 040 Floating Point Software Package | ||
23 | |||
24 | |section 8 | ||
25 | |||
26 | #include "fpsp.h" | ||
27 | |||
28 | |xref nrm_set | ||
29 | |xref round | ||
30 | |xref PIRN | ||
31 | |xref PIRZRM | ||
32 | |xref PIRP | ||
33 | |xref SMALRN | ||
34 | |xref SMALRZRM | ||
35 | |xref SMALRP | ||
36 | |xref BIGRN | ||
37 | |xref BIGRZRM | ||
38 | |xref BIGRP | ||
39 | |||
40 | FZERO: .long 00000000 |single-precision +0.0, returned for offsets that hold no constant | ||
41 | | | ||
42 | | FMOVECR: dispatch on the 7-bit constant offset (d0) and the rounding mode (d1) | ||
43 | | | ||
44 | .global smovcr | ||
45 | smovcr: | ||
46 | bfextu CMDREG1B(%a6){#9:#7},%d0 |get offset | ||
47 | bfextu USER_FPCR(%a6){#26:#2},%d1 |get rmode | ||
48 | | | ||
49 | | check range of offset | ||
50 | | | ||
51 | tstb %d0 |if zero, offset is to pi | ||
52 | beqs PI_TBL |it is pi | ||
53 | cmpib #0x0a,%d0 |check range $01 - $0a | ||
54 | bles Z_VAL |if in this range, return zero | ||
55 | cmpib #0x0e,%d0 |check range $0b - $0e | ||
56 | bles SM_TBL |valid constants in this range | ||
57 | cmpib #0x2f,%d0 |check range $0f - $2f | ||
58 | bles Z_VAL |if in this range, return zero | ||
59 | cmpib #0x3f,%d0 |check range $30 - $3f | ||
60 | ble BG_TBL |valid constants in this range | ||
61 | Z_VAL: |also reached by fall-through for offsets $40 - $7f | ||
62 | fmoves FZERO,%fp0 |return +0.0 | ||
63 | rts | ||
64 | PI_TBL: |offset 0 (pi): pick the table for the rounding mode; d0 stays 0 | ||
65 | tstb %d1 |offset is zero, check for rmode | ||
66 | beqs PI_RN |if zero, rn mode | ||
67 | cmpib #0x3,%d1 |check for rp | ||
68 | beqs PI_RP |if 3, rp mode | ||
69 | PI_RZRM: | ||
70 | leal PIRZRM,%a0 |rmode is rz or rm, load PIRZRM in a0 | ||
71 | bra set_finx |pi is always flagged inexact | ||
72 | PI_RN: | ||
73 | leal PIRN,%a0 |rmode is rn, load PIRN in a0 | ||
74 | bra set_finx | ||
75 | PI_RP: | ||
76 | leal PIRP,%a0 |rmode is rp, load PIRP in a0 | ||
77 | bra set_finx | ||
78 | SM_TBL: |offsets $0b - $0e: small-constant tables | ||
79 | subil #0xb,%d0 |make offset in 0 - 3 range | ||
80 | tstb %d1 |check for rmode | ||
81 | beqs SM_RN |if zero, rn mode | ||
82 | cmpib #0x3,%d1 |check for rp | ||
83 | beqs SM_RP |if 3, rp mode | ||
84 | SM_RZRM: | ||
85 | leal SMALRZRM,%a0 |rmode is rz or rm, load SMALRZRM in a0 | ||
86 | cmpib #0x2,%d0 |check if result is inex | ||
87 | ble set_finx |if 0 - 2, it is inexact | ||
88 | bra no_finx |if 3, it is exact | ||
89 | SM_RN: | ||
90 | leal SMALRN,%a0 |rmode is rn, load SMALRN in a0 | ||
91 | cmpib #0x2,%d0 |check if result is inex | ||
92 | ble set_finx |if 0 - 2, it is inexact | ||
93 | bra no_finx |if 3, it is exact | ||
94 | SM_RP: | ||
95 | leal SMALRP,%a0 |rmode is rp, load SMALRP in a0 | ||
96 | cmpib #0x2,%d0 |check if result is inex | ||
97 | ble set_finx |if 0 - 2, it is inexact | ||
98 | bra no_finx |if 3, it is exact | ||
99 | BG_TBL: |offsets $30 - $3f: big-constant tables | ||
100 | subil #0x30,%d0 |make offset in 0 - f range | ||
101 | tstb %d1 |check for rmode | ||
102 | beqs BG_RN |if zero, rn mode | ||
103 | cmpib #0x3,%d1 |check for rp | ||
104 | beqs BG_RP |if 3, rp mode | ||
105 | BG_RZRM: | ||
106 | leal BIGRZRM,%a0 |rmode is rz or rm, load BIGRZRM in a0 | ||
107 | cmpib #0x1,%d0 |check if result is inex | ||
108 | ble set_finx |if 0 - 1, it is inexact | ||
109 | cmpib #0x7,%d0 |second check | ||
110 | ble no_finx |if 2 - 7, it is exact | ||
111 | bra set_finx |if 8 - f, it is inexact | ||
112 | BG_RN: | ||
113 | leal BIGRN,%a0 |rmode is rn, load BIGRN in a0 | ||
114 | cmpib #0x1,%d0 |check if result is inex | ||
115 | ble set_finx |if 0 - 1, it is inexact | ||
116 | cmpib #0x7,%d0 |second check | ||
117 | ble no_finx |if 2 - 7, it is exact | ||
118 | bra set_finx |if 8 - f, it is inexact | ||
119 | BG_RP: | ||
120 | leal BIGRP,%a0 |rmode is rp, load BIGRP in a0 | ||
121 | cmpib #0x1,%d0 |check if result is inex | ||
122 | ble set_finx |if 0 - 1, it is inexact | ||
123 | cmpib #0x7,%d0 |second check | ||
124 | ble no_finx |if 2 - 7, it is exact | ||
125 | | bra set_finx ;if 8 - f, it is inexact (left commented out: execution falls through into set_finx) | ||
126 | set_finx: |inexact constant: flag it, then fall into no_finx | ||
127 | orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex | ||
128 | no_finx: |a0 = table base, d0 = entry index, d1 = rounding mode | ||
129 | mulul #12,%d0 |use offset to point into tables (12 bytes per entry) | ||
130 | movel %d1,L_SCR1(%a6) |load mode for round call | ||
131 | bfextu USER_FPCR(%a6){#24:#2},%d1 |get precision | ||
132 | tstl %d1 |check if extended precision | ||
133 | | | ||
134 | | Precision is extended when the field is zero | ||
135 | | | ||
136 | bnes not_ext |non-zero field: not extended, go round the constant | ||
137 | fmovemx (%a0,%d0),%fp0-%fp0 |return result in fp0 | ||
138 | rts | ||
139 | | | ||
140 | | Precision is single or double | ||
141 | | | ||
142 | not_ext: |copy the constant to FP_SCR1, round it to the user's precision, return it in fp0 | ||
143 | swap %d1 |rnd prec in upper word of d1 | ||
144 | addl L_SCR1(%a6),%d1 |merge rmode in low word of d1 | ||
145 | movel (%a0,%d0),FP_SCR1(%a6) |load first word to temp storage | ||
146 | movel 4(%a0,%d0),FP_SCR1+4(%a6) |load second word | ||
147 | movel 8(%a0,%d0),FP_SCR1+8(%a6) |load third word | ||
148 | clrl %d0 |clear g,r,s | ||
149 | lea FP_SCR1(%a6),%a0 |a0 points at the temp copy for round | ||
150 | btstb #sign_bit,LOCAL_EX(%a0) |test the sign bit of the copy | ||
151 | sne LOCAL_SGN(%a0) |convert to internal ext. format | ||
152 | |||
153 | bsr round |go round the mantissa | ||
154 | |||
155 | bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format | ||
156 | beqs fin_fcr |sign byte was zero: positive, nothing to set | ||
157 | bsetb #sign_bit,LOCAL_EX(%a0) |sign byte was set: restore the sign bit | ||
158 | fin_fcr: | ||
159 | fmovemx (%a0),%fp0-%fp0 |return the rounded constant in fp0 | ||
160 | rts | ||
161 | |||
162 | |end | ||
diff --git a/arch/m68k/fpsp040/srem_mod.S b/arch/m68k/fpsp040/srem_mod.S new file mode 100644 index 000000000000..8c8d7f50cc68 --- /dev/null +++ b/arch/m68k/fpsp040/srem_mod.S | |||
@@ -0,0 +1,422 @@ | |||
1 | | | ||
2 | | srem_mod.sa 3.1 12/10/90 | ||
3 | | | ||
4 | | The entry point sMOD computes the floating point MOD of the | ||
5 | | input values X and Y. The entry point sREM computes the floating | ||
6 | | point (IEEE) REM of the input values X and Y. | ||
7 | | | ||
8 | | INPUT | ||
9 | | ----- | ||
10 | | Double-extended value Y is pointed to by address in register | ||
11 | | A0. Double-extended value X is located in -12(A0). The values | ||
12 | | of X and Y are both nonzero and finite; although either or both | ||
13 | | of them can be denormalized. The special cases of zeros, NaNs, | ||
14 | | and infinities are handled elsewhere. | ||
15 | | | ||
16 | | OUTPUT | ||
17 | | ------ | ||
18 | | FREM(X,Y) or FMOD(X,Y), depending on entry point. | ||
19 | | | ||
20 | | ALGORITHM | ||
21 | | --------- | ||
22 | | | ||
23 | | Step 1. Save and strip signs of X and Y: signX := sign(X), | ||
24 | | signY := sign(Y), X := |X|, Y := |Y|, | ||
25 | | signQ := signX EOR signY. Record whether MOD or REM | ||
26 | | is requested. | ||
27 | | | ||
28 | | Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0. | ||
29 | | If (L < 0) then | ||
30 | | R := X, go to Step 4. | ||
31 | | else | ||
32 | | R := 2^(-L)X, j := L. | ||
33 | | endif | ||
34 | | | ||
35 | | Step 3. Perform MOD(X,Y) | ||
36 | | 3.1 If R = Y, go to Step 9. | ||
37 | | 3.2 If R > Y, then { R := R - Y, Q := Q + 1} | ||
38 | | 3.3 If j = 0, go to Step 4. | ||
39 | | 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to | ||
40 | | Step 3.1. | ||
41 | | | ||
42 | | Step 4. At this point, R = X - QY = MOD(X,Y). Set | ||
43 | | Last_Subtract := false (used in Step 7 below). If | ||
44 | | MOD is requested, go to Step 6. | ||
45 | | | ||
46 | | Step 5. R = MOD(X,Y), but REM(X,Y) is requested. | ||
47 | | 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to | ||
48 | | Step 6. | ||
49 | | 5.2 If R > Y/2, then { set Last_Subtract := true, | ||
50 | | Q := Q + 1, Y := signX*Y }. Go to Step 6. | ||
51 | | 5.3 This is the tricky case of R = Y/2. If Q is odd, | ||
52 | | then { Q := Q + 1, signX := -signX }. | ||
53 | | | ||
54 | | Step 6. R := signX*R. | ||
55 | | | ||
56 | | Step 7. If Last_Subtract = true, R := R - Y. | ||
57 | | | ||
58 | | Step 8. Return signQ, last 7 bits of Q, and R as required. | ||
59 | | | ||
60 | | Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus, | ||
61 | | X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1), | ||
62 | | R := 0. Return signQ, last 7 bits of Q, and R. | ||
63 | | | ||
64 | | | ||
65 | |||
66 | | Copyright (C) Motorola, Inc. 1990 | ||
67 | | All Rights Reserved | ||
68 | | | ||
69 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
70 | | The copyright notice above does not evidence any | ||
71 | | actual or intended publication of such source code. | ||
72 | |||
73 | SREM_MOD: |idnt 2,1 | Motorola 040 Floating Point Software Package | ||
74 | |||
75 | |section 8 | ||
76 | |||
77 | #include "fpsp.h" | ||
78 | |||
79 | .set Mod_Flag,L_SCR3 | ...0 = MOD requested, 1 = REM requested | ||
80 | .set SignY,FP_SCR3+4 | ...sign/exponent word of Y | ||
81 | .set SignX,FP_SCR3+8 | ...sign/exponent word of X | ||
82 | .set SignQ,FP_SCR3+12 | ...$8000 if the signs of X and Y differ, else 0 | ||
83 | .set Sc_Flag,FP_SCR4 | ...1 = multiply the result by Scale before returning | ||
84 | |||
85 | .set Y,FP_SCR1 | ...work copy of abs(Y): exponent word, then Y_Hi/Y_Lo | ||
86 | .set Y_Hi,Y+4 | ||
87 | .set Y_Lo,Y+8 | ||
88 | |||
89 | .set R,FP_SCR2 | ...work copy of the remainder R, same layout as Y | ||
90 | .set R_Hi,R+4 | ||
91 | .set R_Lo,R+8 | ||
92 | |||
93 | | Scale has exponent word $0001 and mantissa $80000000 00000000. | ||
94 | Scale: .long 0x00010000,0x80000000,0x00000000,0x00000000 | ...multiplier applied at Restore when Sc_Flag is set | ||
95 | |||
96 | |xref t_avoid_unsupp | ||
97 | |||
98 | .global smod | ||
99 | smod: | ||
100 | | Entry point for FMOD(X,Y): clear Mod_Flag and join the common code. | ||
101 | movel #0,Mod_Flag(%a6) | ...Mod_Flag := 0 (MOD requested) | ||
102 | bras Mod_Rem | ||
103 | |||
104 | .global srem | ||
105 | srem: | ||
106 | | Entry point for FREM(X,Y): set Mod_Flag and fall through to Mod_Rem. | ||
107 | movel #1,Mod_Flag(%a6) | ...Mod_Flag := 1 (REM requested) | ||
108 | |||
109 | Mod_Rem: | ||
110 | |..Save sign of X and Y | ||
111 | moveml %d2-%d7,-(%a7) | ...save data registers | ||
112 | movew (%a0),%d3 | ...sign/exponent word of Y | ||
113 | movew %d3,SignY(%a6) | ||
114 | andil #0x00007FFF,%d3 | ...Y := |Y| | ||
115 | |||
116 | | Load the 64-bit mantissa of Y, then normalize it if the exponent field is 0. | ||
117 | movel 4(%a0),%d4 | ||
118 | movel 8(%a0),%d5 | ...(D3,D4,D5) is |Y| | ||
119 | |||
120 | tstl %d3 | ...exponent field zero: Y is denormalized | ||
121 | bnes Y_Normal | ||
122 | |||
123 | movel #0x00003FFE,%d3 | ...$3FFD + 1 | ||
124 | tstl %d4 | ||
125 | bnes HiY_not0 | ||
126 | |||
127 | HiY_0: | ||
128 | movel %d5,%d4 | ...hi(Y) = 0: move lo(Y) up by 32 bits | ||
129 | clrl %d5 | ||
130 | subil #32,%d3 | ...and lower the exponent by 32 | ||
131 | clrl %d6 | ||
132 | bfffo %d4{#0:#32},%d6 | ...D6 := offset of the first 1 bit in D4 | ||
133 | lsll %d6,%d4 | ...shift that bit up to bit 31 | ||
134 | subl %d6,%d3 | ...(D3,D4,D5) is normalized | ||
135 | | ...with bias $7FFD | ||
136 | bras Chk_X | ||
137 | |||
138 | HiY_not0: | ||
139 | clrl %d6 | ||
140 | bfffo %d4{#0:#32},%d6 | ...D6 := offset of the first 1 bit in D4 | ||
141 | subl %d6,%d3 | ...lower the exponent by the shift count | ||
142 | lsll %d6,%d4 | ||
143 | movel %d5,%d7 | ...a copy of D5 | ||
144 | lsll %d6,%d5 | ||
145 | negl %d6 | ||
146 | addil #32,%d6 | ...D6 := 32 - shift count | ||
147 | lsrl %d6,%d7 | ...bits shifted out of the top of D5 | ||
148 | orl %d7,%d4 | ...(D3,D4,D5) normalized | ||
149 | | ...with bias $7FFD | ||
150 | bras Chk_X | ||
151 | |||
152 | Y_Normal: | ||
153 | addil #0x00003FFE,%d3 | ...(D3,D4,D5) normalized | ||
154 | | ...with bias $7FFD | ||
155 | |||
156 | Chk_X: | ||
157 | movew -12(%a0),%d0 | ...sign/exponent word of X | ||
158 | movew %d0,SignX(%a6) | ||
159 | movew SignY(%a6),%d1 | ||
160 | eorl %d0,%d1 | ...bit 15 := sign(X) EOR sign(Y) | ||
161 | andil #0x00008000,%d1 | ||
162 | movew %d1,SignQ(%a6) | ...sign(Q) obtained | ||
163 | andil #0x00007FFF,%d0 | ...X := abs(X) | ||
164 | movel -8(%a0),%d1 | ||
165 | movel -4(%a0),%d2 | ...(D0,D1,D2) is |X| | ||
166 | tstl %d0 | ...exponent field zero: X is denormalized | ||
167 | bnes X_Normal | ||
168 | movel #0x00003FFE,%d0 | ...same starting exponent as used for a denormalized Y | ||
169 | tstl %d1 | ||
170 | bnes HiX_not0 | ||
171 | |||
172 | HiX_0: | ||
173 | movel %d2,%d1 | ...hi(X) = 0: move lo(X) up by 32 bits | ||
174 | clrl %d2 | ||
175 | subil #32,%d0 | ...and lower the exponent by 32 | ||
176 | clrl %d6 | ||
177 | bfffo %d1{#0:#32},%d6 | ...D6 := offset of the first 1 bit in D1 | ||
178 | lsll %d6,%d1 | ||
179 | subl %d6,%d0 | ...(D0,D1,D2) is normalized | ||
180 | | ...with bias $7FFD | ||
181 | bras Init | ||
182 | |||
183 | HiX_not0: | ||
184 | clrl %d6 | ||
185 | bfffo %d1{#0:#32},%d6 | ...D6 := offset of the first 1 bit in D1 | ||
186 | subl %d6,%d0 | ...lower the exponent by the shift count | ||
187 | lsll %d6,%d1 | ||
188 | movel %d2,%d7 | ...a copy of D2 | ||
189 | lsll %d6,%d2 | ||
190 | negl %d6 | ||
191 | addil #32,%d6 | ...D6 := 32 - shift count | ||
192 | lsrl %d6,%d7 | ...bits shifted out of the top of D2 | ||
193 | orl %d7,%d1 | ...(D0,D1,D2) normalized | ||
194 | | ...with bias $7FFD | ||
195 | bras Init | ||
196 | |||
197 | X_Normal: | ||
198 | addil #0x00003FFE,%d0 | ...(D0,D1,D2) normalized | ||
199 | | ...with bias $7FFD | ||
200 | |||
201 | Init: | ||
202 | | Step 2: set up L, Q and k; skip the loop when expo(X) < expo(Y). | ||
203 | movel %d3,L_SCR1(%a6) | ...save biased expo(Y) | ||
204 | movel %d0,L_SCR2(%a6) |save d0 | ||
205 | subl %d3,%d0 | ...L := expo(X)-expo(Y) | ||
206 | | Move.L D0,L ...D0 is j | ||
207 | clrl %d6 | ...D6 := carry <- 0 | ||
208 | clrl %d3 | ...D3 is Q | ||
209 | moveal #0,%a1 | ...A1 is k; j+k=L, Q=0 | ||
210 | |||
211 | |..(Carry,D1,D2) is R | ||
212 | tstl %d0 | ...is L negative? | ||
213 | bges Mod_Loop | ||
214 | |||
215 | |..expo(X) < expo(Y). Thus X = mod(X,Y) | ||
216 | | | ||
217 | movel L_SCR2(%a6),%d0 |restore d0 | ||
218 | bra Get_Mod | ||
219 | |||
220 | |..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L | ||
221 | |||
222 | |||
223 | Mod_Loop: | ||
224 | tstl %d6 | ...test carry bit | ||
225 | bgts R_GT_Y | ...carry out of 64 bits: subtract Y without comparing | ||
226 | |||
227 | |..At this point carry = 0, R = (D1,D2), Y = (D4,D5) | ||
228 | cmpl %d4,%d1 | ...compare hi(R) and hi(Y) | ||
229 | bnes R_NE_Y | ||
230 | cmpl %d5,%d2 | ...compare lo(R) and lo(Y) | ||
231 | bnes R_NE_Y | ||
232 | |||
233 | |..At this point, R = Y | ||
234 | bra Rem_is_0 | ...Step 9 | ||
235 | |||
236 | R_NE_Y: | ||
237 | |..use the borrow of the previous compare | ||
238 | bcss R_LT_Y | ...borrow is set iff R < Y | ||
239 | |||
240 | R_GT_Y: | ||
241 | |..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0 | ||
242 | |..and Y < (D1,D2) < 2Y. Either way, perform R - Y | ||
243 | subl %d5,%d2 | ...lo(R) - lo(Y) | ||
244 | subxl %d4,%d1 | ...hi(R) - hi(Y) | ||
245 | clrl %d6 | ...clear carry | ||
246 | addql #1,%d3 | ...Q := Q + 1 | ||
247 | |||
248 | R_LT_Y: | ||
249 | |..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0. | ||
250 | tstl %d0 | ...see if j = 0. | ||
251 | beqs PostLoop | ||
252 | |||
253 | addl %d3,%d3 | ...Q := 2Q | ||
254 | addl %d2,%d2 | ...lo(R) = 2lo(R) | ||
255 | roxll #1,%d1 | ...hi(R) = 2hi(R) + carry | ||
256 | scs %d6 | ...set Carry if 2(R) overflows (low byte of D6 := $FF) | ||
257 | addql #1,%a1 | ...k := k+1 | ||
258 | subql #1,%d0 | ...j := j - 1 | ||
259 | |..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y. | ||
260 | |||
261 | bras Mod_Loop | ||
262 | |||
263 | PostLoop: | ||
264 | |..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y. | ||
265 | |||
266 | |..normalize R. | ||
267 | movel L_SCR1(%a6),%d0 | ...new biased expo of R | ||
268 | tstl %d1 | ...is hi(R) zero? | ||
269 | bnes HiR_not0 | ||
270 | |||
271 | HiR_0: | ||
272 | movel %d2,%d1 | ...hi(R) = 0: move lo(R) up by 32 bits | ||
273 | clrl %d2 | ||
274 | subil #32,%d0 | ...and lower the exponent by 32 | ||
275 | clrl %d6 | ||
276 | bfffo %d1{#0:#32},%d6 | ...D6 := offset of the first 1 bit in D1 | ||
277 | lsll %d6,%d1 | ||
278 | subl %d6,%d0 | ...(D0,D1,D2) is normalized | ||
279 | | ...with bias $7FFD | ||
280 | bras Get_Mod | ||
281 | |||
282 | HiR_not0: | ||
283 | clrl %d6 | ||
284 | bfffo %d1{#0:#32},%d6 | ...D6 := offset of the first 1 bit in D1 | ||
285 | bmis Get_Mod | ...already normalized | ||
286 | subl %d6,%d0 | ...lower the exponent by the shift count | ||
287 | lsll %d6,%d1 | ||
288 | movel %d2,%d7 | ...a copy of D2 | ||
289 | lsll %d6,%d2 | ||
290 | negl %d6 | ||
291 | addil #32,%d6 | ...D6 := 32 - shift count | ||
292 | lsrl %d6,%d7 | ...bits shifted out of the top of D2 | ||
293 | orl %d7,%d1 | ...(D0,D1,D2) normalized | ||
294 | |||
295 | | | ||
296 | Get_Mod: | ||
297 | cmpil #0x000041FE,%d0 | ...is expo(R), bias $7FFD, below $41FE? | ||
298 | bges No_Scale | ||
299 | Do_Scale: | ||
300 | movew %d0,R(%a6) | ...exponent stored with the extra $3FFE still in it | ||
301 | clrw R+2(%a6) | ||
302 | movel %d1,R_Hi(%a6) | ||
303 | movel %d2,R_Lo(%a6) | ||
304 | movel L_SCR1(%a6),%d6 | ...expo(Y), bias $7FFD | ||
305 | movew %d6,Y(%a6) | ||
306 | clrw Y+2(%a6) | ||
307 | movel %d4,Y_Hi(%a6) | ||
308 | movel %d5,Y_Lo(%a6) | ||
309 | fmovex R(%a6),%fp0 | ...no exception | ||
310 | movel #1,Sc_Flag(%a6) | ...result must be multiplied by Scale at Restore | ||
311 | bras ModOrRem | ||
312 | No_Scale: | ||
313 | movel %d1,R_Hi(%a6) | ||
314 | movel %d2,R_Lo(%a6) | ||
315 | subil #0x3FFE,%d0 | ...remove the extra $3FFE from expo(R) | ||
316 | movew %d0,R(%a6) | ||
317 | clrw R+2(%a6) | ||
318 | movel L_SCR1(%a6),%d6 | ||
319 | subil #0x3FFE,%d6 | ...remove the extra $3FFE from expo(Y) | ||
320 | movel %d6,L_SCR1(%a6) | ...ModOrRem reads expo(Y) back from L_SCR1 | ||
321 | fmovex R(%a6),%fp0 | ||
322 | movew %d6,Y(%a6) | ...NOTE(review): Y+2 is not cleared on this path, unlike Do_Scale | ||
323 | movel %d4,Y_Hi(%a6) | ||
324 | movel %d5,Y_Lo(%a6) | ||
325 | movel #0,Sc_Flag(%a6) | ...no rescaling at Restore | ||
326 | |||
327 | | | ||
328 | |||
329 | |||
330 | ModOrRem: | ||
331 | movel Mod_Flag(%a6),%d6 | ...0 = MOD: R needs no further reduction | ||
332 | beqs Fix_Sign | ||
333 | |||
334 | movel L_SCR1(%a6),%d6 | ...new biased expo(Y) | ||
335 | subql #1,%d6 | ...biased expo(Y/2) | ||
336 | cmpl %d6,%d0 | ...compare expo(R) with expo(Y/2) | ||
337 | blts Fix_Sign | ...R < Y/2 | ||
338 | bgts Last_Sub | ...R > Y/2 | ||
339 | |||
340 | cmpl %d4,%d1 | ...exponents equal: compare hi mantissas | ||
341 | bnes Not_EQ | ||
342 | cmpl %d5,%d2 | ...then lo mantissas | ||
343 | bnes Not_EQ | ||
344 | bra Tie_Case | ...R = Y/2 | ||
345 | |||
346 | Not_EQ: | ||
347 | bcss Fix_Sign | ...borrow from the last compare: R < Y/2 | ||
348 | |||
349 | Last_Sub: | ||
350 | | R > Y/2: subtract abs(Y) once more, before the sign of X is applied. | ||
351 | fsubx Y(%a6),%fp0 | ...no exceptions | ||
352 | addql #1,%d3 | ...Q := Q + 1 | ||
353 | |||
354 | | | ||
355 | |||
356 | Fix_Sign: | ||
357 | |..Get sign of X | ||
358 | movew SignX(%a6),%d6 | ...sets N from bit 15 of the saved word | ||
359 | bges Get_Q | ...sign bit clear: keep the sign of the result | ||
360 | fnegx %fp0 | ...sign bit set: negate the result | ||
361 | |||
362 | |..Get Q | ||
363 | | | ||
364 | Get_Q: | ||
365 | clrl %d6 | ||
366 | movew SignQ(%a6),%d6 | ...D6 is sign(Q) | ||
367 | movel #8,%d7 | ...shift count | ||
368 | lsrl %d7,%d6 | ...$8000 becomes $0080: sign now in bit 7 | ||
369 | andil #0x0000007F,%d3 | ...7 bits of Q | ||
370 | orl %d6,%d3 | ...sign and bits of Q | ||
371 | swap %d3 | ...move that byte to bits 16-23 | ||
372 | fmovel %fpsr,%d6 | ||
373 | andil #0xFF00FFFF,%d6 | ...clear bits 16-23 of the fpsr image | ||
374 | orl %d3,%d6 | ||
375 | fmovel %d6,%fpsr | ...put Q in fpsr | ||
376 | |||
377 | | | ||
378 | Restore: | ||
379 | moveml (%a7)+,%d2-%d7 | ...restore the registers saved at Mod_Rem | ||
380 | fmovel USER_FPCR(%a6),%fpcr | ||
381 | movel Sc_Flag(%a6),%d0 | ||
382 | beqs Finish | ...Sc_Flag = 0: no rescaling | ||
383 | fmulx Scale(%pc),%fp0 | ...may cause underflow | ||
384 | bra t_avoid_unsupp |check for denorm as a | ||
385 | | ;result of the scaling | ||
386 | |||
387 | Finish: | ||
388 | fmovex %fp0,%fp0 |capture exceptions & round | ||
389 | rts | ||
390 | |||
391 | Rem_is_0: | ||
392 | |..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1) | ||
393 | addql #1,%d3 | ...Q := Q + 1 | ||
394 | cmpil #8,%d0 | ...D0 is j | ||
395 | bges Q_Big | ...j >= 8: the 7 reported bits of 2^j (Q+1) are all 0 | ||
396 | |||
397 | lsll %d0,%d3 | ...Q := 2^j (Q+1) | ||
398 | bras Set_R_0 | ||
399 | |||
400 | Q_Big: | ||
401 | clrl %d3 | ...Get_Q keeps only the low 7 bits, so report 0 | ||
402 | |||
403 | Set_R_0: | ||
404 | fmoves #0x00000000,%fp0 | ...R := +0.0 | ||
405 | movel #0,Sc_Flag(%a6) | ...no rescaling at Restore | ||
406 | bra Fix_Sign | ...apply the sign of X and store Q | ||
407 | |||
408 | Tie_Case: | ||
409 | |..Check parity of Q | ||
410 | movel %d3,%d6 | ||
411 | andil #0x00000001,%d6 | ...keep the least significant bit of Q | ||
412 | tstl %d6 | ...(the andil above has already set Z) | ||
413 | beq Fix_Sign | ...Q is even | ||
414 | |||
415 | |..Q is odd, Q := Q + 1, signX := -signX | ||
416 | addql #1,%d3 | ||
417 | movew SignX(%a6),%d6 | ||
418 | eoril #0x00008000,%d6 | ...flip the saved sign bit of X | ||
419 | movew %d6,SignX(%a6) | ...Fix_Sign reads SignX back | ||
420 | bra Fix_Sign | ||
421 | |||
422 | |end | ||
diff --git a/arch/m68k/fpsp040/ssin.S b/arch/m68k/fpsp040/ssin.S new file mode 100644 index 000000000000..043c91cdd657 --- /dev/null +++ b/arch/m68k/fpsp040/ssin.S | |||
@@ -0,0 +1,746 @@ | |||
1 | | | ||
2 | | ssin.sa 3.3 7/29/91 | ||
3 | | | ||
4 | | The entry point sSIN computes the sine of an input argument | ||
5 | | sCOS computes the cosine, and sSINCOS computes both. The | ||
6 | | corresponding entry points with a "d" compute the same | ||
7 | | corresponding function values for denormalized inputs. | ||
8 | | | ||
9 | | Input: Double-extended number X in location pointed to | ||
10 | | by address register a0. | ||
11 | | | ||
12 | | Output: The function value sin(X) or cos(X) returned in Fp0 if SIN or | ||
13 | | COS is requested. Otherwise, for SINCOS, sin(X) is returned | ||
14 | | in Fp0, and cos(X) is returned in Fp1. | ||
15 | | | ||
16 | | Modifies: Fp0 for SIN or COS; both Fp0 and Fp1 for SINCOS. | ||
17 | | | ||
18 | | Accuracy and Monotonicity: The returned result is within 1 ulp in | ||
19 | | 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the | ||
20 | | result is subsequently rounded to double precision. The | ||
21 | | result is provably monotonic in double precision. | ||
22 | | | ||
23 | | Speed: The programs sSIN and sCOS take approximately 150 cycles for | ||
24 | | input argument X such that |X| < 15Pi, which is the usual | ||
25 | | situation. The speed for sSINCOS is approximately 190 cycles. | ||
26 | | | ||
27 | | Algorithm: | ||
28 | | | ||
29 | | SIN and COS: | ||
30 | | 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. | ||
31 | | | ||
32 | | 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. | ||
33 | | | ||
34 | | 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let | ||
35 | | k = N mod 4, so in particular, k = 0,1,2,or 3. Overwrite | ||
36 | | k by k := k + AdjN. | ||
37 | | | ||
38 | | 4. If k is even, go to 6. | ||
39 | | | ||
40 | | 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. Return sgn*cos(r) | ||
41 | | where cos(r) is approximated by an even polynomial in r, | ||
42 | | 1 + r*r*(B1+s*(B2+ ... + s*B8)), s = r*r. | ||
43 | | Exit. | ||
44 | | | ||
45 | | 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) | ||
46 | | where sin(r) is approximated by an odd polynomial in r | ||
47 | | r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. | ||
48 | | Exit. | ||
49 | | | ||
50 | | 7. If |X| > 1, go to 9. | ||
51 | | | ||
52 | | 8. (|X|<2**(-40)) If SIN is invoked, return X; otherwise return 1. | ||
53 | | | ||
54 | | 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3. | ||
55 | | | ||
56 | | SINCOS: | ||
57 | | 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. | ||
58 | | | ||
59 | | 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let | ||
60 | | k = N mod 4, so in particular, k = 0,1,2,or 3. | ||
61 | | | ||
62 | | 3. If k is even, go to 5. | ||
63 | | | ||
64 | | 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e. | ||
65 | | j1 exclusive or with the l.s.b. of k. | ||
66 | | sgn1 := (-1)**j1, sgn2 := (-1)**j2. | ||
67 | | SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where | ||
68 | | sin(r) and cos(r) are computed as odd and even polynomials | ||
69 | | in r, respectively. Exit | ||
70 | | | ||
71 | | 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. | ||
72 | | SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where | ||
73 | | sin(r) and cos(r) are computed as odd and even polynomials | ||
74 | | in r, respectively. Exit | ||
75 | | | ||
76 | | 6. If |X| > 1, go to 8. | ||
77 | | | ||
78 | | 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. | ||
79 | | | ||
80 | | 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2. | ||
81 | | | ||
82 | |||
83 | | Copyright (C) Motorola, Inc. 1990 | ||
84 | | All Rights Reserved | ||
85 | | | ||
86 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
87 | | The copyright notice above does not evidence any | ||
88 | | actual or intended publication of such source code. | ||
89 | |||
90 | |SSIN idnt 2,1 | Motorola 040 Floating Point Software Package | ||
91 | |||
92 | |section 8 | ||
93 | |||
94 | #include "fpsp.h" | ||
95 | |||
96 | BOUNDS1: .long 0x3FD78000,0x4004BC7E | ...same values as the 2**(-40) and 15 PI limits compared in SINBGN | ||
97 | TWOBYPI: .long 0x3FE45F30,0x6DC9C883 | ...2/PI, read as a double by fmuld | ||
98 | | SINA7 through SINA3 are read as doubles (fmoved/faddd). | ||
99 | SINA7: .long 0xBD6AAA77,0xCCC994F5 | ||
100 | SINA6: .long 0x3DE61209,0x7AAE8DA1 | ||
101 | |||
102 | SINA5: .long 0xBE5AE645,0x2A118AE4 | ||
103 | SINA4: .long 0x3EC71DE3,0xA5341531 | ||
104 | |||
105 | SINA3: .long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000 | ||
106 | | SINA2 and SINA1 are read as extended values (faddx). | ||
107 | SINA2: .long 0x3FF80000,0x88888888,0x888859AF,0x00000000 | ||
108 | |||
109 | SINA1: .long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000 | ||
110 | | COSB8 through COSB4 are read as doubles (fmoved/faddd). | ||
111 | COSB8: .long 0x3D2AC4D0,0xD6011EE3 | ||
112 | COSB7: .long 0xBDA9396F,0x9F45AC19 | ||
113 | |||
114 | COSB6: .long 0x3E21EED9,0x0612C972 | ||
115 | COSB5: .long 0xBE927E4F,0xB79D9FCF | ||
116 | |||
117 | COSB4: .long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000 | ||
118 | | COSB3 and COSB2 are read as extended values (faddx). | ||
119 | COSB3: .long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000 | ||
120 | |||
121 | COSB2: .long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E | ||
122 | COSB1: .long 0xBF000000 | ...-1/2 in single precision, read by fadds | ||
123 | | NOTE(review): no use of INVTWOPI, TWOPI1 or TWOPI2 was seen in the code reviewed. | ||
124 | INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A | ||
125 | |||
126 | TWOPI1: .long 0x40010000,0xC90FDAA2,0x00000000,0x00000000 | ||
127 | TWOPI2: .long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000 | ||
128 | |||
129 | |xref PITBL | ||
130 | |||
131 | .set INARG,FP_SCR4 | ...copy of the running remainder made at the top of LOOP | ||
132 | |||
133 | .set X,FP_SCR5 | ...saved argument; reused for R' or S' in the polynomials | ||
134 | .set XDCARE,X+2 | ...word after the sign/exponent word; cleared in SINTINY | ||
135 | .set XFRAC,X+4 | ||
136 | |||
137 | .set RPRIME,FP_SCR1 | ||
138 | .set SPRIME,FP_SCR2 | ||
139 | | POSNEG1 and TWOTO63 name the same L_SCR1 slot. | ||
140 | .set POSNEG1,L_SCR1 | ...+1.0 or -1.0 in single, built in COSPOLY | ||
141 | .set TWOTO63,L_SCR1 | ...SIGN(INARG)*2**63 in single, built in WORK | ||
142 | | ENDFLAG and N name the same L_SCR2 slot. | ||
143 | .set ENDFLAG,L_SCR2 | ...1 on the last pass of LOOP, else 0 | ||
144 | .set N,L_SCR2 | ...integer quotient of the argument reduction | ||
145 | |||
146 | .set ADJN,L_SCR3 | ...0 for ssin, 1 for scos, 4 for ssincos | ||
147 | |||
148 | | xref t_frcinx | ||
149 | |xref t_extdnrm | ||
150 | |xref sto_cos | ||
151 | |||
152 | .global ssind | ||
153 | ssind: | ||
154 | |--SIN(X) = X FOR DENORMALIZED X | ||
155 | bra t_extdnrm | ...handler defined outside this file (see the xref list) | ||
156 | |||
157 | .global scosd | ||
158 | scosd: | ||
159 | |--COS(X) = 1 FOR DENORMALIZED X | ||
160 | |||
161 | fmoves #0x3F800000,%fp0 | ...FP0 := 1.0 (single) | ||
162 | | | ||
163 | | 9D25B Fix: Sometimes the previous fmove.s sets fpsr bits | ||
164 | | | ||
165 | fmovel #0,%fpsr | ...clear the whole fpsr | ||
166 | | | ||
167 | bra t_frcinx | ...handler defined outside this file (see the xref list) | ||
168 | |||
169 | .global ssin | ||
170 | ssin: | ||
171 | |--SET ADJN TO 0 | ||
172 | movel #0,ADJN(%a6) | ...0 selects sine | ||
173 | bras SINBGN | ||
174 | |||
175 | .global scos | ||
176 | scos: | ||
177 | |--SET ADJN TO 1 | ||
178 | movel #1,ADJN(%a6) | ...1 selects cosine; falls through to SINBGN | ||
179 | |||
180 | SINBGN: | ||
181 | |--LOAD X. CHECK IF |X| IS TOO SMALL OR LARGE | ||
182 | | NOTE(review): nothing here saves FPCR or FP1; D1 is presumably the user FPCR from the caller - confirm. | ||
183 | fmovex (%a0),%fp0 | ...LOAD INPUT | ||
184 | |||
185 | movel (%a0),%d0 | ...sign/exponent word into the upper half of D0 | ||
186 | movew 4(%a0),%d0 | ...top 16 mantissa bits into the lower half | ||
187 | fmovex %fp0,X(%a6) | ||
188 | andil #0x7FFFFFFF,%d0 | ...COMPACTIFY X | ||
189 | |||
190 | cmpil #0x3FD78000,%d0 | ...|X| >= 2**(-40)? | ||
191 | bges SOK1 | ||
192 | bra SINSM | ...too small | ||
193 | |||
194 | SOK1: | ||
195 | cmpil #0x4004BC7E,%d0 | ...|X| < 15 PI? | ||
196 | blts SINMAIN | ||
197 | bra REDUCEX | ...too large: general argument reduction | ||
198 | |||
199 | SINMAIN: | ||
200 | |--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI. | ||
201 | |--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. | ||
202 | fmovex %fp0,%fp1 | ||
203 | fmuld TWOBYPI,%fp1 | ...X*2/PI | ||
204 | |||
205 | |--HIDE THE NEXT THREE INSTRUCTIONS | ||
206 | lea PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32 | ||
207 | | +0x200 is 32 entries of 16 bytes, i.e. the N = 0 entry given the range above. | ||
208 | |||
209 | |--FP1 IS NOW READY | ||
210 | fmovel %fp1,N(%a6) | ...CONVERT TO INTEGER | ||
211 | |||
212 | movel N(%a6),%d0 | ||
213 | asll #4,%d0 | ...N * 16: byte offset of the table entry | ||
214 | addal %d0,%a1 | ...A1 IS THE ADDRESS OF N*PIBY2 | ||
215 | | ...WHICH IS IN TWO PIECES Y1 & Y2 | ||
216 | |||
217 | fsubx (%a1)+,%fp0 | ...X-Y1 | ||
218 | |--HIDE THE NEXT ONE | ||
219 | fsubs (%a1),%fp0 | ...FP0 IS R = (X-Y1)-Y2 | ||
220 | |||
221 | SINCONT: | ||
222 | |--continuation from REDUCEX | ||
223 | |||
224 | |--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED | ||
225 | movel N(%a6),%d0 | ||
226 | addl ADJN(%a6),%d0 | ...SEE IF D0 IS ODD OR EVEN | ||
227 | rorl #1,%d0 | ...D0 WAS ODD IFF D0 IS NEGATIVE | ||
228 | cmpil #0,%d0 | ||
229 | blt COSPOLY | ...N+ADJN odd: cosine polynomial; even falls into SINPOLY | ||
230 | |||
231 | SINPOLY: | ||
232 | |--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J. | ||
233 | |--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY | ||
234 | |--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE | ||
235 | |--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS | ||
236 | |--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))]) | ||
237 | |--WHERE T=S*S. | ||
238 | |--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION | ||
239 | |--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT. | ||
240 | fmovex %fp0,X(%a6) | ...X IS R | ||
241 | fmulx %fp0,%fp0 | ...FP0 IS S | ||
242 | |---HIDE THE NEXT TWO WHILE WAITING FOR FP0 | ||
243 | fmoved SINA7,%fp3 | ...FP3 := A7 | ||
244 | fmoved SINA6,%fp2 | ...FP2 := A6 | ||
245 | |--FP0 IS NOW READY | ||
246 | fmovex %fp0,%fp1 | ||
247 | fmulx %fp1,%fp1 | ...FP1 IS T | ||
248 | |--HIDE THE NEXT TWO WHILE WAITING FOR FP1 | ||
249 | |||
250 | rorl #1,%d0 | ...second rotate: bit 1 of N+ADJN reaches bit 31 | ||
251 | andil #0x80000000,%d0 | ||
252 | | ...LEAST SIG. BIT OF D0 IN SIGN POSITION | ||
253 | eorl %d0,X(%a6) | ...X IS NOW R'= SGN*R | ||
254 | |||
255 | fmulx %fp1,%fp3 | ...TA7 | ||
256 | fmulx %fp1,%fp2 | ...TA6 | ||
257 | |||
258 | faddd SINA5,%fp3 | ...A5+TA7 | ||
259 | faddd SINA4,%fp2 | ...A4+TA6 | ||
260 | |||
261 | fmulx %fp1,%fp3 | ...T(A5+TA7) | ||
262 | fmulx %fp1,%fp2 | ...T(A4+TA6) | ||
263 | |||
264 | faddd SINA3,%fp3 | ...A3+T(A5+TA7) | ||
265 | faddx SINA2,%fp2 | ...A2+T(A4+TA6) | ||
266 | |||
267 | fmulx %fp3,%fp1 | ...T(A3+T(A5+TA7)) | ||
268 | |||
269 | fmulx %fp0,%fp2 | ...S(A2+T(A4+TA6)) | ||
270 | faddx SINA1,%fp1 | ...A1+T(A3+T(A5+TA7)) | ||
271 | fmulx X(%a6),%fp0 | ...R'*S | ||
272 | |||
273 | faddx %fp2,%fp1 | ...[A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] | ||
274 | |--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING | ||
275 | |--FP2 RELEASED, RESTORE NOW AND TAKE FULL ADVANTAGE OF HIDING | ||
276 | | NOTE(review): no FP2/FP3 save or restore appears in this block; the two lines above look stale - confirm. | ||
277 | |||
278 | fmulx %fp1,%fp0 | ...SIN(R')-R' | ||
279 | |--FP1 RELEASED. | ||
280 | |||
281 | fmovel %d1,%FPCR |restore users exceptions | ||
282 | faddx X(%a6),%fp0 |last inst - possible exception set | ||
283 | bra t_frcinx | ||
284 | |||
285 | |||
286 | COSPOLY: | ||
287 | |--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J. | ||
288 | |--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY | ||
289 | |--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE | ||
290 | |--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS | ||
291 | |--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))]) | ||
292 | |--WHERE T=S*S. | ||
293 | |--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION | ||
294 | |--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2 | ||
295 | |--AND IS THEREFORE STORED AS SINGLE PRECISION. | ||
296 | |||
297 | fmulx %fp0,%fp0 | ...FP0 IS S | ||
298 | |---HIDE THE NEXT TWO WHILE WAITING FOR FP0 | ||
299 | fmoved COSB8,%fp2 | ...FP2 := B8 | ||
300 | fmoved COSB7,%fp3 | ...FP3 := B7 | ||
301 | |--FP0 IS NOW READY | ||
302 | fmovex %fp0,%fp1 | ||
303 | fmulx %fp1,%fp1 | ...FP1 IS T | ||
304 | |--HIDE THE NEXT TWO WHILE WAITING FOR FP1 | ||
305 | fmovex %fp0,X(%a6) | ...X IS S | ||
306 | rorl #1,%d0 | ...second rotate: bit 1 of N+ADJN reaches bit 31 | ||
307 | andil #0x80000000,%d0 | ||
308 | | ...LEAST SIG. BIT OF D0 IN SIGN POSITION | ||
309 | |||
310 | fmulx %fp1,%fp2 | ...TB8 | ||
311 | |--HIDE THE NEXT TWO WHILE WAITING FOR THE XU | ||
312 | eorl %d0,X(%a6) | ...X IS NOW S'= SGN*S | ||
313 | andil #0x80000000,%d0 | ...D0 was already masked with this value above | ||
314 | |||
315 | fmulx %fp1,%fp3 | ...TB7 | ||
316 | |--HIDE THE NEXT TWO WHILE WAITING FOR THE XU | ||
317 | oril #0x3F800000,%d0 | ...D0 IS SGN IN SINGLE | ||
318 | movel %d0,POSNEG1(%a6) | ||
319 | |||
320 | faddd COSB6,%fp2 | ...B6+TB8 | ||
321 | faddd COSB5,%fp3 | ...B5+TB7 | ||
322 | |||
323 | fmulx %fp1,%fp2 | ...T(B6+TB8) | ||
324 | fmulx %fp1,%fp3 | ...T(B5+TB7) | ||
325 | |||
326 | faddd COSB4,%fp2 | ...B4+T(B6+TB8) | ||
327 | faddx COSB3,%fp3 | ...B3+T(B5+TB7) | ||
328 | |||
329 | fmulx %fp1,%fp2 | ...T(B4+T(B6+TB8)) | ||
330 | fmulx %fp3,%fp1 | ...T(B3+T(B5+TB7)) | ||
331 | |||
332 | faddx COSB2,%fp2 | ...B2+T(B4+T(B6+TB8)) | ||
333 | fadds COSB1,%fp1 | ...B1+T(B3+T(B5+TB7)) | ||
334 | |||
335 | fmulx %fp2,%fp0 | ...S(B2+T(B4+T(B6+TB8))) | ||
336 | |--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING | ||
337 | |--FP2 RELEASED. | ||
338 | | NOTE(review): no FP2/FP3 save or restore appears in this block; "RESTORE NOW" looks stale - confirm. | ||
339 | |||
340 | faddx %fp1,%fp0 | ...[B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))] | ||
341 | |--FP1 RELEASED | ||
342 | |||
343 | fmulx X(%a6),%fp0 | ...S' times the sum above | ||
344 | |||
345 | fmovel %d1,%FPCR |restore users exceptions | ||
346 | fadds POSNEG1(%a6),%fp0 |last inst - possible exception set | ||
347 | bra t_frcinx | ||
348 | |||
349 | |||
350 | SINBORS: | ...NOTE(review): no branch to this label was seen in the code reviewed | ||
351 | |--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION. | ||
352 | |--IF |X| < 2**(-40), RETURN X OR 1. | ||
353 | cmpil #0x3FFF8000,%d0 | ...compact form of 1.0 | ||
354 | bgts REDUCEX | ...greater than 1: reduce; otherwise fall through to SINSM | ||
355 | |||
356 | |||
357 | SINSM: | ||
358 | movel ADJN(%a6),%d0 | ||
359 | cmpil #0,%d0 | ||
360 | bgts COSTINY | ...ADJN > 0: cosine was requested | ||
361 | |||
362 | SINTINY: | ||
363 | movew #0x0000,XDCARE(%a6) | ...JUST IN CASE | ||
364 | fmovel %d1,%FPCR |restore users exceptions | ||
365 | fmovex X(%a6),%fp0 |last inst - possible exception set | ||
366 | bra t_frcinx | ||
367 | |||
368 | |||
369 | COSTINY: | ||
370 | fmoves #0x3F800000,%fp0 | ...FP0 := 1.0 (single) | ||
371 | | The operand subtracted below, $00800000, is 2**(-126) in single precision. | ||
372 | fmovel %d1,%FPCR |restore users exceptions | ||
373 | fsubs #0x00800000,%fp0 |last inst - possible exception set | ||
374 | bra t_frcinx | ||
375 | |||
376 | |||
377 | REDUCEX: | ||
378 | |--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW. | ||
379 | |--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING | ||
380 | |--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE. | ||
381 | |||
382 | fmovemx %fp2-%fp5,-(%a7) | ...save FP2 through FP5 | ||
383 | movel %d2,-(%a7) | ...save D2; both are popped again at RESTORE | ||
384 | fmoves #0x00000000,%fp1 | ...FP1 := 0, the low part r of the remainder pair | ||
385 | |--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that | ||
386 | |--there is a danger of unwanted overflow in first LOOP iteration. In this | ||
387 | |--case, reduce argument by one remainder step to make subsequent reduction | ||
388 | |--safe. | ||
389 | cmpil #0x7ffeffff,%d0 |is argument dangerously large? | ||
390 | bnes LOOP | ||
391 | movel #0x7ffe0000,FP_SCR2(%a6) |yes | ||
392 | | ;create 2**16383*PI/2 | ||
393 | movel #0xc90fdaa2,FP_SCR2+4(%a6) | ||
394 | clrl FP_SCR2+8(%a6) | ||
395 | ftstx %fp0 |test sign of argument | ||
396 | movel #0x7fdc0000,FP_SCR3(%a6) |create low half of 2**16383* | ||
397 | | ;PI/2 at FP_SCR3 | ||
398 | movel #0x85a308d3,FP_SCR3+4(%a6) | ||
399 | clrl FP_SCR3+8(%a6) | ||
400 | fblt red_neg | ...negative arg: add the constants as built | ||
401 | orw #0x8000,FP_SCR2(%a6) |positive arg | ||
402 | orw #0x8000,FP_SCR3(%a6) | ...set the sign bit of both constants | ||
403 | red_neg: | ||
404 | faddx FP_SCR2(%a6),%fp0 |high part of reduction is exact | ||
405 | fmovex %fp0,%fp1 |save high result in fp1 | ||
406 | faddx FP_SCR3(%a6),%fp0 |low part of reduction | ||
407 | fsubx %fp0,%fp1 |determine low component of result | ||
408 | faddx FP_SCR3(%a6),%fp1 |fp0/fp1 are reduced argument. | ||
409 | |||
410 | |--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4. | ||
411 | |--integer quotient will be stored in N | ||
412 | |--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1) | ||
413 | |||
414 | LOOP: | ||
415 | fmovex %fp0,INARG(%a6) | ...+-2**K * F, 1 <= F < 2 | ||
416 | movew INARG(%a6),%d0 | ...sign/exponent word of INARG | ||
417 | movel %d0,%a1 | ...save a copy of D0 | ||
418 | andil #0x00007FFF,%d0 | ...drop the sign bit | ||
419 | subil #0x00003FFF,%d0 | ...D0 IS K | ||
420 | cmpil #28,%d0 | ||
421 | bles LASTLOOP | ...K <= 28: this is the final pass | ||
422 | CONTLOOP: | ||
423 | subil #27,%d0 | ...D0 IS L := K-27 | ||
424 | movel #0,ENDFLAG(%a6) | ...another pass will follow | ||
425 | bras WORK | ||
426 | LASTLOOP: | ||
427 | clrl %d0 | ...D0 IS L := 0 | ||
428 | movel #1,ENDFLAG(%a6) | ...WORK exits to RESTORE after this pass | ||
429 | |||
430 | WORK: | ||
431 | |--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN | ||
432 | |--THAT INT( X * (2/PI) / 2**(L) ) < 2**29. | ||
433 | |||
434 | |--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63), | ||
435 | |--2**L * (PIby2_1), 2**L * (PIby2_2) | ||
436 | |||
437 | movel #0x00003FFE,%d2 | ...BIASED EXPO OF 2/PI | ||
438 | subl %d0,%d2 | ...BIASED EXPO OF 2**(-L)*(2/PI) | ||
439 | |||
440 | movel #0xA2F9836E,FP_SCR1+4(%a6) | ...mantissa of 2/PI, high long | ||
441 | movel #0x4E44152A,FP_SCR1+8(%a6) | ...mantissa of 2/PI, low long | ||
442 | movew %d2,FP_SCR1(%a6) | ...FP_SCR1 is 2**(-L)*(2/PI) | ||
443 | |||
444 | fmovex %fp0,%fp2 | ||
445 | fmulx FP_SCR1(%a6),%fp2 | ...FP2 := R * 2**(-L)*(2/PI) | ||
446 | |--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN | ||
447 | |--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N | ||
448 | |--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT | ||
449 | |--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE | ||
450 | |--US THE DESIRED VALUE IN FLOATING POINT. | ||
451 | |||
452 | |--HIDE SIX CYCLES OF INSTRUCTION | ||
453 | movel %a1,%d2 | ...A1 holds the sign/exponent word saved in LOOP | ||
454 | swap %d2 | ...sign bit moves to bit 31 | ||
455 | andil #0x80000000,%d2 | ||
456 | oril #0x5F000000,%d2 | ...D2 IS SIGN(INARG)*2**63 IN SGL | ||
457 | movel %d2,TWOTO63(%a6) | ||
458 | |||
459 | movel %d0,%d2 | ||
460 | addil #0x00003FFF,%d2 | ...BIASED EXPO OF 2**L * (PI/2) | ||
461 | |||
462 | |--FP2 IS READY | ||
463 | fadds TWOTO63(%a6),%fp2 | ...THE FRACTIONAL PART OF FP2 IS ROUNDED | ||
464 | |||
465 | |--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2 | ||
466 | movew %d2,FP_SCR2(%a6) | ||
467 | clrw FP_SCR2+2(%a6) | ||
468 | movel #0xC90FDAA2,FP_SCR2+4(%a6) | ||
469 | clrl FP_SCR2+8(%a6) | ...FP_SCR2 is 2**(L) * Piby2_1 | ||
470 | |||
471 | |--FP2 IS READY | ||
472 | fsubs TWOTO63(%a6),%fp2 | ...FP2 is N | ||
473 | |||
474 | addil #0x00003FDD,%d0 | ...BIASED EXPO OF 2**L * Piby2_2 | ||
475 | movew %d0,FP_SCR3(%a6) | ||
476 | clrw FP_SCR3+2(%a6) | ||
477 | movel #0x85A308D3,FP_SCR3+4(%a6) | ||
478 | clrl FP_SCR3+8(%a6) | ...FP_SCR3 is 2**(L) * Piby2_2 | ||
479 | |||
480 | movel ENDFLAG(%a6),%d0 | ...D0 := ENDFLAG, tested at the end of this pass | ||
481 | |||
482 | |--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and | ||
483 | |--P2 = 2**(L) * Piby2_2 | ||
484 | fmovex %fp2,%fp4 | ||
485 | fmulx FP_SCR2(%a6),%fp4 | ...W = N*P1 | ||
486 | fmovex %fp2,%fp5 | ||
487 | fmulx FP_SCR3(%a6),%fp5 | ...w = N*P2 | ||
488 | fmovex %fp4,%fp3 | ||
489 | |--we want P+p = W+w but |p| <= half ulp of P | ||
490 | |--Then, we need to compute A := R-P and a := r-p | ||
491 | faddx %fp5,%fp3 | ...FP3 is P | ||
492 | fsubx %fp3,%fp4 | ...W-P | ||
493 | |||
494 | fsubx %fp3,%fp0 | ...FP0 is A := R - P | ||
495 | faddx %fp5,%fp4 | ...FP4 is p = (W-P)+w | ||
496 | |||
497 | fmovex %fp0,%fp3 | ...FP3 A | ||
498 | fsubx %fp4,%fp1 | ...FP1 is a := r - p | ||
499 | |||
500 | |--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but | ||
501 | |--|r| <= half ulp of R. | ||
502 | faddx %fp1,%fp0 | ...FP0 is R := A+a | ||
503 | |--No need to calculate r if this is the last loop | ||
504 | cmpil #0,%d0 | ||
505 | bgt RESTORE | ...ENDFLAG was 1: finished | ||
506 | |||
507 | |--Need to calculate r | ||
508 | fsubx %fp0,%fp3 | ...A-R | ||
509 | faddx %fp3,%fp1 | ...FP1 is r := (A-R)+a | ||
510 | bra LOOP | ||
511 | |||
512 | RESTORE: | ||
513 | fmovel %fp2,N(%a6) | ...store N as an integer (N shares L_SCR2 with ENDFLAG) | ||
514 | movel (%a7)+,%d2 | ...restore D2 saved at REDUCEX | ||
515 | fmovemx (%a7)+,%fp2-%fp5 | ...restore FP2 through FP5 saved at REDUCEX | ||
516 | |||
517 | | Return to whichever routine entered REDUCEX, selected by ADJN. | ||
518 | movel ADJN(%a6),%d0 | ||
519 | cmpil #4,%d0 | ...ssincos sets ADJN to 4; ssin and scos set 0 and 1 | ||
520 | |||
521 | blt SINCONT | ...sin or cos | ||
522 | bras SCCONT | ...sincos | ||
523 | |||
524 | .global ssincosd | ||
525 | ssincosd: | ||
526 | |--SIN AND COS OF X FOR DENORMALIZED X | ||
527 | |||
528 | fmoves #0x3F800000,%fp1 | ...FP1 := 1.0 (single), the cosine value | ||
529 | bsr sto_cos |store cosine result | ||
530 | bra t_extdnrm | ...same exit as ssind | ||
531 | |||
532 | .global ssincos | ||
533 | ssincos: | ||
534 | |--SET ADJN TO 4 (REDUCEX'S RESTORE PATH BRANCHES TO SCCONT WHEN ADJN >= 4) | ||
535 | movel #4,ADJN(%a6) | ||
536 | |||
537 | fmovex (%a0),%fp0 | ...LOAD INPUT | ||
538 | |||
539 | movel (%a0),%d0 | ...UPPER WORD OF D0 = SIGN AND EXPONENT OF X | ||
540 | movew 4(%a0),%d0 | ...LOWER WORD OF D0 = TOP 16 MANTISSA BITS OF X | ||
541 | fmovex %fp0,X(%a6) | ...KEEP A COPY OF X (RELOADED IN SCSM) | ||
542 | andil #0x7FFFFFFF,%d0 | ...COMPACTIFY X | ||
543 | |||
544 | cmpil #0x3FD78000,%d0 | ...|X| >= 2**(-40)? | ||
545 | bges SCOK1 | ||
546 | bra SCSM | ...|X| < 2**(-40): TINY ARGUMENT | ||
547 | |||
548 | SCOK1: | ||
549 | cmpil #0x4004BC7E,%d0 | ...|X| < 15 PI? | ||
550 | blts SCMAIN | ||
551 | bra REDUCEX | ...|X| >= 15 PI: GENERAL ARGUMENT REDUCTION | ||
552 | |||
553 | |||
554 | SCMAIN: | ||
555 | |--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI. | ||
556 | |--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. | ||
557 | fmovex %fp0,%fp1 | ||
558 | fmuld TWOBYPI,%fp1 | ...X*2/PI | ||
559 | |||
560 | |--HIDE THE NEXT INSTRUCTION | ||
561 | lea PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32 | ||
562 | |||
563 | |||
564 | |--FP1 IS NOW READY | ||
565 | fmovel %fp1,N(%a6) | ...CONVERT TO INTEGER | ||
566 | |||
567 | movel N(%a6),%d0 | ||
568 | asll #4,%d0 | ...N*16, EACH TABLE ENTRY IS 16 BYTES | ||
569 | addal %d0,%a1 | ...ADDRESS OF N*PIBY2, IN Y1, Y2 | ||
570 | |||
571 | fsubx (%a1)+,%fp0 | ...X-Y1 | ||
572 | fsubs (%a1),%fp0 | ...FP0 IS R = (X-Y1)-Y2 | ||
573 | |||
574 | SCCONT: | ||
575 | |--continuation point from REDUCEX; FP0 IS R AND N(%a6) HOLDS THE INTEGER QUOTIENT | ||
576 | |||
577 | |--HIDE THE NEXT TWO | ||
578 | movel N(%a6),%d0 | ||
579 | rorl #1,%d0 | ...BIT 0 OF N MOVES INTO BIT 31 OF D0 | ||
580 | |||
581 | cmpil #0,%d0 | ...D0 < 0 IFF N IS ODD | ||
582 | bge NEVEN | ...N EVEN; OTHERWISE FALL THROUGH TO NODD | ||
583 | |||
584 | NODD: | ...N ODD: A-POLYNOMIAL (FP1) IS STORED AS COS(X), B-POLYNOMIAL (FP0) IS RETURNED AS SIN(X) | ||
585 | |--REGISTERS SAVED SO FAR: D0, A0, FP2. | ||
586 | |||
587 | fmovex %fp0,RPRIME(%a6) | ...R' STARTS AS R | ||
588 | fmulx %fp0,%fp0 | ...FP0 IS S = R*R | ||
589 | fmoved SINA7,%fp1 | ...A7 | ||
590 | fmoved COSB8,%fp2 | ...B8 | ||
591 | fmulx %fp0,%fp1 | ...SA7 | ||
592 | movel %d2,-(%a7) | ...SAVE D2, USED AS SCRATCH BELOW | ||
593 | movel %d0,%d2 | ||
594 | fmulx %fp0,%fp2 | ...SB8 | ||
595 | rorl #1,%d2 | ...BIT 31 OF D2 = BIT 1 OF N | ||
596 | andil #0x80000000,%d2 | ||
597 | |||
598 | faddd SINA6,%fp1 | ...A6+SA7 | ||
599 | eorl %d0,%d2 | ...BIT 31 OF D2 = (BIT 0 OF N) XOR (BIT 1 OF N) | ||
600 | andil #0x80000000,%d2 | ||
601 | faddd COSB7,%fp2 | ...B7+SB8 | ||
602 | |||
603 | fmulx %fp0,%fp1 | ...S(A6+SA7) | ||
604 | eorl %d2,RPRIME(%a6) | ...APPLY THAT SIGN TO R' | ||
605 | movel (%a7)+,%d2 | ...RESTORE D2 | ||
606 | fmulx %fp0,%fp2 | ...S(B7+SB8) | ||
607 | rorl #1,%d0 | ...BIT 31 OF D0 = BIT 1 OF N | ||
608 | andil #0x80000000,%d0 | ||
609 | |||
610 | faddd SINA5,%fp1 | ...A5+S(A6+SA7) | ||
611 | movel #0x3F800000,POSNEG1(%a6) | ...1.0 IN SGL | ||
612 | eorl %d0,POSNEG1(%a6) | ...POSNEG1 = +-1.0, NEGATIVE IFF BIT 1 OF N IS SET | ||
613 | faddd COSB6,%fp2 | ...B6+S(B7+SB8) | ||
614 | |||
615 | fmulx %fp0,%fp1 | ...S(A5+S(A6+SA7)) | ||
616 | fmulx %fp0,%fp2 | ...S(B6+S(B7+SB8)) | ||
617 | fmovex %fp0,SPRIME(%a6) | ...S' STARTS AS S | ||
618 | |||
619 | faddd SINA4,%fp1 | ...A4+S(A5+S(A6+SA7)) | ||
620 | eorl %d0,SPRIME(%a6) | ...S' GETS THE SAME SIGN AS POSNEG1 | ||
621 | faddd COSB5,%fp2 | ...B5+S(B6+S(B7+SB8)) | ||
622 | |||
623 | fmulx %fp0,%fp1 | ...S(A4+...) | ||
624 | fmulx %fp0,%fp2 | ...S(B5+...) | ||
625 | |||
626 | faddd SINA3,%fp1 | ...A3+S(A4+...) | ||
627 | faddd COSB4,%fp2 | ...B4+S(B5+...) | ||
628 | |||
629 | fmulx %fp0,%fp1 | ...S(A3+...) | ||
630 | fmulx %fp0,%fp2 | ...S(B4+...) | ||
631 | |||
632 | faddx SINA2,%fp1 | ...A2+S(A3+...) | ||
633 | faddx COSB3,%fp2 | ...B3+S(B4+...) | ||
634 | |||
635 | fmulx %fp0,%fp1 | ...S(A2+...) | ||
636 | fmulx %fp0,%fp2 | ...S(B3+...) | ||
637 | |||
638 | faddx SINA1,%fp1 | ...A1+S(A2+...) | ||
639 | faddx COSB2,%fp2 | ...B2+S(B3+...) | ||
640 | |||
641 | fmulx %fp0,%fp1 | ...S(A1+...) | ||
642 | fmulx %fp2,%fp0 | ...S(B2+...) | ||
643 | |||
644 | |||
645 | |||
646 | fmulx RPRIME(%a6),%fp1 | ...R'S(A1+...) | ||
647 | fadds COSB1,%fp0 | ...B1+S(B2...) | ||
648 | fmulx SPRIME(%a6),%fp0 | ...S'(B1+S(B2+...)) | ||
649 | |||
650 | movel %d1,-(%sp) |save users mode & precision | ||
651 | andil #0xff,%d1 |mask off all exceptions | ||
652 | fmovel %d1,%FPCR | ||
653 | faddx RPRIME(%a6),%fp1 | ...COS(X) | ||
654 | bsr sto_cos |store cosine result | ||
655 | fmovel (%sp)+,%FPCR |restore users exceptions | ||
656 | fadds POSNEG1(%a6),%fp0 | ...SIN(X) | ||
657 | |||
658 | bra t_frcinx | ||
659 | |||
660 | |||
661 | NEVEN: | ...N EVEN: B-POLYNOMIAL (FP1) IS STORED AS COS(X), A-POLYNOMIAL (FP0) IS RETURNED AS SIN(X) | ||
662 | |--REGISTERS SAVED SO FAR: FP2. | ||
663 | |||
664 | fmovex %fp0,RPRIME(%a6) | ...R' STARTS AS R | ||
665 | fmulx %fp0,%fp0 | ...FP0 IS S = R*R | ||
666 | fmoved COSB8,%fp1 | ...B8 | ||
667 | fmoved SINA7,%fp2 | ...A7 | ||
668 | fmulx %fp0,%fp1 | ...SB8 | ||
669 | fmovex %fp0,SPRIME(%a6) | ...S' STARTS AS S | ||
670 | fmulx %fp0,%fp2 | ...SA7 | ||
671 | rorl #1,%d0 | ...BIT 31 OF D0 = BIT 1 OF N | ||
672 | andil #0x80000000,%d0 | ||
673 | faddd COSB7,%fp1 | ...B7+SB8 | ||
674 | faddd SINA6,%fp2 | ...A6+SA7 | ||
675 | eorl %d0,RPRIME(%a6) | ...FLIP SIGN OF R' IFF BIT 1 OF N IS SET | ||
676 | eorl %d0,SPRIME(%a6) | ...SAME SIGN FLIP FOR S' | ||
677 | fmulx %fp0,%fp1 | ...S(B7+SB8) | ||
678 | oril #0x3F800000,%d0 | ...D0 = +-1.0 IN SGL | ||
679 | movel %d0,POSNEG1(%a6) | ||
680 | fmulx %fp0,%fp2 | ...S(A6+SA7) | ||
681 | |||
682 | faddd COSB6,%fp1 | ...B6+S(B7+SB8) | ||
683 | faddd SINA5,%fp2 | ...A5+S(A6+SA7) | ||
684 | |||
685 | fmulx %fp0,%fp1 | ...S(B6+S(B7+SB8)) | ||
686 | fmulx %fp0,%fp2 | ...S(A5+S(A6+SA7)) | ||
687 | |||
688 | faddd COSB5,%fp1 | ...B5+S(B6+S(B7+SB8)) | ||
689 | faddd SINA4,%fp2 | ...A4+S(A5+S(A6+SA7)) | ||
690 | |||
691 | fmulx %fp0,%fp1 | ...S(B5+...) | ||
692 | fmulx %fp0,%fp2 | ...S(A4+...) | ||
693 | |||
694 | faddd COSB4,%fp1 | ...B4+S(B5+...) | ||
695 | faddd SINA3,%fp2 | ...A3+S(A4+...) | ||
696 | |||
697 | fmulx %fp0,%fp1 | ...S(B4+...) | ||
698 | fmulx %fp0,%fp2 | ...S(A3+...) | ||
699 | |||
700 | faddx COSB3,%fp1 | ...B3+S(B4+...) | ||
701 | faddx SINA2,%fp2 | ...A2+S(A3+...) | ||
702 | |||
703 | fmulx %fp0,%fp1 | ...S(B3+...) | ||
704 | fmulx %fp0,%fp2 | ...S(A2+...) | ||
705 | |||
706 | faddx COSB2,%fp1 | ...B2+S(B3+...) | ||
707 | faddx SINA1,%fp2 | ...A1+S(A2+...) | ||
708 | |||
709 | fmulx %fp0,%fp1 | ...S(B2+...) | ||
710 | fmulx %fp2,%fp0 | ...S(A1+...) | ||
711 | |||
712 | |||
713 | |||
714 | fadds COSB1,%fp1 | ...B1+S(B2...) | ||
715 | fmulx RPRIME(%a6),%fp0 | ...R'S(A1+...) | ||
716 | fmulx SPRIME(%a6),%fp1 | ...S'(B1+S(B2+...)) | ||
717 | |||
718 | movel %d1,-(%sp) |save users mode & precision | ||
719 | andil #0xff,%d1 |mask off all exceptions | ||
720 | fmovel %d1,%FPCR | ||
721 | fadds POSNEG1(%a6),%fp1 | ...COS(X) | ||
722 | bsr sto_cos |store cosine result | ||
723 | fmovel (%sp)+,%FPCR |restore users exceptions | ||
724 | faddx RPRIME(%a6),%fp0 | ...SIN(X) | ||
725 | |||
726 | bra t_frcinx | ||
727 | |||
728 | SCBORS: | ||
729 | cmpil #0x3FFF8000,%d0 | ...COMPACT FORM OF 1.0: IS |X| > 1? | ||
730 | bgt REDUCEX | ...YES, GENERAL ARGUMENT REDUCTION | ||
731 | |||
732 | |||
733 | SCSM: | ...TINY |X|: COS(X) := 1 - 2**(-126), SIN(X) := X | ||
734 | movew #0x0000,XDCARE(%a6) | ||
735 | fmoves #0x3F800000,%fp1 | ...FP1 = 1.0 (SGL) | ||
736 | |||
737 | movel %d1,-(%sp) |save users mode & precision | ||
738 | andil #0xff,%d1 |mask off all exceptions | ||
739 | fmovel %d1,%FPCR | ||
740 | fsubs #0x00800000,%fp1 | ...1 - 2**(-126) (SMALLEST NORMALIZED SGL) | ||
741 | bsr sto_cos |store cosine result | ||
742 | fmovel (%sp)+,%FPCR |restore users exceptions | ||
743 | fmovex X(%a6),%fp0 | ...SIN(X) = X | ||
744 | bra t_frcinx | ||
745 | |||
746 | |end | ||
diff --git a/arch/m68k/fpsp040/ssinh.S b/arch/m68k/fpsp040/ssinh.S new file mode 100644 index 000000000000..c8b3308bb143 --- /dev/null +++ b/arch/m68k/fpsp040/ssinh.S | |||
@@ -0,0 +1,135 @@ | |||
1 | | | ||
2 | | ssinh.sa 3.1 12/10/90 | ||
3 | | | ||
4 | | The entry point sSinh computes the hyperbolic sine of | ||
5 | | an input argument; sSinhd does the same except for denormalized | ||
6 | | input. | ||
7 | | | ||
8 | | Input: Double-extended number X in location pointed to | ||
9 | | by address register a0. | ||
10 | | | ||
11 | | Output: The value sinh(X) returned in floating-point register Fp0. | ||
12 | | | ||
13 | | Accuracy and Monotonicity: The returned result is within 3 ulps in | ||
14 | | 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the | ||
15 | | result is subsequently rounded to double precision. The | ||
16 | | result is provably monotonic in double precision. | ||
17 | | | ||
18 | | Speed: The program sSINH takes approximately 280 cycles. | ||
19 | | | ||
20 | | Algorithm: | ||
21 | | | ||
22 | | SINH | ||
23 | | 1. If |X| > 16380 log2, go to 3. | ||
24 | | | ||
25 | | 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formulae | ||
26 | | y = |X|, sgn = sign(X), and z = expm1(y), | ||
27 | | sinh(X) = sgn*(1/2)*( z + z/(1+z) ). | ||
28 | | Exit. | ||
29 | | | ||
30 | | 3. If |X| > 16480 log2, go to 5. | ||
31 | | | ||
32 | | 4. (16380 log2 < |X| <= 16480 log2) | ||
33 | | sinh(X) = sign(X) * exp(|X|)/2. | ||
34 | | However, invoking exp(|X|) may cause premature overflow. | ||
35 | | Thus, we calculate sinh(X) as follows: | ||
36 | | Y := |X| | ||
37 | | sgn := sign(X) | ||
38 | | sgnFact := sgn * 2**(16380) | ||
39 | | Y' := Y - 16381 log2 | ||
40 | | sinh(X) := sgnFact * exp(Y'). | ||
41 | | Exit. | ||
42 | | | ||
43 | | 5. (|X| > 16480 log2) sinh(X) must overflow. Return | ||
44 | | sign(X)*Huge*Huge to generate overflow and an infinity with | ||
45 | | the appropriate sign. Huge is the largest finite number in | ||
46 | | extended format. Exit. | ||
47 | | | ||
48 | |||
49 | | Copyright (C) Motorola, Inc. 1990 | ||
50 | | All Rights Reserved | ||
51 | | | ||
52 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
53 | | The copyright notice above does not evidence any | ||
54 | | actual or intended publication of such source code. | ||
55 | |||
56 | |SSINH idnt 2,1 | Motorola 040 Floating Point Software Package | ||
57 | |||
58 | |section 8 | ||
59 | |||
60 | T1: .long 0x40C62D38,0xD3D64634 | ... 16381 LOG2 LEAD | ||
61 | T2: .long 0x3D6F90AE,0xB1E75CC7 | ... 16381 LOG2 TRAIL | ||
62 | |||
63 | |xref t_frcinx | ||
64 | |xref t_ovfl | ||
65 | |xref t_extdnrm | ||
66 | |xref setox | ||
67 | |xref setoxm1 | ||
68 | |||
69 | .global ssinhd | ||
70 | ssinhd: | ||
71 | |--SINH(X) = X FOR DENORMALIZED X | ||
72 | |||
73 | bra t_extdnrm | ...RESULT IS LEFT ENTIRELY TO t_extdnrm (DEFINED ELSEWHERE) | ||
74 | |||
75 | .global ssinh | ||
76 | ssinh: | ...SINH OF THE EXTENDED OPERAND AT (A0), RESULT IN FP0 | ||
77 | fmovex (%a0),%fp0 | ...LOAD INPUT | ||
78 | |||
79 | movel (%a0),%d0 | ...UPPER WORD OF D0 = SIGN AND EXPONENT OF X | ||
80 | movew 4(%a0),%d0 | ...LOWER WORD OF D0 = TOP 16 MANTISSA BITS OF X | ||
81 | movel %d0,%a1 | save a copy of original (compacted) operand | ||
82 | andl #0x7FFFFFFF,%d0 | ...COMPACT FORM OF |X| | ||
83 | cmpl #0x400CB167,%d0 | ...|X| > 16380 LOG2 (BOUND AS NAMED IN THE COMMENTS BELOW)? | ||
84 | bgts SINHBIG | ||
85 | |||
86 | |--THIS IS THE USUAL CASE, |X| <= 16380 LOG2 | ||
87 | |--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) ) | ||
88 | |||
89 | fabsx %fp0 | ...Y = |X| | ||
90 | |||
91 | moveml %a1/%d1,-(%sp) | ...SAVE A1 AND D1 ACROSS THE CALL | ||
92 | fmovemx %fp0-%fp0,(%a0) | ...STORE Y AT (A0) AS THE OPERAND FOR setoxm1 | ||
93 | clrl %d1 | ...D1 = 0 FOR THE CALL (PRESUMABLY ITS FPCR ARGUMENT - CONFIRM) | ||
94 | bsr setoxm1 | ...FP0 IS Z = EXPM1(Y) | ||
95 | fmovel #0,%fpcr | ||
96 | moveml (%sp)+,%a1/%d1 | ||
97 | |||
98 | fmovex %fp0,%fp1 | ||
99 | fadds #0x3F800000,%fp1 | ...1+Z | ||
100 | fmovex %fp0,-(%sp) | ...PUSH Z | ||
101 | fdivx %fp1,%fp0 | ...Z/(1+Z) | ||
102 | movel %a1,%d0 | ...RECOVER THE SAVED COMPACT OPERAND | ||
103 | andl #0x80000000,%d0 | ...KEEP ONLY SIGN(X) | ||
104 | orl #0x3F000000,%d0 | ...D0 = SIGN(X)*0.5 IN SGL | ||
105 | faddx (%sp)+,%fp0 | ...Z + Z/(1+Z) | ||
106 | movel %d0,-(%sp) | ...PUSH SIGN(X)*0.5 | ||
107 | |||
108 | fmovel %d1,%fpcr | ...LOAD FPCR FROM THE D1 VALUE SAVED ABOVE | ||
109 | fmuls (%sp)+,%fp0 |last fp inst - possible exceptions set | ||
110 | |||
111 | bra t_frcinx | ||
112 | |||
113 | SINHBIG: | ...|X| > 16380 LOG2: SINH(X) = SGNFACT * EXP(|X| - 16381 LOG2) | ||
114 | cmpl #0x400CB2B3,%d0 | ...SECOND BOUND (16480 LOG2 PER THE ALGORITHM HEADER) | ||
115 | bgt t_ovfl | ...ABOVE IT THE RESULT IS TREATED AS OVERFLOW | ||
116 | fabsx %fp0 | ||
117 | fsubd T1(%pc),%fp0 | ...(|X|-16381LOG2_LEAD) | ||
118 | movel #0,-(%sp) | ...LOW MANTISSA LONG OF SGNFACT | ||
119 | movel #0x80000000,-(%sp) | ...HIGH MANTISSA LONG OF SGNFACT | ||
120 | movel %a1,%d0 | ||
121 | andl #0x80000000,%d0 | ...SIGN(X) | ||
122 | orl #0x7FFB0000,%d0 | ...BIASED EXPONENT 0x7FFB, I.E. 2**(16380) | ||
123 | movel %d0,-(%sp) | ...EXTENDED FMT | ||
124 | fsubd T2(%pc),%fp0 | ...|X| - 16381 LOG2, ACCURATE | ||
125 | |||
126 | movel %d1,-(%sp) | ...SAVE D1 (RELOADED INTO FPCR AFTER THE CALL) | ||
127 | clrl %d1 | ||
128 | fmovemx %fp0-%fp0,(%a0) | ...STORE THE REDUCED ARGUMENT AT (A0) FOR setox | ||
129 | bsr setox | ||
130 | fmovel (%sp)+,%fpcr | ||
131 | |||
132 | fmulx (%sp)+,%fp0 |possible exception | ||
133 | bra t_frcinx | ||
134 | |||
135 | |end | ||
diff --git a/arch/m68k/fpsp040/stan.S b/arch/m68k/fpsp040/stan.S new file mode 100644 index 000000000000..b5c2a196e617 --- /dev/null +++ b/arch/m68k/fpsp040/stan.S | |||
@@ -0,0 +1,455 @@ | |||
1 | | | ||
2 | | stan.sa 3.3 7/29/91 | ||
3 | | | ||
4 | | The entry point stan computes the tangent of | ||
5 | | an input argument; | ||
6 | | stand does the same except for denormalized input. | ||
7 | | | ||
8 | | Input: Double-extended number X in location pointed to | ||
9 | | by address register a0. | ||
10 | | | ||
11 | | Output: The value tan(X) returned in floating-point register Fp0. | ||
12 | | | ||
13 | | Accuracy and Monotonicity: The returned result is within 3 ulp in | ||
14 | | 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the | ||
15 | | result is subsequently rounded to double precision. The | ||
16 | | result is provably monotonic in double precision. | ||
17 | | | ||
18 | | Speed: The program sTAN takes approximately 170 cycles for | ||
19 | | input argument X such that |X| < 15Pi, which is the usual | ||
20 | | situation. | ||
21 | | | ||
22 | | Algorithm: | ||
23 | | | ||
24 | | 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. | ||
25 | | | ||
26 | | 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let | ||
27 | | k = N mod 2, so in particular, k = 0 or 1. | ||
28 | | | ||
29 | | 3. If k is odd, go to 5. | ||
30 | | | ||
31 | | 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a | ||
32 | | rational function U/V where | ||
33 | | U = r + r*s*(P1 + s*(P2 + s*P3)), and | ||
34 | | V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r. | ||
35 | | Exit. | ||
36 | | | ||
37 | | 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by a | ||
38 | | rational function U/V where | ||
39 | | U = r + r*s*(P1 + s*(P2 + s*P3)), and | ||
40 | | V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r, | ||
41 | | -Cot(r) = -V/U. Exit. | ||
42 | | | ||
43 | | 6. If |X| > 1, go to 8. | ||
44 | | | ||
45 | | 7. (|X|<2**(-40)) Tan(X) = X. Exit. | ||
46 | | | ||
47 | | 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2. | ||
48 | | | ||
49 | |||
50 | | Copyright (C) Motorola, Inc. 1990 | ||
51 | | All Rights Reserved | ||
52 | | | ||
53 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
54 | | The copyright notice above does not evidence any | ||
55 | | actual or intended publication of such source code. | ||
56 | |||
57 | |STAN idnt 2,1 | Motorola 040 Floating Point Software Package | ||
58 | |||
59 | |section 8 | ||
60 | |||
61 | #include "fpsp.h" | ||
62 | |||
63 | BOUNDS1: .long 0x3FD78000,0x4004BC7E | ...COMPACT FORMS OF 2**(-40) AND 15 PI | ||
64 | TWOBYPI: .long 0x3FE45F30,0x6DC9C883 | ...2/PI IN DBL | ||
65 | |||
66 | TANQ4: .long 0x3EA0B759,0xF50F8688 | ...DBL | ||
67 | TANP3: .long 0xBEF2BAA5,0xA8924F04 | ...DBL | ||
68 | |||
69 | TANQ3: .long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000 | ...DBL, PADDED TO 16 BYTES | ||
70 | |||
71 | TANP2: .long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000 | ...EXT | ||
72 | |||
73 | TANQ2: .long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000 | ...EXT | ||
74 | |||
75 | TANP1: .long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000 | ...EXT | ||
76 | |||
77 | TANQ1: .long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000 | ...EXT | ||
78 | |||
79 | INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000 | ...1/(2 PI) IN EXT | ||
80 | |||
81 | TWOPI1: .long 0x40010000,0xC90FDAA2,0x00000000,0x00000000 | ...2 PI, LEADING PART, EXT | ||
82 | TWOPI2: .long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000 | ...2 PI, TRAILING PART, EXT | ||
83 | |||
84 | |--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING | ||
85 | |--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT | ||
86 | |--MOST 69 BITS LONG. | ||
87 | .global PITBL | ||
88 | PITBL: | ||
89 | .long 0xC0040000,0xC90FDAA2,0x2168C235,0x21800000 | ||
90 | .long 0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000 | ||
91 | .long 0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000 | ||
92 | .long 0xC0040000,0xB6365E22,0xEE46F000,0x21480000 | ||
93 | .long 0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000 | ||
94 | .long 0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000 | ||
95 | .long 0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000 | ||
96 | .long 0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000 | ||
97 | .long 0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000 | ||
98 | .long 0xC0040000,0x90836524,0x88034B96,0x20B00000 | ||
99 | .long 0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000 | ||
100 | .long 0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000 | ||
101 | .long 0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000 | ||
102 | .long 0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000 | ||
103 | .long 0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000 | ||
104 | .long 0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000 | ||
105 | .long 0xC0030000,0xC90FDAA2,0x2168C235,0x21000000 | ||
106 | .long 0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000 | ||
107 | .long 0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000 | ||
108 | .long 0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000 | ||
109 | .long 0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000 | ||
110 | .long 0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000 | ||
111 | .long 0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000 | ||
112 | .long 0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000 | ||
113 | .long 0xC0020000,0xC90FDAA2,0x2168C235,0x20800000 | ||
114 | .long 0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000 | ||
115 | .long 0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000 | ||
116 | .long 0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000 | ||
117 | .long 0xC0010000,0xC90FDAA2,0x2168C235,0x20000000 | ||
118 | .long 0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000 | ||
119 | .long 0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000 | ||
120 | .long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000 | ||
121 | .long 0x00000000,0x00000000,0x00000000,0x00000000 | ||
122 | .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000 | ||
123 | .long 0x40000000,0xC90FDAA2,0x2168C235,0x9F800000 | ||
124 | .long 0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000 | ||
125 | .long 0x40010000,0xC90FDAA2,0x2168C235,0xA0000000 | ||
126 | .long 0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000 | ||
127 | .long 0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000 | ||
128 | .long 0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000 | ||
129 | .long 0x40020000,0xC90FDAA2,0x2168C235,0xA0800000 | ||
130 | .long 0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000 | ||
131 | .long 0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000 | ||
132 | .long 0x40030000,0x8A3AE64F,0x76F80584,0x21080000 | ||
133 | .long 0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000 | ||
134 | .long 0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000 | ||
135 | .long 0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000 | ||
136 | .long 0x40030000,0xBC7EDCF7,0xFF523611,0x21680000 | ||
137 | .long 0x40030000,0xC90FDAA2,0x2168C235,0xA1000000 | ||
138 | .long 0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000 | ||
139 | .long 0x40030000,0xE231D5F6,0x6595DA7B,0x21300000 | ||
140 | .long 0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000 | ||
141 | .long 0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000 | ||
142 | .long 0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000 | ||
143 | .long 0x40040000,0x8A3AE64F,0x76F80584,0x21880000 | ||
144 | .long 0x40040000,0x90836524,0x88034B96,0xA0B00000 | ||
145 | .long 0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000 | ||
146 | .long 0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000 | ||
147 | .long 0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000 | ||
148 | .long 0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000 | ||
149 | .long 0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000 | ||
150 | .long 0x40040000,0xB6365E22,0xEE46F000,0xA1480000 | ||
151 | .long 0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000 | ||
152 | .long 0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000 | ||
153 | .long 0x40040000,0xC90FDAA2,0x2168C235,0xA1800000 | ||
154 | |||
155 | .set INARG,FP_SCR4 | ||
156 | |||
157 | .set TWOTO63,L_SCR1 | ||
158 | .set ENDFLAG,L_SCR2 | ||
159 | .set N,L_SCR3 | ||
160 | |||
161 | | xref t_frcinx | ||
162 | |xref t_extdnrm | ||
163 | |||
164 | .global stand | ||
165 | stand: | ||
166 | |--TAN(X) = X FOR DENORMALIZED X | ||
167 | |||
168 | bra t_extdnrm | ...RESULT IS LEFT ENTIRELY TO t_extdnrm (DEFINED ELSEWHERE) | ||
169 | |||
170 | .global stan | ||
171 | stan: | ...TAN OF THE EXTENDED OPERAND AT (A0), RESULT IN FP0 | ||
172 | fmovex (%a0),%fp0 | ...LOAD INPUT | ||
173 | |||
174 | movel (%a0),%d0 | ...UPPER WORD OF D0 = SIGN AND EXPONENT OF X | ||
175 | movew 4(%a0),%d0 | ...LOWER WORD OF D0 = TOP 16 MANTISSA BITS OF X | ||
176 | andil #0x7FFFFFFF,%d0 | ...COMPACT FORM OF |X| | ||
177 | |||
178 | cmpil #0x3FD78000,%d0 | ...|X| >= 2**(-40)? | ||
179 | bges TANOK1 | ||
180 | bra TANSM | ...|X| < 2**(-40): TAN(X) = X | ||
181 | TANOK1: | ||
182 | cmpil #0x4004BC7E,%d0 | ...|X| < 15 PI? | ||
183 | blts TANMAIN | ||
184 | bra REDUCEX | ...|X| >= 15 PI: GENERAL ARGUMENT REDUCTION | ||
185 | |||
186 | |||
187 | TANMAIN: | ||
188 | |--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI. | ||
189 | |--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. | ||
190 | fmovex %fp0,%fp1 | ||
191 | fmuld TWOBYPI,%fp1 | ...X*2/PI | ||
192 | |||
193 | |--HIDE THE NEXT INSTRUCTION | ||
194 | leal PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32 | ||
195 | |||
196 | |--FP1 IS NOW READY | ||
197 | fmovel %fp1,%d0 | ...CONVERT TO INTEGER | ||
198 | |||
199 | asll #4,%d0 | ...N*16, EACH TABLE ENTRY IS 16 BYTES | ||
200 | addal %d0,%a1 | ...ADDRESS N*PIBY2 IN Y1, Y2 | ||
201 | |||
202 | fsubx (%a1)+,%fp0 | ...X-Y1 | ||
203 | |--HIDE THE NEXT ONE | ||
204 | |||
205 | fsubs (%a1),%fp0 | ...FP0 IS R = (X-Y1)-Y2 | ||
206 | |||
207 | rorl #5,%d0 | ...D0 HOLDS N*16; BIT 0 OF N MOVES INTO BIT 31 | ||
208 | andil #0x80000000,%d0 | ...D0 WAS ODD IFF D0 < 0 | ||
209 | |||
210 | TANCONT: | ...FP0 IS R, D0 < 0 IFF N IS ODD; ALSO ENTERED FROM RESTORE | ||
211 | |||
212 | cmpil #0,%d0 | ||
213 | blt NODD | ...N ODD: TAN(X) = -COT(R) | ||
214 | |||
215 | fmovex %fp0,%fp1 | ||
216 | fmulx %fp1,%fp1 | ...S = R*R | ||
217 | |||
218 | fmoved TANQ4,%fp3 | ...Q4 | ||
219 | fmoved TANP3,%fp2 | ...P3 | ||
220 | |||
221 | fmulx %fp1,%fp3 | ...SQ4 | ||
222 | fmulx %fp1,%fp2 | ...SP3 | ||
223 | |||
224 | faddd TANQ3,%fp3 | ...Q3+SQ4 | ||
225 | faddx TANP2,%fp2 | ...P2+SP3 | ||
226 | |||
227 | fmulx %fp1,%fp3 | ...S(Q3+SQ4) | ||
228 | fmulx %fp1,%fp2 | ...S(P2+SP3) | ||
229 | |||
230 | faddx TANQ2,%fp3 | ...Q2+S(Q3+SQ4) | ||
231 | faddx TANP1,%fp2 | ...P1+S(P2+SP3) | ||
232 | |||
233 | fmulx %fp1,%fp3 | ...S(Q2+S(Q3+SQ4)) | ||
234 | fmulx %fp1,%fp2 | ...S(P1+S(P2+SP3)) | ||
235 | |||
236 | faddx TANQ1,%fp3 | ...Q1+S(Q2+S(Q3+SQ4)) | ||
237 | fmulx %fp0,%fp2 | ...RS(P1+S(P2+SP3)) | ||
238 | |||
239 | fmulx %fp3,%fp1 | ...S(Q1+S(Q2+S(Q3+SQ4))) | ||
240 | |||
241 | |||
242 | faddx %fp2,%fp0 | ...R+RS(P1+S(P2+SP3)) | ||
243 | |||
244 | |||
245 | fadds #0x3F800000,%fp1 | ...1+S(Q1+...) | ||
246 | |||
247 | fmovel %d1,%fpcr |restore users exceptions | ||
248 | fdivx %fp1,%fp0 |last inst - possible exception set | ||
249 | |||
250 | bra t_frcinx | ||
251 | |||
252 | NODD: | ...N ODD: TAN(X) = -COT(R) = V/(-U), U IN FP1 AND V IN FP0 | ||
253 | fmovex %fp0,%fp1 | ...FP1 KEEPS R | ||
254 | fmulx %fp0,%fp0 | ...S = R*R | ||
255 | |||
256 | fmoved TANQ4,%fp3 | ...Q4 | ||
257 | fmoved TANP3,%fp2 | ...P3 | ||
258 | |||
259 | fmulx %fp0,%fp3 | ...SQ4 | ||
260 | fmulx %fp0,%fp2 | ...SP3 | ||
261 | |||
262 | faddd TANQ3,%fp3 | ...Q3+SQ4 | ||
263 | faddx TANP2,%fp2 | ...P2+SP3 | ||
264 | |||
265 | fmulx %fp0,%fp3 | ...S(Q3+SQ4) | ||
266 | fmulx %fp0,%fp2 | ...S(P2+SP3) | ||
267 | |||
268 | faddx TANQ2,%fp3 | ...Q2+S(Q3+SQ4) | ||
269 | faddx TANP1,%fp2 | ...P1+S(P2+SP3) | ||
270 | |||
271 | fmulx %fp0,%fp3 | ...S(Q2+S(Q3+SQ4)) | ||
272 | fmulx %fp0,%fp2 | ...S(P1+S(P2+SP3)) | ||
273 | |||
274 | faddx TANQ1,%fp3 | ...Q1+S(Q2+S(Q3+SQ4)) | ||
275 | fmulx %fp1,%fp2 | ...RS(P1+S(P2+SP3)) | ||
276 | |||
277 | fmulx %fp3,%fp0 | ...S(Q1+S(Q2+S(Q3+SQ4))) | ||
278 | |||
279 | |||
280 | faddx %fp2,%fp1 | ...R+RS(P1+S(P2+SP3)) | ||
281 | fadds #0x3F800000,%fp0 | ...1+S(Q1+...) | ||
282 | |||
283 | |||
284 | fmovex %fp1,-(%sp) | ...PUSH U | ||
285 | eoril #0x80000000,(%sp) | ...FLIP ITS SIGN BIT: -U | ||
286 | |||
287 | fmovel %d1,%fpcr |restore users exceptions | ||
288 | fdivx (%sp)+,%fp0 |last inst - possible exception set | ||
289 | |||
290 | bra t_frcinx | ||
291 | |||
292 | TANBORS: | ||
293 | |--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION. | ||
294 | |--IF |X| < 2**(-40), RETURN X. | ||
295 | cmpil #0x3FFF8000,%d0 | ...COMPACT FORM OF 1.0: IS |X| > 1? | ||
296 | bgts REDUCEX | ||
297 | |||
298 | TANSM: | ...TINY |X|: TAN(X) = X | ||
299 | |||
300 | fmovex %fp0,-(%sp) | ...PUSH X | ||
301 | fmovel %d1,%fpcr |restore users exceptions | ||
302 | fmovex (%sp)+,%fp0 |last inst - possible exception set | ||
303 | |||
304 | bra t_frcinx | ||
305 | |||
306 | |||
307 | REDUCEX: | ||
308 | |--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW. | ||
309 | |--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING | ||
310 | |--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE. | ||
311 | |||
312 | fmovemx %fp2-%fp5,-(%a7) | ...save FP2 through FP5 | ||
313 | movel %d2,-(%a7) | ...save D2 | ||
314 | fmoves #0x00000000,%fp1 | ...r := 0, LOW PART OF THE (R,r) PAIR | ||
315 | |||
316 | |--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that | ||
317 | |--there is a danger of unwanted overflow in first LOOP iteration. In this | ||
318 | |--case, reduce argument by one remainder step to make subsequent reduction | ||
319 | |--safe. | ||
320 | cmpil #0x7ffeffff,%d0 |is argument dangerously large? | ||
321 | bnes LOOP | ||
322 | movel #0x7ffe0000,FP_SCR2(%a6) |yes | ||
323 | | ;create 2**16383*PI/2 | ||
324 | movel #0xc90fdaa2,FP_SCR2+4(%a6) | ||
325 | clrl FP_SCR2+8(%a6) | ||
326 | ftstx %fp0 |test sign of argument | ||
327 | movel #0x7fdc0000,FP_SCR3(%a6) |create low half of 2**16383* | ||
328 | | ;PI/2 at FP_SCR3 | ||
329 | movel #0x85a308d3,FP_SCR3+4(%a6) | ||
330 | clrl FP_SCR3+8(%a6) | ||
331 | fblt red_neg | ||
332 | orw #0x8000,FP_SCR2(%a6) |positive arg: negate both constants so the adds below subtract | ||
333 | orw #0x8000,FP_SCR3(%a6) | ||
334 | red_neg: | ||
335 | faddx FP_SCR2(%a6),%fp0 |high part of reduction is exact | ||
336 | fmovex %fp0,%fp1 |save high result in fp1 | ||
337 | faddx FP_SCR3(%a6),%fp0 |low part of reduction | ||
338 | fsubx %fp0,%fp1 |determine low component of result | ||
339 | faddx FP_SCR3(%a6),%fp1 |fp0/fp1 are reduced argument. | ||
340 | |||
341 | |--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4. | ||
342 | |--integer quotient will be stored in N | ||
343 | |--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1) | ||
344 | |||
345 | LOOP: | ||
346 | fmovex %fp0,INARG(%a6) | ...+-2**K * F, 1 <= F < 2 | ||
347 | movew INARG(%a6),%d0 | ...SIGN AND BIASED EXPONENT WORD OF R | ||
348 | movel %d0,%a1 | ...save a copy of D0 | ||
349 | andil #0x00007FFF,%d0 | ||
350 | subil #0x00003FFF,%d0 | ...D0 IS K | ||
351 | cmpil #28,%d0 | ||
352 | bles LASTLOOP | ...K <= 28: THIS PASS IS THE LAST ONE | ||
353 | CONTLOOP: | ||
354 | subil #27,%d0 | ...D0 IS L := K-27 | ||
355 | movel #0,ENDFLAG(%a6) | ||
356 | bras WORK | ||
357 | LASTLOOP: | ||
358 | clrl %d0 | ...D0 IS L := 0 | ||
359 | movel #1,ENDFLAG(%a6) | ||
360 | |||
361 | WORK: | ||
362 | |--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN | ||
363 | |--THAT INT( X * (2/PI) / 2**(L) ) < 2**29. | ||
364 | |||
365 | |--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63), | ||
366 | |--2**L * (PIby2_1), 2**L * (PIby2_2) | ||
367 | |||
368 | movel #0x00003FFE,%d2 | ...BIASED EXPO OF 2/PI | ||
369 | subl %d0,%d2 | ...BIASED EXPO OF 2**(-L)*(2/PI) | ||
370 | |||
371 | movel #0xA2F9836E,FP_SCR1+4(%a6) | ||
372 | movel #0x4E44152A,FP_SCR1+8(%a6) | ||
373 | movew %d2,FP_SCR1(%a6) | ...FP_SCR1 is 2**(-L)*(2/PI) | ||
374 | |||
375 | fmovex %fp0,%fp2 | ||
376 | fmulx FP_SCR1(%a6),%fp2 | ...FP2 = R * 2**(-L)*(2/PI) | ||
377 | |--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN | ||
378 | |--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N | ||
379 | |--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT | ||
380 | |--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE | ||
381 | |--US THE DESIRED VALUE IN FLOATING POINT. | ||
382 | |||
383 | |--HIDE SIX CYCLES OF INSTRUCTION | ||
384 | movel %a1,%d2 | ...SAVED SIGN/EXPONENT WORD OF INARG | ||
385 | swap %d2 | ...MOVE IT TO THE UPPER WORD | ||
386 | andil #0x80000000,%d2 | ...KEEP ONLY SIGN(INARG) | ||
387 | oril #0x5F000000,%d2 | ...D2 IS SIGN(INARG)*2**63 IN SGL | ||
388 | movel %d2,TWOTO63(%a6) | ||
389 | |||
390 | movel %d0,%d2 | ||
391 | addil #0x00003FFF,%d2 | ...BIASED EXPO OF 2**L * (PI/2) | ||
392 | |||
393 | |--FP2 IS READY | ||
394 | fadds TWOTO63(%a6),%fp2 | ...THE FRACTIONAL PART OF FP2 IS ROUNDED | ||
395 | |||
396 | |--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2 | ||
397 | movew %d2,FP_SCR2(%a6) | ||
398 | clrw FP_SCR2+2(%a6) | ||
399 | movel #0xC90FDAA2,FP_SCR2+4(%a6) | ||
400 | clrl FP_SCR2+8(%a6) | ...FP_SCR2 is 2**(L) * Piby2_1 | ||
401 | |||
402 | |--FP2 IS READY | ||
403 | fsubs TWOTO63(%a6),%fp2 | ...FP2 is N | ||
404 | |||
405 | addil #0x00003FDD,%d0 | ...BIASED EXPO OF 2**L * Piby2_2 | ||
406 | movew %d0,FP_SCR3(%a6) | ||
407 | clrw FP_SCR3+2(%a6) | ||
408 | movel #0x85A308D3,FP_SCR3+4(%a6) | ||
409 | clrl FP_SCR3+8(%a6) | ...FP_SCR3 is 2**(L) * Piby2_2 | ||
410 | |||
411 | movel ENDFLAG(%a6),%d0 | ...D0 = 1 ON THE LAST PASS, ELSE 0 | ||
412 | |||
413 | |--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and | ||
414 | |--P2 = 2**(L) * Piby2_2 | ||
415 | fmovex %fp2,%fp4 | ||
416 | fmulx FP_SCR2(%a6),%fp4 | ...W = N*P1 | ||
417 | fmovex %fp2,%fp5 | ||
418 | fmulx FP_SCR3(%a6),%fp5 | ...w = N*P2 | ||
419 | fmovex %fp4,%fp3 | ||
420 | |--we want P+p = W+w but |p| <= half ulp of P | ||
421 | |--Then, we need to compute A := R-P and a := r-p | ||
422 | faddx %fp5,%fp3 | ...FP3 is P | ||
423 | fsubx %fp3,%fp4 | ...W-P | ||
424 | |||
425 | fsubx %fp3,%fp0 | ...FP0 is A := R - P | ||
426 | faddx %fp5,%fp4 | ...FP4 is p = (W-P)+w | ||
427 | |||
428 | fmovex %fp0,%fp3 | ...FP3 A | ||
429 | fsubx %fp4,%fp1 | ...FP1 is a := r - p | ||
430 | |||
431 | |--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but | ||
432 | |--|r| <= half ulp of R. | ||
433 | faddx %fp1,%fp0 | ...FP0 is R := A+a | ||
434 | |--No need to calculate r if this is the last loop | ||
435 | cmpil #0,%d0 | ||
436 | bgt RESTORE | ...ENDFLAG WAS 1: DONE | ||
437 | |||
438 | |--Need to calculate r | ||
439 | fsubx %fp0,%fp3 | ...A-R | ||
440 | faddx %fp3,%fp1 | ...FP1 is r := (A-R)+a | ||
441 | bra LOOP | ||
442 | |||
443 | RESTORE: | ||
444 | fmovel %fp2,N(%a6) | ...STORE THE LAST QUOTIENT N AS AN INTEGER | ||
445 | movel (%a7)+,%d2 | ...RESTORE D2 | ||
446 | fmovemx (%a7)+,%fp2-%fp5 | ...RESTORE FP2 THROUGH FP5 | ||
447 | |||
448 | |||
449 | movel N(%a6),%d0 | ||
450 | rorl #1,%d0 | ...BIT 0 OF N MOVES INTO BIT 31: D0 < 0 IFF N IS ODD | ||
451 | |||
452 | |||
453 | bra TANCONT | ||
454 | |||
455 | |end | ||
diff --git a/arch/m68k/fpsp040/stanh.S b/arch/m68k/fpsp040/stanh.S new file mode 100644 index 000000000000..33b009802243 --- /dev/null +++ b/arch/m68k/fpsp040/stanh.S | |||
@@ -0,0 +1,185 @@ | |||
1 | | | ||
2 | | stanh.sa 3.1 12/10/90 | ||
3 | | | ||
4 | | The entry point sTanh computes the hyperbolic tangent of | ||
5 | | an input argument; sTanhd does the same except for denormalized | ||
6 | | input. | ||
7 | | | ||
8 | | Input: Double-extended number X in location pointed to | ||
9 | | by address register a0. | ||
10 | | | ||
11 | | Output: The value tanh(X) returned in floating-point register Fp0. | ||
12 | | | ||
13 | | Accuracy and Monotonicity: The returned result is within 3 ulps in | ||
14 | | 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the | ||
15 | | result is subsequently rounded to double precision. The | ||
16 | | result is provably monotonic in double precision. | ||
17 | | | ||
18 | | Speed: The program stanh takes approximately 270 cycles. | ||
19 | | | ||
20 | | Algorithm: | ||
21 | | | ||
22 | | TANH | ||
23 | | 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3. | ||
24 | | | ||
25 | | 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by | ||
26 | | sgn := sign(X), Y := 2|X|, z := expm1(Y), and | ||
27 | | tanh(X) = sgn*( z/(2+z) ). | ||
28 | | Exit. | ||
29 | | | ||
30 | | 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1, | ||
31 | | go to 7. | ||
32 | | | ||
33 | | 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6. | ||
34 | | | ||
35 | | 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by | ||
36 | | sgn := sign(X), Y := 2|X|, z := exp(Y), | ||
37 | | tanh(X) = sgn - [ sgn*2/(1+z) ]. | ||
38 | | Exit. | ||
39 | | | ||
40 | | 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we | ||
41 | | calculate Tanh(X) by | ||
42 | | sgn := sign(X), Tiny := 2**(-126), | ||
43 | | tanh(X) := sgn - sgn*Tiny. | ||
44 | | Exit. | ||
45 | | | ||
46 | | 7. (|X| < 2**(-40)). Tanh(X) = X. Exit. | ||
47 | | | ||
48 | |||
49 | | Copyright (C) Motorola, Inc. 1990 | ||
50 | | All Rights Reserved | ||
51 | | | ||
52 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
53 | | The copyright notice above does not evidence any | ||
54 | | actual or intended publication of such source code. | ||
55 | |||
56 | |STANH idnt 2,1 | Motorola 040 Floating Point Software Package | ||
57 | |||
58 | |section 8 | ||
59 | |||
60 | #include "fpsp.h" | ||
61 | |||
62 | .set X,FP_SCR5 | ...scratch copy of the input, addressed as X(%a6) | ||
63 | .set XDCARE,X+2 | ...word following the sign/exponent word of X | ||
64 | .set XFRAC,X+4 | ...X+4; not referenced by the code in this file | ||
65 | |||
66 | .set SGN,L_SCR3 | ...longword scratch, ends up holding only the sign bit of X | ||
67 | |||
68 | .set V,FP_SCR6 | ...scratch for SIGN(X)*(Z+2) in the usual case | ||
69 | |||
70 | BOUNDS1: .long 0x3FD78000,0x3FFFDDCE | ... 2^(-40), (5/2)LOG2 (exponent word : top 16 mantissa bits; bounds pair for cmp2l in stanh) | ||
71 | |||
72 | |xref t_frcinx | ||
73 | |xref t_extdnrm | ||
74 | |xref setox | ||
75 | |xref setoxm1 | ||
76 | |||
77 | .global stanhd | ||
78 | stanhd: | ...entry point for denormalized input | ||
79 | |--TANH(X) = X FOR DENORMALIZED X | ||
80 | |||
81 | bra t_extdnrm | ...t_extdnrm is external; presumably returns the denormalized operand -- confirm | ||
82 | |||
83 | .global stanh | ||
84 | stanh: | ...entry: X (extended) at (a0), tanh(X) returned in fp0 | ||
85 | fmovex (%a0),%fp0 | ...LOAD INPUT | ||
86 | |||
87 | fmovex %fp0,X(%a6) | ...scratch copy of X | ||
88 | movel (%a0),%d0 | ...d0 high word = sign/exponent word of X | ||
89 | movew 4(%a0),%d0 | ...d0 low word = top 16 bits of the mantissa | ||
90 | movel %d0,X(%a6) | ...first longword of X now holds this compact form | ||
91 | andl #0x7FFFFFFF,%d0 | ...d0 = compact |X| (sign bit cleared) | ||
92 | cmp2l BOUNDS1(%pc),%d0 | ...2**(-40) < |X| < (5/2)LOG2 ? | ||
93 | bcss TANHBORS | ...carry set: d0 is outside the BOUNDS1 pair | ||
94 | |||
95 | |--THIS IS THE USUAL CASE | ||
96 | |--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2). | ||
97 | |||
98 | movel X(%a6),%d0 | ...reload compact X (with sign) | ||
99 | movel %d0,SGN(%a6) | ...SGN = compact X for now, masked to the sign bit below | ||
100 | andl #0x7FFF0000,%d0 | ...keep the exponent field only | ||
101 | addl #0x00010000,%d0 | ...EXPONENT OF 2|X| | ||
102 | movel %d0,X(%a6) | ...X = 2|X|: sign cleared, second word zeroed, mantissa untouched | ||
103 | andl #0x80000000,SGN(%a6) | ...SGN = sign bit of X only | ||
104 | fmovex X(%a6),%fp0 | ...FP0 IS Y = 2|X| | ||
105 | |||
106 | movel %d1,-(%a7) | ...save d1 (written to FPCR further down) | ||
107 | clrl %d1 | ...d1 = 0 across the call; presumably the FPCR value setoxm1 applies -- confirm | ||
108 | fmovemx %fp0-%fp0,(%a0) | ...store Y at (a0) for setoxm1 | ||
109 | bsr setoxm1 | ...FP0 IS Z = EXPM1(Y) | ||
110 | movel (%a7)+,%d1 | ...restore d1 | ||
111 | |||
112 | fmovex %fp0,%fp1 | ...FP1 = Z | ||
113 | fadds #0x40000000,%fp1 | ...Z+2 | ||
114 | movel SGN(%a6),%d0 | ...d0 = sign bit of X | ||
115 | fmovex %fp1,V(%a6) | ...V = Z+2 | ||
116 | eorl %d0,V(%a6) | ...flip V's sign bit when X is negative: V = SIGN(X)*(Z+2) | ||
117 | |||
118 | fmovel %d1,%FPCR |restore user's FPCR (exception enables) from d1 | ||
119 | fdivx V(%a6),%fp0 | ...FP0 = Z / (SIGN(X)*(Z+2)) | ||
120 | bra t_frcinx | ...exit through t_frcinx (external) | ||
121 | |||
122 | TANHBORS: | ...compact |X| in d0 is outside the BOUNDS1 pair | ||
123 | cmpl #0x3FFF8000,%d0 | ...compare with compact 1.0 | ||
124 | blt TANHSM | ...below 1: this is the tiny-argument case | ||
125 | |||
126 | cmpl #0x40048AA1,%d0 | ...compare with the 50 LOG2 bound | ||
127 | bgt TANHHUGE | ...above it: result is +-1 adjusted by a tiny amount | ||
128 | |||
129 | |-- (5/2) LOG2 < |X| < 50 LOG2, | ||
130 | |--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X), | ||
131 | |--TANH(X) = SGN - SGN*2/[EXP(Y)+1]. | ||
132 | |||
133 | movel X(%a6),%d0 | ...reload compact X (with sign) | ||
134 | movel %d0,SGN(%a6) | ...SGN = compact X for now, masked to the sign bit below | ||
135 | andl #0x7FFF0000,%d0 | ...keep the exponent field only | ||
136 | addl #0x00010000,%d0 | ...EXPO OF 2|X| | ||
137 | movel %d0,X(%a6) | ...Y = 2|X| | ||
138 | andl #0x80000000,SGN(%a6) | ...SGN = sign bit of X only | ||
139 | movel SGN(%a6),%d0 | ...d0 = sign bit (loaded again after the call below) | ||
140 | fmovex X(%a6),%fp0 | ...Y = 2|X| | ||
141 | |||
142 | movel %d1,-(%a7) | ...save d1 (written to FPCR further down) | ||
143 | clrl %d1 | ...d1 = 0 across the call; presumably the FPCR value setox applies -- confirm | ||
144 | fmovemx %fp0-%fp0,(%a0) | ...store Y at (a0) for setox | ||
145 | bsr setox | ...FP0 IS EXP(Y) | ||
146 | movel (%a7)+,%d1 | ...restore d1 | ||
147 | movel SGN(%a6),%d0 | ...d0 = sign bit of X | ||
148 | fadds #0x3F800000,%fp0 | ...EXP(Y)+1 | ||
149 | |||
150 | eorl #0xC0000000,%d0 | ...-SIGN(X)*2 | ||
151 | fmoves %d0,%fp1 | ...-SIGN(X)*2 IN SGL FMT | ||
152 | fdivx %fp0,%fp1 | ...-SIGN(X)2 / [EXP(Y)+1 ] | ||
153 | |||
154 | movel SGN(%a6),%d0 | ...d0 = sign bit of X | ||
155 | orl #0x3F800000,%d0 | ...SGN | ||
156 | fmoves %d0,%fp0 | ...SGN IN SGL FMT | ||
157 | |||
158 | fmovel %d1,%FPCR |restore user's FPCR (exception enables) from d1 | ||
159 | faddx %fp1,%fp0 | ...FP0 = SGN - SGN*2/[EXP(Y)+1] | ||
160 | |||
161 | bra t_frcinx | ...exit through t_frcinx (external) | ||
162 | |||
163 | TANHSM: | ...tiny argument: TANH(X) = X | ||
164 | movew #0x0000,XDCARE(%a6) | ...clear the word that was overwritten with mantissa bits at entry | ||
165 | |||
166 | fmovel %d1,%FPCR |restore user's FPCR (exception enables) from d1 | ||
167 | fmovex X(%a6),%fp0 |last inst - possible exception set | ||
168 | |||
169 | bra t_frcinx | ...exit through t_frcinx (external) | ||
170 | |||
171 | TANHHUGE: | ||
172 | |---RETURN SGN(X) - SGN(X)EPS | ||
173 | movel X(%a6),%d0 | ...compact X (sign in bit 31) | ||
174 | andl #0x80000000,%d0 | ...d0 = sign bit of X | ||
175 | orl #0x3F800000,%d0 | ...d0 = SIGN(X)*1.0 in single format | ||
176 | fmoves %d0,%fp0 | ...FP0 = SGN | ||
177 | andl #0x80000000,%d0 | ...back to the sign bit only | ||
178 | eorl #0x80800000,%d0 | ...-SIGN(X)*EPS | ||
179 | |||
180 | fmovel %d1,%FPCR |restore user's FPCR (exception enables) from d1 | ||
181 | fadds %d0,%fp0 | ...FP0 = SGN - SGN*EPS | ||
182 | |||
183 | bra t_frcinx | ...exit through t_frcinx (external) | ||
184 | |||
185 | |end | ||
diff --git a/arch/m68k/fpsp040/sto_res.S b/arch/m68k/fpsp040/sto_res.S new file mode 100644 index 000000000000..0cdca3b060ad --- /dev/null +++ b/arch/m68k/fpsp040/sto_res.S | |||
@@ -0,0 +1,98 @@ | |||
1 | | | ||
2 | | sto_res.sa 3.1 12/10/90 | ||
3 | | | ||
4 | | Takes the result and puts it in where the user expects it. | ||
5 | | Library functions return result in fp0. If fp0 is not the | ||
6 | | user's destination register then fp0 is moved to the | ||
7 | | correct floating-point destination register. fp0 and fp1 | ||
8 | | are then restored to the original contents. | ||
9 | | | ||
10 | | Input: result in fp0,fp1 | ||
11 | | | ||
12 | | d2 & a0 should be kept unmodified | ||
13 | | | ||
14 | | Output: moves the result to the true destination reg or mem | ||
15 | | | ||
16 | | Modifies: destination floating point register | ||
17 | | | ||
18 | |||
19 | | Copyright (C) Motorola, Inc. 1990 | ||
20 | | All Rights Reserved | ||
21 | | | ||
22 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
23 | | The copyright notice above does not evidence any | ||
24 | | actual or intended publication of such source code. | ||
25 | |||
26 | STO_RES: |idnt 2,1 | Motorola 040 Floating Point Software Package | ||
27 | |||
28 | |||
29 | |section 8 | ||
30 | |||
31 | #include "fpsp.h" | ||
32 | |||
33 | .global sto_cos | ||
34 | sto_cos: |store fp1 into the FP register selected by the cos destination field | ||
35 | bfextu CMDREG1B(%a6){#13:#3},%d0 |extract cos destination | ||
36 | cmpib #3,%d0 |check for fp0-fp3 cases | ||
37 | bles c_fp0123 |fp0-fp3 are written to the USER_FPn slots instead | ||
38 | fmovemx %fp1-%fp1,-(%a7) |push fp1 on the stack | ||
39 | moveql #7,%d1 | ||
40 | subl %d0,%d1 |d1 = 7- (dest. reg. no.) | ||
41 | clrl %d0 | ||
42 | bsetl %d1,%d0 |d0 is dynamic register mask | ||
43 | fmovemx (%a7)+,%d0 |pop the value into the register selected by the mask | ||
44 | rts | ||
45 | c_fp0123: |d0 is 0..3: pick the matching USER_FPn slot | ||
46 | cmpib #0,%d0 | ||
47 | beqs c_is_fp0 | ||
48 | cmpib #1,%d0 | ||
49 | beqs c_is_fp1 | ||
50 | cmpib #2,%d0 | ||
51 | beqs c_is_fp2 | ||
52 | c_is_fp3: |fall-through case: d0 = 3 | ||
53 | fmovemx %fp1-%fp1,USER_FP3(%a6) |USER_FPn: presumably the saved user register image -- confirm in fpsp.h | ||
54 | rts | ||
55 | c_is_fp2: | ||
56 | fmovemx %fp1-%fp1,USER_FP2(%a6) | ||
57 | rts | ||
58 | c_is_fp1: | ||
59 | fmovemx %fp1-%fp1,USER_FP1(%a6) | ||
60 | rts | ||
61 | c_is_fp0: | ||
62 | fmovemx %fp1-%fp1,USER_FP0(%a6) | ||
63 | rts | ||
64 | |||
65 | |||
66 | .global sto_res | ||
67 | sto_res: |store fp0 into the FP register selected by the destination field | ||
68 | bfextu CMDREG1B(%a6){#6:#3},%d0 |extract destination register | ||
69 | cmpib #3,%d0 |check for fp0-fp3 cases | ||
70 | bles fp0123 |fp0-fp3 are written to the USER_FPn slots instead | ||
71 | fmovemx %fp0-%fp0,-(%a7) |push fp0 on the stack | ||
72 | moveql #7,%d1 | ||
73 | subl %d0,%d1 |d1 = 7- (dest. reg. no.) | ||
74 | clrl %d0 | ||
75 | bsetl %d1,%d0 |d0 is dynamic register mask | ||
76 | fmovemx (%a7)+,%d0 |pop the value into the register selected by the mask | ||
77 | rts | ||
78 | fp0123: |d0 is 0..3: pick the matching USER_FPn slot | ||
79 | cmpib #0,%d0 | ||
80 | beqs is_fp0 | ||
81 | cmpib #1,%d0 | ||
82 | beqs is_fp1 | ||
83 | cmpib #2,%d0 | ||
84 | beqs is_fp2 | ||
85 | is_fp3: |fall-through case: d0 = 3 | ||
86 | fmovemx %fp0-%fp0,USER_FP3(%a6) |USER_FPn: presumably the saved user register image -- confirm in fpsp.h | ||
87 | rts | ||
88 | is_fp2: | ||
89 | fmovemx %fp0-%fp0,USER_FP2(%a6) | ||
90 | rts | ||
91 | is_fp1: | ||
92 | fmovemx %fp0-%fp0,USER_FP1(%a6) | ||
93 | rts | ||
94 | is_fp0: | ||
95 | fmovemx %fp0-%fp0,USER_FP0(%a6) | ||
96 | rts | ||
97 | |||
98 | |end | ||
diff --git a/arch/m68k/fpsp040/stwotox.S b/arch/m68k/fpsp040/stwotox.S new file mode 100644 index 000000000000..4e3c1407d3df --- /dev/null +++ b/arch/m68k/fpsp040/stwotox.S | |||
@@ -0,0 +1,427 @@ | |||
1 | | | ||
2 | | stwotox.sa 3.1 12/10/90 | ||
3 | | | ||
4 | | stwotox --- 2**X | ||
5 | | stwotoxd --- 2**X for denormalized X | ||
6 | | stentox --- 10**X | ||
7 | | stentoxd --- 10**X for denormalized X | ||
8 | | | ||
9 | | Input: Double-extended number X in location pointed to | ||
10 | | by address register a0. | ||
11 | | | ||
12 | | Output: The function values are returned in Fp0. | ||
13 | | | ||
14 | | Accuracy and Monotonicity: The returned result is within 2 ulps in | ||
15 | | 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the | ||
16 | | result is subsequently rounded to double precision. The | ||
17 | | result is provably monotonic in double precision. | ||
18 | | | ||
19 | | Speed: The program stwotox takes approximately 190 cycles and the | ||
20 | | program stentox takes approximately 200 cycles. | ||
21 | | | ||
22 | | Algorithm: | ||
23 | | | ||
24 | | twotox | ||
25 | | 1. If |X| > 16480, go to ExpBig. | ||
26 | | | ||
27 | | 2. If |X| < 2**(-70), go to ExpSm. | ||
28 | | | ||
29 | | 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore | ||
30 | | decompose N as | ||
31 | | N = 64(M + M') + j, j = 0,1,2,...,63. | ||
32 | | | ||
33 | | 4. Overwrite r := r * log2. Then | ||
34 | | 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). | ||
35 | | Go to expr to compute that expression. | ||
36 | | | ||
37 | | tentox | ||
38 | | 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. | ||
39 | | | ||
40 | | 2. If |X| < 2**(-70), go to ExpSm. | ||
41 | | | ||
42 | | 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set | ||
43 | | N := round-to-int(y). Decompose N as | ||
44 | | N = 64(M + M') + j, j = 0,1,2,...,63. | ||
45 | | | ||
46 | | 4. Define r as | ||
47 | | r := ((X - N*L1)-N*L2) * L10 | ||
48 | | where L1, L2 are the leading and trailing parts of log_10(2)/64 | ||
49 | | and L10 is the natural log of 10. Then | ||
50 | | 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). | ||
51 | | Go to expr to compute that expression. | ||
52 | | | ||
53 | | expr | ||
54 | | 1. Fetch 2**(j/64) from table as Fact1 and Fact2. | ||
55 | | | ||
56 | | 2. Overwrite Fact1 and Fact2 by | ||
57 | | Fact1 := 2**(M) * Fact1 | ||
58 | | Fact2 := 2**(M) * Fact2 | ||
59 | | Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). | ||
60 | | | ||
61 | | 3. Calculate P where 1 + P approximates exp(r): | ||
62 | | P = r + r*r*(A1+r*(A2+...+r*A5)). | ||
63 | | | ||
64 | | 4. Let AdjFact := 2**(M'). Return | ||
65 | | AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). | ||
66 | | Exit. | ||
67 | | | ||
68 | | ExpBig | ||
69 | | 1. Generate overflow by Huge * Huge if X > 0; otherwise, generate | ||
70 | | underflow by Tiny * Tiny. | ||
71 | | | ||
72 | | ExpSm | ||
73 | | 1. Return 1 + X. | ||
74 | | | ||
75 | |||
76 | | Copyright (C) Motorola, Inc. 1990 | ||
77 | | All Rights Reserved | ||
78 | | | ||
79 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
80 | | The copyright notice above does not evidence any | ||
81 | | actual or intended publication of such source code. | ||
82 | |||
83 | |STWOTOX idnt 2,1 | Motorola 040 Floating Point Software Package | ||
84 | |||
85 | |section 8 | ||
86 | |||
87 | #include "fpsp.h" | ||
88 | |||
89 | BOUNDS1: .long 0x3FB98000,0x400D80C0 | ... 2^(-70),16480 (same values as the cmpil immediates in stwotox; label not referenced in this file) | ||
90 | BOUNDS2: .long 0x3FB98000,0x400B9B07 | ... 2^(-70),16480 LOG2/LOG10 (same values as the cmpil immediates in stentox; label not referenced in this file) | ||
91 | |||
92 | L2TEN64: .long 0x406A934F,0x0979A371 | ... 64LOG10/LOG2 (read as a double by fmuld) | ||
93 | L10TWO1: .long 0x3F734413,0x509F8000 | ... LOG2/64LOG10 (leading part, read as a double by fmuld) | ||
94 | |||
95 | L10TWO2: .long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000 | ...trailing part of LOG2/64LOG10 (read as extended by fmulx) | ||
96 | |||
97 | LOG10: .long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000 | ...natural log of 10 (read as extended by fmulx) | ||
98 | |||
99 | LOG2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 | ...natural log of 2 (read as extended by fmulx) | ||
100 | |||
101 | EXPA5: .long 0x3F56C16D,0x6F7BD0B2 | ...polynomial coefficient A5 (double) | ||
102 | EXPA4: .long 0x3F811112,0x302C712C | ...polynomial coefficient A4 (double) | ||
103 | EXPA3: .long 0x3FA55555,0x55554CC1 | ...polynomial coefficient A3 (double) | ||
104 | EXPA2: .long 0x3FC55555,0x55554A54 | ...polynomial coefficient A2 (double) | ||
105 | EXPA1: .long 0x3FE00000,0x00000000,0x00000000,0x00000000 | ...polynomial coefficient A1; expr reads it with faddd, i.e. only the first two longwords | ||
106 | |||
107 | HUGE: .long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000 | ...not referenced by the code in this file | ||
108 | TINY: .long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000 | ...not referenced by the code in this file | ||
109 | |||
110 | EXPTBL: | ...64 entries of 16 bytes, indexed by J*16: longwords 0-2 = leading part of 2^(J/64) (extended), longword 3 = trailing part packed as exponent word : top 16 mantissa bits | ||
111 | .long 0x3FFF0000,0x80000000,0x00000000,0x3F738000 | ||
112 | .long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA | ||
113 | .long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9 | ||
114 | .long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9 | ||
115 | .long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA | ||
116 | .long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C | ||
117 | .long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1 | ||
118 | .long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA | ||
119 | .long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373 | ||
120 | .long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670 | ||
121 | .long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700 | ||
122 | .long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0 | ||
123 | .long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D | ||
124 | .long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319 | ||
125 | .long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B | ||
126 | .long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5 | ||
127 | .long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A | ||
128 | .long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B | ||
129 | .long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF | ||
130 | .long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA | ||
131 | .long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD | ||
132 | .long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E | ||
133 | .long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B | ||
134 | .long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB | ||
135 | .long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB | ||
136 | .long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274 | ||
137 | .long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C | ||
138 | .long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00 | ||
139 | .long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301 | ||
140 | .long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367 | ||
141 | .long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F | ||
142 | .long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C | ||
143 | .long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB | ||
144 | .long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB | ||
145 | .long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C | ||
146 | .long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA | ||
147 | .long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD | ||
148 | .long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51 | ||
149 | .long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A | ||
150 | .long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2 | ||
151 | .long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB | ||
152 | .long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17 | ||
153 | .long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C | ||
154 | .long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8 | ||
155 | .long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53 | ||
156 | .long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE | ||
157 | .long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124 | ||
158 | .long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243 | ||
159 | .long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A | ||
160 | .long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61 | ||
161 | .long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610 | ||
162 | .long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1 | ||
163 | .long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12 | ||
164 | .long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE | ||
165 | .long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4 | ||
166 | .long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F | ||
167 | .long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A | ||
168 | .long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A | ||
169 | .long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC | ||
170 | .long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F | ||
171 | .long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A | ||
172 | .long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795 | ||
173 | .long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B | ||
174 | .long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581 | ||
175 | |||
176 | .set N,L_SCR1 | ...longword scratch for the rounded integer N | ||
177 | |||
178 | .set X,FP_SCR1 | ...scratch copy of the input, addressed as X(%a6) | ||
179 | .set XDCARE,X+2 | ...X+2; not referenced by the code in this file | ||
180 | .set XFRAC,X+4 | ...X+4; not referenced by the code in this file | ||
181 | |||
182 | .set ADJFACT,FP_SCR2 | ...extended scratch built up as 2^(M') | ||
183 | |||
184 | .set FACT1,FP_SCR3 | ...leading part of 2^(J/64), later scaled by 2^M | ||
185 | .set FACT1HI,FACT1+4 | ...second longword of FACT1 | ||
186 | .set FACT1LOW,FACT1+8 | ...third longword of FACT1 | ||
187 | |||
188 | .set FACT2,FP_SCR4 | ...trailing part of 2^(J/64), later scaled by 2^M | ||
189 | .set FACT2HI,FACT2+4 | ...second longword of FACT2 | ||
190 | .set FACT2LOW,FACT2+8 | ...third longword of FACT2 | ||
191 | |||
192 | | xref t_unfl | ||
193 | |xref t_ovfl | ||
194 | |xref t_frcinx | ||
195 | |||
196 | .global stwotoxd | ||
197 | stwotoxd: | ||
198 | |--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT | ||
199 | |||
200 | fmovel %d1,%fpcr | ...set user's rounding mode/precision | ||
201 | fmoves #0x3F800000,%fp0 | ...RETURN 1 + X | ||
202 | movel (%a0),%d0 | ...d0 = first longword of X (sign in bit 31) | ||
203 | orl #0x00800001,%d0 | ...set bits 23 and 0; assumes the other non-sign bits are zero so d0 is a tiny single with X's sign -- confirm | ||
204 | fadds %d0,%fp0 | ...FP0 = 1 + d0 taken as a single-precision value | ||
205 | bra t_frcinx | ...exit through t_frcinx (external) | ||
206 | |||
207 | .global stwotox | ||
208 | stwotox: | ||
209 | |--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S | ||
210 | fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's | ||
211 | |||
212 | movel (%a0),%d0 | ...d0 high word = sign/exponent word of X | ||
213 | movew 4(%a0),%d0 | ...d0 low word = top 16 bits of the mantissa | ||
214 | fmovex %fp0,X(%a6) | ...scratch copy of X | ||
215 | andil #0x7FFFFFFF,%d0 | ...d0 = compact |X| (sign bit cleared) | ||
216 | |||
217 | cmpil #0x3FB98000,%d0 | ...|X| >= 2**(-70)? | ||
218 | bges TWOOK1 | ||
219 | bra EXPBORS | ...below the lower bound | ||
220 | |||
221 | TWOOK1: | ||
222 | cmpil #0x400D80C0,%d0 | ...|X| <= 16480? | ||
223 | bles TWOMAIN | ||
224 | bra EXPBORS | ...above the upper bound | ||
225 | |||
226 | |||
227 | TWOMAIN: | ||
228 | |--USUAL CASE, 2^(-70) <= |X| <= 16480 | ||
229 | |||
230 | fmovex %fp0,%fp1 | ...FP1 = X | ||
231 | fmuls #0x42800000,%fp1 | ...64 * X | ||
232 | |||
233 | fmovel %fp1,N(%a6) | ...N = ROUND-TO-INT(64 X) | ||
234 | movel %d2,-(%sp) | ...save d2 | ||
235 | lea EXPTBL,%a1 | ...LOAD ADDRESS OF TABLE OF 2^(J/64) | ||
236 | fmovel N(%a6),%fp1 | ...N --> FLOATING FMT | ||
237 | movel N(%a6),%d0 | ...d0 = N | ||
238 | movel %d0,%d2 | ...d2 = N | ||
239 | andil #0x3F,%d0 | ...D0 IS J | ||
240 | asll #4,%d0 | ...DISPLACEMENT FOR 2^(J/64) | ||
241 | addal %d0,%a1 | ...ADDRESS FOR 2^(J/64) | ||
242 | asrl #6,%d2 | ...d2 IS L, N = 64L + J | ||
243 | movel %d2,%d0 | ...d0 = L | ||
244 | asrl #1,%d0 | ...D0 IS M | ||
245 | subl %d0,%d2 | ...d2 IS M', N = 64(M+M') + J | ||
246 | addil #0x3FFF,%d2 | ...add 0x3FFF to M' to form an exponent word | ||
247 | movew %d2,ADJFACT(%a6) | ...ADJFACT IS 2^(M') | ||
248 | movel (%sp)+,%d2 | ...restore d2 | ||
249 | |--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64), | ||
250 | |--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN. | ||
251 | |--ADJFACT = 2^(M'). | ||
252 | |--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2. NOTE(review): only the d2 save/restore is visible in this routine -- confirm | ||
253 | |||
254 | fmuls #0x3C800000,%fp1 | ...(1/64)*N | ||
255 | movel (%a1)+,FACT1(%a6) | ...copy the leading part of 2^(J/64) into FACT1 | ||
256 | movel (%a1)+,FACT1HI(%a6) | ||
257 | movel (%a1)+,FACT1LOW(%a6) | ||
258 | movew (%a1)+,FACT2(%a6) | ...exponent word of the trailing part | ||
259 | clrw FACT2+2(%a6) | ...zero the word after the exponent word | ||
260 | |||
261 | fsubx %fp1,%fp0 | ...X - (1/64)*INT(64 X) | ||
262 | |||
263 | movew (%a1)+,FACT2HI(%a6) | ...top 16 mantissa bits of the trailing part | ||
264 | clrw FACT2HI+2(%a6) | ...remaining mantissa bits are zero | ||
265 | clrl FACT2LOW(%a6) | ||
266 | addw %d0,FACT1(%a6) | ...add M to the exponent word: FACT1 := 2^M * FACT1 | ||
267 | |||
268 | fmulx LOG2,%fp0 | ...FP0 IS R | ||
269 | addw %d0,FACT2(%a6) | ...add M to the exponent word: FACT2 := 2^M * FACT2 | ||
270 | |||
271 | bra expr | ...shared evaluation code located after stentox | ||
272 | |||
273 | EXPBORS: | ||
274 | |--FPCR, D0 SAVED | ||
275 | cmpil #0x3FFF8000,%d0 | ...compare compact |X| with compact 1.0 | ||
276 | bgts EXPBIG | ...larger: the argument is too big, otherwise it is tiny | ||
277 | |||
278 | EXPSM: | ||
279 | |--|X| IS SMALL, RETURN 1 + X | ||
280 | |||
281 | fmovel %d1,%FPCR |restore user's FPCR (exception enables) from d1 | ||
282 | fadds #0x3F800000,%fp0 | ...RETURN 1 + X | ||
283 | |||
284 | bra t_frcinx | ...exit through t_frcinx (external) | ||
285 | |||
286 | EXPBIG: | ||
287 | |--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW | ||
288 | |--REGISTERS SAVED SO FAR ARE FPCR AND D0 | ||
289 | movel X(%a6),%d0 | ...first longword of the saved X (sign in bit 31) | ||
290 | cmpil #0,%d0 | ||
291 | blts EXPNEG | ...negative X | ||
292 | |||
293 | bclrb #7,(%a0) |t_ovfl expects positive value | ||
294 | bra t_ovfl | ||
295 | |||
296 | EXPNEG: | ||
297 | bclrb #7,(%a0) |t_unfl expects positive value | ||
298 | bra t_unfl | ||
299 | |||
300 | .global stentoxd | ||
301 | stentoxd: | ||
302 | |--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT | ||
303 | |||
304 | fmovel %d1,%fpcr | ...set user's rounding mode/precision | ||
305 | fmoves #0x3F800000,%fp0 | ...RETURN 1 + X | ||
306 | movel (%a0),%d0 | ...d0 = first longword of X (sign in bit 31) | ||
307 | orl #0x00800001,%d0 | ...set bits 23 and 0; assumes the other non-sign bits are zero so d0 is a tiny single with X's sign -- confirm | ||
308 | fadds %d0,%fp0 | ...FP0 = 1 + d0 taken as a single-precision value | ||
309 | bra t_frcinx | ...exit through t_frcinx (external) | ||
310 | |||
311 | .global stentox | ||
312 | stentox: | ||
313 | |--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S | ||
314 | fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's | ||
315 | |||
316 | movel (%a0),%d0 | ...d0 high word = sign/exponent word of X | ||
317 | movew 4(%a0),%d0 | ...d0 low word = top 16 bits of the mantissa | ||
318 | fmovex %fp0,X(%a6) | ...scratch copy of X | ||
319 | andil #0x7FFFFFFF,%d0 | ...d0 = compact |X| (sign bit cleared) | ||
320 | |||
321 | cmpil #0x3FB98000,%d0 | ...|X| >= 2**(-70)? | ||
322 | bges TENOK1 | ||
323 | bra EXPBORS | ...below the lower bound | ||
324 | |||
325 | TENOK1: | ||
326 | cmpil #0x400B9B07,%d0 | ...|X| <= 16480*log2/log10 ? | ||
327 | bles TENMAIN | ||
328 | bra EXPBORS | ...above the upper bound | ||
329 | |||
330 | TENMAIN: | ||
331 | |--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 | ||
332 | |||
333 | fmovex %fp0,%fp1 | ...FP1 = X | ||
334 | fmuld L2TEN64,%fp1 | ...X*64*LOG10/LOG2 | ||
335 | |||
336 | fmovel %fp1,N(%a6) | ...N=INT(X*64*LOG10/LOG2) | ||
337 | movel %d2,-(%sp) | ...save d2 | ||
338 | lea EXPTBL,%a1 | ...LOAD ADDRESS OF TABLE OF 2^(J/64) | ||
339 | fmovel N(%a6),%fp1 | ...N --> FLOATING FMT | ||
340 | movel N(%a6),%d0 | ...d0 = N | ||
341 | movel %d0,%d2 | ...d2 = N | ||
342 | andil #0x3F,%d0 | ...D0 IS J | ||
343 | asll #4,%d0 | ...DISPLACEMENT FOR 2^(J/64) | ||
344 | addal %d0,%a1 | ...ADDRESS FOR 2^(J/64) | ||
345 | asrl #6,%d2 | ...d2 IS L, N = 64L + J | ||
346 | movel %d2,%d0 | ...d0 = L | ||
347 | asrl #1,%d0 | ...D0 IS M | ||
348 | subl %d0,%d2 | ...d2 IS M', N = 64(M+M') + J | ||
349 | addil #0x3FFF,%d2 | ...add 0x3FFF to M' to form an exponent word | ||
350 | movew %d2,ADJFACT(%a6) | ...ADJFACT IS 2^(M') | ||
351 | movel (%sp)+,%d2 | ...restore d2 | ||
352 | |||
353 | |--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64), | ||
354 | |--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN. | ||
355 | |--ADJFACT = 2^(M'). | ||
356 | |--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2. NOTE(review): only the d2 save/restore is visible in this routine -- confirm | ||
357 | |||
358 | fmovex %fp1,%fp2 | ...FP2 = N | ||
359 | |||
360 | fmuld L10TWO1,%fp1 | ...N*(LOG2/64LOG10)_LEAD | ||
361 | movel (%a1)+,FACT1(%a6) | ...copy the leading part of 2^(J/64) into FACT1 | ||
362 | |||
363 | fmulx L10TWO2,%fp2 | ...N*(LOG2/64LOG10)_TRAIL | ||
364 | |||
365 | movel (%a1)+,FACT1HI(%a6) | ||
366 | movel (%a1)+,FACT1LOW(%a6) | ||
367 | fsubx %fp1,%fp0 | ...X - N L_LEAD | ||
368 | movew (%a1)+,FACT2(%a6) | ...exponent word of the trailing part | ||
369 | |||
370 | fsubx %fp2,%fp0 | ...(X - N L_LEAD) - N L_TRAIL | ||
371 | |||
372 | clrw FACT2+2(%a6) | ...zero the word after the exponent word | ||
373 | movew (%a1)+,FACT2HI(%a6) | ...top 16 mantissa bits of the trailing part | ||
374 | clrw FACT2HI+2(%a6) | ...remaining mantissa bits are zero | ||
375 | clrl FACT2LOW(%a6) | ||
376 | |||
377 | fmulx LOG10,%fp0 | ...FP0 IS R | ||
378 | |||
379 | addw %d0,FACT1(%a6) | ...add M to the exponent word: FACT1 := 2^M * FACT1 | ||
380 | addw %d0,FACT2(%a6) | ...add M to the exponent word: FACT2 := 2^M * FACT2 | ||
381 | |||
382 | expr: | ...reached by fall-through from stentox and by bra from stwotox | ||
383 | |--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN. NOTE(review): no save of FP2/FP3 is visible in this file -- confirm they are preserved by the caller | ||
384 | |--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64). | ||
385 | |--FP0 IS R. THE FOLLOWING CODE COMPUTES | ||
386 | |-- 2**(M'+M) * 2**(J/64) * EXP(R) | ||
387 | |||
388 | fmovex %fp0,%fp1 | ...FP1 = R | ||
389 | fmulx %fp1,%fp1 | ...FP1 IS S = R*R | ||
390 | |||
391 | fmoved EXPA5,%fp2 | ...FP2 IS A5 | ||
392 | fmoved EXPA4,%fp3 | ...FP3 IS A4 | ||
393 | |||
394 | fmulx %fp1,%fp2 | ...FP2 IS S*A5 | ||
395 | fmulx %fp1,%fp3 | ...FP3 IS S*A4 | ||
396 | |||
397 | faddd EXPA3,%fp2 | ...FP2 IS A3+S*A5 | ||
398 | faddd EXPA2,%fp3 | ...FP3 IS A2+S*A4 | ||
399 | |||
400 | fmulx %fp1,%fp2 | ...FP2 IS S*(A3+S*A5) | ||
401 | fmulx %fp1,%fp3 | ...FP3 IS S*(A2+S*A4) | ||
402 | |||
403 | faddd EXPA1,%fp2 | ...FP2 IS A1+S*(A3+S*A5) | ||
404 | fmulx %fp0,%fp3 | ...FP3 IS R*S*(A2+S*A4) | ||
405 | |||
406 | fmulx %fp1,%fp2 | ...FP2 IS S*(A1+S*(A3+S*A5)) | ||
407 | faddx %fp3,%fp0 | ...FP0 IS R+R*S*(A2+S*A4) | ||
408 | |||
409 | faddx %fp2,%fp0 | ...FP0 IS EXP(R) - 1 | ||
410 | |||
411 | |||
412 | |--FINAL RECONSTRUCTION PROCESS | ||
413 | |--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0) | ||
414 | |||
415 | fmulx FACT1(%a6),%fp0 | ...FP0 = FACT1*P, with P = EXP(R) - 1 | ||
416 | faddx FACT2(%a6),%fp0 | ...FP0 = FACT1*P + FACT2 | ||
417 | faddx FACT1(%a6),%fp0 | ...FP0 = FACT1 + (FACT1*P + FACT2) | ||
418 | |||
419 | fmovel %d1,%FPCR |restore user's FPCR (exception enables) from d1 | ||
420 | clrw ADJFACT+2(%a6) | ...zero the word after ADJFACT's exponent word | ||
421 | movel #0x80000000,ADJFACT+4(%a6) | ...mantissa = 1.0, so ADJFACT = 2^(M') | ||
422 | clrl ADJFACT+8(%a6) | ...low mantissa longword = 0 | ||
423 | fmulx ADJFACT(%a6),%fp0 | ...FINAL ADJUSTMENT | ||
424 | |||
425 | bra t_frcinx | ...exit through t_frcinx (external) | ||
426 | |||
427 | |end | ||
diff --git a/arch/m68k/fpsp040/tbldo.S b/arch/m68k/fpsp040/tbldo.S new file mode 100644 index 000000000000..fe60cf4d20d7 --- /dev/null +++ b/arch/m68k/fpsp040/tbldo.S | |||
@@ -0,0 +1,554 @@ | |||
1 | | | ||
2 | | tbldo.sa 3.1 12/10/90 | ||
3 | | | ||
4 | | Modified: | ||
5 | | 8/16/90 chinds The table was constructed to use only one level | ||
6 | | of indirection in do_func for monadic | ||
7 | | functions. Dyadic functions require two | ||
8 | | levels, and the tables are still contained | ||
9 | | in do_func. The table is arranged for | ||
10 | | index with a 10-bit index, with the first | ||
11 | | 7 bits the opcode, and the remaining 3 | ||
12 | | the stag. For dyadic functions, all | ||
13 | | valid addresses are to the generic entry | ||
14 | | point. | ||
15 | | | ||
16 | |||
17 | | Copyright (C) Motorola, Inc. 1990 | ||
18 | | All Rights Reserved | ||
19 | | | ||
20 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
21 | | The copyright notice above does not evidence any | ||
22 | | actual or intended publication of such source code. | ||
23 | |||
24 | |TBLDO idnt 2,1 | Motorola 040 Floating Point Software Package | ||
25 | |||
26 | |section 8 | ||
27 | |||
28 | |xref ld_pinf,ld_pone,ld_ppi2 | ||
29 | |xref t_dz2,t_operr | ||
30 | |xref serror,sone,szero,sinf,snzrinx | ||
31 | |xref sopr_inf,spi_2,src_nan,szr_inf | ||
32 | |||
33 | |xref smovcr | ||
34 | |xref pmod,prem,pscale | ||
35 | |xref satanh,satanhd | ||
36 | |xref sacos,sacosd,sasin,sasind,satan,satand | ||
37 | |xref setox,setoxd,setoxm1,setoxm1d,setoxm1i | ||
38 | |xref sgetexp,sgetexpd,sgetman,sgetmand | ||
39 | |xref sint,sintd,sintrz | ||
40 | |xref ssincos,ssincosd,ssincosi,ssincosnan,ssincosz | ||
41 | |xref scos,scosd,ssin,ssind,stan,stand | ||
42 | |xref scosh,scoshd,ssinh,ssinhd,stanh,stanhd | ||
43 | |xref sslog10,sslog2,sslogn,sslognp1 | ||
44 | |xref sslog10d,sslog2d,sslognd,slognp1d | ||
45 | |xref stentox,stentoxd,stwotox,stwotoxd | ||
46 | |||
47 | | tblpre: eight handler addresses per opcode, one per stag 0-7. Row comments read: $opcode-stag, instruction, operand class | ||
48 | .global tblpre | ||
49 | tblpre: | ||
50 | .long smovcr |$00-0 fmovecr all | ||
51 | .long smovcr |$00-1 fmovecr all | ||
52 | .long smovcr |$00-2 fmovecr all | ||
53 | .long smovcr |$00-3 fmovecr all | ||
54 | .long smovcr |$00-4 fmovecr all | ||
55 | .long smovcr |$00-5 fmovecr all | ||
56 | .long smovcr |$00-6 fmovecr all | ||
57 | .long smovcr |$00-7 fmovecr all | ||
58 | |||
59 | .long sint |$01-0 fint norm | ||
60 | .long szero |$01-1 fint zero | ||
61 | .long sinf |$01-2 fint inf | ||
62 | .long src_nan |$01-3 fint nan | ||
63 | .long sintd |$01-4 fint denorm inx | ||
64 | .long serror |$01-5 fint ERROR | ||
65 | .long serror |$01-6 fint ERROR | ||
66 | .long serror |$01-7 fint ERROR | ||
67 | |||
68 | .long ssinh |$02-0 fsinh norm | ||
69 | .long szero |$02-1 fsinh zero | ||
70 | .long sinf |$02-2 fsinh inf | ||
71 | .long src_nan |$02-3 fsinh nan | ||
72 | .long ssinhd |$02-4 fsinh denorm | ||
73 | .long serror |$02-5 fsinh ERROR | ||
74 | .long serror |$02-6 fsinh ERROR | ||
75 | .long serror |$02-7 fsinh ERROR | ||
76 | |||
77 | .long sintrz |$03-0 fintrz norm | ||
78 | .long szero |$03-1 fintrz zero | ||
79 | .long sinf |$03-2 fintrz inf | ||
80 | .long src_nan |$03-3 fintrz nan | ||
81 | .long snzrinx |$03-4 fintrz denorm inx | ||
82 | .long serror |$03-5 fintrz ERROR | ||
83 | .long serror |$03-6 fintrz ERROR | ||
84 | .long serror |$03-7 fintrz ERROR | ||
85 | |||
86 | .long serror |$04-0 ERROR - illegal extension | ||
87 | .long serror |$04-1 ERROR - illegal extension | ||
88 | .long serror |$04-2 ERROR - illegal extension | ||
89 | .long serror |$04-3 ERROR - illegal extension | ||
90 | .long serror |$04-4 ERROR - illegal extension | ||
91 | .long serror |$04-5 ERROR - illegal extension | ||
92 | .long serror |$04-6 ERROR - illegal extension | ||
93 | .long serror |$04-7 ERROR - illegal extension | ||
94 | |||
95 | .long serror |$05-0 ERROR - illegal extension | ||
96 | .long serror |$05-1 ERROR - illegal extension | ||
97 | .long serror |$05-2 ERROR - illegal extension | ||
98 | .long serror |$05-3 ERROR - illegal extension | ||
99 | .long serror |$05-4 ERROR - illegal extension | ||
100 | .long serror |$05-5 ERROR - illegal extension | ||
101 | .long serror |$05-6 ERROR - illegal extension | ||
102 | .long serror |$05-7 ERROR - illegal extension | ||
103 | |||
104 | .long sslognp1 |$06-0 flognp1 norm | ||
105 | .long szero |$06-1 flognp1 zero | ||
106 | .long sopr_inf |$06-2 flognp1 inf | ||
107 | .long src_nan |$06-3 flognp1 nan | ||
108 | .long slognp1d |$06-4 flognp1 denorm | ||
109 | .long serror |$06-5 flognp1 ERROR | ||
110 | .long serror |$06-6 flognp1 ERROR | ||
111 | .long serror |$06-7 flognp1 ERROR | ||
112 | |||
113 | .long serror |$07-0 ERROR - illegal extension | ||
114 | .long serror |$07-1 ERROR - illegal extension | ||
115 | .long serror |$07-2 ERROR - illegal extension | ||
116 | .long serror |$07-3 ERROR - illegal extension | ||
117 | .long serror |$07-4 ERROR - illegal extension | ||
118 | .long serror |$07-5 ERROR - illegal extension | ||
119 | .long serror |$07-6 ERROR - illegal extension | ||
120 | .long serror |$07-7 ERROR - illegal extension | ||
121 | |||
122 | .long setoxm1 |$08-0 fetoxm1 norm | ||
123 | .long szero |$08-1 fetoxm1 zero | ||
124 | .long setoxm1i |$08-2 fetoxm1 inf | ||
125 | .long src_nan |$08-3 fetoxm1 nan | ||
126 | .long setoxm1d |$08-4 fetoxm1 denorm | ||
127 | .long serror |$08-5 fetoxm1 ERROR | ||
128 | .long serror |$08-6 fetoxm1 ERROR | ||
129 | .long serror |$08-7 fetoxm1 ERROR | ||
130 | |||
131 | .long stanh |$09-0 ftanh norm | ||
132 | .long szero |$09-1 ftanh zero | ||
133 | .long sone |$09-2 ftanh inf | ||
134 | .long src_nan |$09-3 ftanh nan | ||
135 | .long stanhd |$09-4 ftanh denorm | ||
136 | .long serror |$09-5 ftanh ERROR | ||
137 | .long serror |$09-6 ftanh ERROR | ||
138 | .long serror |$09-7 ftanh ERROR | ||
139 | |||
140 | .long satan |$0a-0 fatan norm | ||
141 | .long szero |$0a-1 fatan zero | ||
142 | .long spi_2 |$0a-2 fatan inf | ||
143 | .long src_nan |$0a-3 fatan nan | ||
144 | .long satand |$0a-4 fatan denorm | ||
145 | .long serror |$0a-5 fatan ERROR | ||
146 | .long serror |$0a-6 fatan ERROR | ||
147 | .long serror |$0a-7 fatan ERROR | ||
148 | |||
149 | .long serror |$0b-0 ERROR - illegal extension | ||
150 | .long serror |$0b-1 ERROR - illegal extension | ||
151 | .long serror |$0b-2 ERROR - illegal extension | ||
152 | .long serror |$0b-3 ERROR - illegal extension | ||
153 | .long serror |$0b-4 ERROR - illegal extension | ||
154 | .long serror |$0b-5 ERROR - illegal extension | ||
155 | .long serror |$0b-6 ERROR - illegal extension | ||
156 | .long serror |$0b-7 ERROR - illegal extension | ||
157 | |||
158 | .long sasin |$0c-0 fasin norm | ||
159 | .long szero |$0c-1 fasin zero | ||
160 | .long t_operr |$0c-2 fasin inf | ||
161 | .long src_nan |$0c-3 fasin nan | ||
162 | .long sasind |$0c-4 fasin denorm | ||
163 | .long serror |$0c-5 fasin ERROR | ||
164 | .long serror |$0c-6 fasin ERROR | ||
165 | .long serror |$0c-7 fasin ERROR | ||
166 | |||
167 | .long satanh |$0d-0 fatanh norm | ||
168 | .long szero |$0d-1 fatanh zero | ||
169 | .long t_operr |$0d-2 fatanh inf | ||
170 | .long src_nan |$0d-3 fatanh nan | ||
171 | .long satanhd |$0d-4 fatanh denorm | ||
172 | .long serror |$0d-5 fatanh ERROR | ||
173 | .long serror |$0d-6 fatanh ERROR | ||
174 | .long serror |$0d-7 fatanh ERROR | ||
175 | |||
176 | .long ssin |$0e-0 fsin norm | ||
177 | .long szero |$0e-1 fsin zero | ||
178 | .long t_operr |$0e-2 fsin inf | ||
179 | .long src_nan |$0e-3 fsin nan | ||
180 | .long ssind |$0e-4 fsin denorm | ||
181 | .long serror |$0e-5 fsin ERROR | ||
182 | .long serror |$0e-6 fsin ERROR | ||
183 | .long serror |$0e-7 fsin ERROR | ||
184 | |||
185 | .long stan |$0f-0 ftan norm | ||
186 | .long szero |$0f-1 ftan zero | ||
187 | .long t_operr |$0f-2 ftan inf | ||
188 | .long src_nan |$0f-3 ftan nan | ||
189 | .long stand |$0f-4 ftan denorm | ||
190 | .long serror |$0f-5 ftan ERROR | ||
191 | .long serror |$0f-6 ftan ERROR | ||
192 | .long serror |$0f-7 ftan ERROR | ||
193 | |||
194 | .long setox |$10-0 fetox norm | ||
195 | .long ld_pone |$10-1 fetox zero | ||
196 | .long szr_inf |$10-2 fetox inf | ||
197 | .long src_nan |$10-3 fetox nan | ||
198 | .long setoxd |$10-4 fetox denorm | ||
199 | .long serror |$10-5 fetox ERROR | ||
200 | .long serror |$10-6 fetox ERROR | ||
201 | .long serror |$10-7 fetox ERROR | ||
202 | |||
203 | .long stwotox |$11-0 ftwotox norm | ||
204 | .long ld_pone |$11-1 ftwotox zero | ||
205 | .long szr_inf |$11-2 ftwotox inf | ||
206 | .long src_nan |$11-3 ftwotox nan | ||
207 | .long stwotoxd |$11-4 ftwotox denorm | ||
208 | .long serror |$11-5 ftwotox ERROR | ||
209 | .long serror |$11-6 ftwotox ERROR | ||
210 | .long serror |$11-7 ftwotox ERROR | ||
211 | |||
212 | .long stentox |$12-0 ftentox norm | ||
213 | .long ld_pone |$12-1 ftentox zero | ||
214 | .long szr_inf |$12-2 ftentox inf | ||
215 | .long src_nan |$12-3 ftentox nan | ||
216 | .long stentoxd |$12-4 ftentox denorm | ||
217 | .long serror |$12-5 ftentox ERROR | ||
218 | .long serror |$12-6 ftentox ERROR | ||
219 | .long serror |$12-7 ftentox ERROR | ||
220 | |||
221 | .long serror |$13-0 ERROR - illegal extension | ||
222 | .long serror |$13-1 ERROR - illegal extension | ||
223 | .long serror |$13-2 ERROR - illegal extension | ||
224 | .long serror |$13-3 ERROR - illegal extension | ||
225 | .long serror |$13-4 ERROR - illegal extension | ||
226 | .long serror |$13-5 ERROR - illegal extension | ||
227 | .long serror |$13-6 ERROR - illegal extension | ||
228 | .long serror |$13-7 ERROR - illegal extension | ||
229 | |||
230 | .long sslogn |$14-0 flogn norm | ||
231 | .long t_dz2 |$14-1 flogn zero | ||
232 | .long sopr_inf |$14-2 flogn inf | ||
233 | .long src_nan |$14-3 flogn nan | ||
234 | .long sslognd |$14-4 flogn denorm | ||
235 | .long serror |$14-5 flogn ERROR | ||
236 | .long serror |$14-6 flogn ERROR | ||
237 | .long serror |$14-7 flogn ERROR | ||
238 | |||
239 | .long sslog10 |$15-0 flog10 norm | ||
240 | .long t_dz2 |$15-1 flog10 zero | ||
241 | .long sopr_inf |$15-2 flog10 inf | ||
242 | .long src_nan |$15-3 flog10 nan | ||
243 | .long sslog10d |$15-4 flog10 denorm | ||
244 | .long serror |$15-5 flog10 ERROR | ||
245 | .long serror |$15-6 flog10 ERROR | ||
246 | .long serror |$15-7 flog10 ERROR | ||
247 | |||
248 | .long sslog2 |$16-0 flog2 norm | ||
249 | .long t_dz2 |$16-1 flog2 zero | ||
250 | .long sopr_inf |$16-2 flog2 inf | ||
251 | .long src_nan |$16-3 flog2 nan | ||
252 | .long sslog2d |$16-4 flog2 denorm | ||
253 | .long serror |$16-5 flog2 ERROR | ||
254 | .long serror |$16-6 flog2 ERROR | ||
255 | .long serror |$16-7 flog2 ERROR | ||
256 | |||
257 | .long serror |$17-0 ERROR - illegal extension | ||
258 | .long serror |$17-1 ERROR - illegal extension | ||
259 | .long serror |$17-2 ERROR - illegal extension | ||
260 | .long serror |$17-3 ERROR - illegal extension | ||
261 | .long serror |$17-4 ERROR - illegal extension | ||
262 | .long serror |$17-5 ERROR - illegal extension | ||
263 | .long serror |$17-6 ERROR - illegal extension | ||
264 | .long serror |$17-7 ERROR - illegal extension | ||
265 | |||
266 | .long serror |$18-0 ERROR - illegal extension | ||
267 | .long serror |$18-1 ERROR - illegal extension | ||
268 | .long serror |$18-2 ERROR - illegal extension | ||
269 | .long serror |$18-3 ERROR - illegal extension | ||
270 | .long serror |$18-4 ERROR - illegal extension | ||
271 | .long serror |$18-5 ERROR - illegal extension | ||
272 | .long serror |$18-6 ERROR - illegal extension | ||
273 | .long serror |$18-7 ERROR - illegal extension | ||
274 | |||
275 | .long scosh |$19-0 fcosh norm | ||
276 | .long ld_pone |$19-1 fcosh zero | ||
277 | .long ld_pinf |$19-2 fcosh inf | ||
278 | .long src_nan |$19-3 fcosh nan | ||
279 | .long scoshd |$19-4 fcosh denorm | ||
280 | .long serror |$19-5 fcosh ERROR | ||
281 | .long serror |$19-6 fcosh ERROR | ||
282 | .long serror |$19-7 fcosh ERROR | ||
283 | |||
284 | .long serror |$1a-0 ERROR - illegal extension | ||
285 | .long serror |$1a-1 ERROR - illegal extension | ||
286 | .long serror |$1a-2 ERROR - illegal extension | ||
287 | .long serror |$1a-3 ERROR - illegal extension | ||
288 | .long serror |$1a-4 ERROR - illegal extension | ||
289 | .long serror |$1a-5 ERROR - illegal extension | ||
290 | .long serror |$1a-6 ERROR - illegal extension | ||
291 | .long serror |$1a-7 ERROR - illegal extension | ||
292 | |||
293 | .long serror |$1b-0 ERROR - illegal extension | ||
294 | .long serror |$1b-1 ERROR - illegal extension | ||
295 | .long serror |$1b-2 ERROR - illegal extension | ||
296 | .long serror |$1b-3 ERROR - illegal extension | ||
297 | .long serror |$1b-4 ERROR - illegal extension | ||
298 | .long serror |$1b-5 ERROR - illegal extension | ||
299 | .long serror |$1b-6 ERROR - illegal extension | ||
300 | .long serror |$1b-7 ERROR - illegal extension | ||
301 | |||
302 | .long sacos |$1c-0 facos norm | ||
303 | .long ld_ppi2 |$1c-1 facos zero | ||
304 | .long t_operr |$1c-2 facos inf | ||
305 | .long src_nan |$1c-3 facos nan | ||
306 | .long sacosd |$1c-4 facos denorm | ||
307 | .long serror |$1c-5 facos ERROR | ||
308 | .long serror |$1c-6 facos ERROR | ||
309 | .long serror |$1c-7 facos ERROR | ||
310 | |||
311 | .long scos |$1d-0 fcos norm | ||
312 | .long ld_pone |$1d-1 fcos zero | ||
313 | .long t_operr |$1d-2 fcos inf | ||
314 | .long src_nan |$1d-3 fcos nan | ||
315 | .long scosd |$1d-4 fcos denorm | ||
316 | .long serror |$1d-5 fcos ERROR | ||
317 | .long serror |$1d-6 fcos ERROR | ||
318 | .long serror |$1d-7 fcos ERROR | ||
319 | |||
320 | .long sgetexp |$1e-0 fgetexp norm | ||
321 | .long szero |$1e-1 fgetexp zero | ||
322 | .long t_operr |$1e-2 fgetexp inf | ||
323 | .long src_nan |$1e-3 fgetexp nan | ||
324 | .long sgetexpd |$1e-4 fgetexp denorm | ||
325 | .long serror |$1e-5 fgetexp ERROR | ||
326 | .long serror |$1e-6 fgetexp ERROR | ||
327 | .long serror |$1e-7 fgetexp ERROR | ||
328 | |||
329 | .long sgetman |$1f-0 fgetman norm | ||
330 | .long szero |$1f-1 fgetman zero | ||
331 | .long t_operr |$1f-2 fgetman inf | ||
332 | .long src_nan |$1f-3 fgetman nan | ||
333 | .long sgetmand |$1f-4 fgetman denorm | ||
334 | .long serror |$1f-5 fgetman ERROR | ||
335 | .long serror |$1f-6 fgetman ERROR | ||
336 | .long serror |$1f-7 fgetman ERROR | ||
337 | |||
338 | .long serror |$20-0 ERROR - illegal extension | ||
339 | .long serror |$20-1 ERROR - illegal extension | ||
340 | .long serror |$20-2 ERROR - illegal extension | ||
341 | .long serror |$20-3 ERROR - illegal extension | ||
342 | .long serror |$20-4 ERROR - illegal extension | ||
343 | .long serror |$20-5 ERROR - illegal extension | ||
344 | .long serror |$20-6 ERROR - illegal extension | ||
345 | .long serror |$20-7 ERROR - illegal extension | ||
346 | |||
347 | .long pmod |$21-0 fmod all | ||
348 | .long pmod |$21-1 fmod all | ||
349 | .long pmod |$21-2 fmod all | ||
350 | .long pmod |$21-3 fmod all | ||
351 | .long pmod |$21-4 fmod all | ||
352 | .long serror |$21-5 fmod ERROR | ||
353 | .long serror |$21-6 fmod ERROR | ||
354 | .long serror |$21-7 fmod ERROR | ||
355 | |||
356 | .long serror |$22-0 ERROR - illegal extension | ||
357 | .long serror |$22-1 ERROR - illegal extension | ||
358 | .long serror |$22-2 ERROR - illegal extension | ||
359 | .long serror |$22-3 ERROR - illegal extension | ||
360 | .long serror |$22-4 ERROR - illegal extension | ||
361 | .long serror |$22-5 ERROR - illegal extension | ||
362 | .long serror |$22-6 ERROR - illegal extension | ||
363 | .long serror |$22-7 ERROR - illegal extension | ||
364 | |||
365 | .long serror |$23-0 ERROR - illegal extension | ||
366 | .long serror |$23-1 ERROR - illegal extension | ||
367 | .long serror |$23-2 ERROR - illegal extension | ||
368 | .long serror |$23-3 ERROR - illegal extension | ||
369 | .long serror |$23-4 ERROR - illegal extension | ||
370 | .long serror |$23-5 ERROR - illegal extension | ||
371 | .long serror |$23-6 ERROR - illegal extension | ||
372 | .long serror |$23-7 ERROR - illegal extension | ||
373 | |||
374 | .long serror |$24-0 ERROR - illegal extension | ||
375 | .long serror |$24-1 ERROR - illegal extension | ||
376 | .long serror |$24-2 ERROR - illegal extension | ||
377 | .long serror |$24-3 ERROR - illegal extension | ||
378 | .long serror |$24-4 ERROR - illegal extension | ||
379 | .long serror |$24-5 ERROR - illegal extension | ||
380 | .long serror |$24-6 ERROR - illegal extension | ||
381 | .long serror |$24-7 ERROR - illegal extension | ||
382 | |||
383 | .long prem |$25-0 frem all | ||
384 | .long prem |$25-1 frem all | ||
385 | .long prem |$25-2 frem all | ||
386 | .long prem |$25-3 frem all | ||
387 | .long prem |$25-4 frem all | ||
388 | .long serror |$25-5 frem ERROR | ||
389 | .long serror |$25-6 frem ERROR | ||
390 | .long serror |$25-7 frem ERROR | ||
391 | |||
392 | .long pscale |$26-0 fscale all | ||
393 | .long pscale |$26-1 fscale all | ||
394 | .long pscale |$26-2 fscale all | ||
395 | .long pscale |$26-3 fscale all | ||
396 | .long pscale |$26-4 fscale all | ||
397 | .long serror |$26-5 fscale ERROR | ||
398 | .long serror |$26-6 fscale ERROR | ||
399 | .long serror |$26-7 fscale ERROR | ||
400 | |||
401 | .long serror |$27-0 ERROR - illegal extension | ||
402 | .long serror |$27-1 ERROR - illegal extension | ||
403 | .long serror |$27-2 ERROR - illegal extension | ||
404 | .long serror |$27-3 ERROR - illegal extension | ||
405 | .long serror |$27-4 ERROR - illegal extension | ||
406 | .long serror |$27-5 ERROR - illegal extension | ||
407 | .long serror |$27-6 ERROR - illegal extension | ||
408 | .long serror |$27-7 ERROR - illegal extension | ||
409 | |||
410 | .long serror |$28-0 ERROR - illegal extension | ||
411 | .long serror |$28-1 ERROR - illegal extension | ||
412 | .long serror |$28-2 ERROR - illegal extension | ||
413 | .long serror |$28-3 ERROR - illegal extension | ||
414 | .long serror |$28-4 ERROR - illegal extension | ||
415 | .long serror |$28-5 ERROR - illegal extension | ||
416 | .long serror |$28-6 ERROR - illegal extension | ||
417 | .long serror |$28-7 ERROR - illegal extension | ||
418 | |||
419 | .long serror |$29-0 ERROR - illegal extension | ||
420 | .long serror |$29-1 ERROR - illegal extension | ||
421 | .long serror |$29-2 ERROR - illegal extension | ||
422 | .long serror |$29-3 ERROR - illegal extension | ||
423 | .long serror |$29-4 ERROR - illegal extension | ||
424 | .long serror |$29-5 ERROR - illegal extension | ||
425 | .long serror |$29-6 ERROR - illegal extension | ||
426 | .long serror |$29-7 ERROR - illegal extension | ||
427 | |||
428 | .long serror |$2a-0 ERROR - illegal extension | ||
429 | .long serror |$2a-1 ERROR - illegal extension | ||
430 | .long serror |$2a-2 ERROR - illegal extension | ||
431 | .long serror |$2a-3 ERROR - illegal extension | ||
432 | .long serror |$2a-4 ERROR - illegal extension | ||
433 | .long serror |$2a-5 ERROR - illegal extension | ||
434 | .long serror |$2a-6 ERROR - illegal extension | ||
435 | .long serror |$2a-7 ERROR - illegal extension | ||
436 | |||
437 | .long serror |$2b-0 ERROR - illegal extension | ||
438 | .long serror |$2b-1 ERROR - illegal extension | ||
439 | .long serror |$2b-2 ERROR - illegal extension | ||
440 | .long serror |$2b-3 ERROR - illegal extension | ||
441 | .long serror |$2b-4 ERROR - illegal extension | ||
442 | .long serror |$2b-5 ERROR - illegal extension | ||
443 | .long serror |$2b-6 ERROR - illegal extension | ||
444 | .long serror |$2b-7 ERROR - illegal extension | ||
445 | |||
446 | .long serror |$2c-0 ERROR - illegal extension | ||
447 | .long serror |$2c-1 ERROR - illegal extension | ||
448 | .long serror |$2c-2 ERROR - illegal extension | ||
449 | .long serror |$2c-3 ERROR - illegal extension | ||
450 | .long serror |$2c-4 ERROR - illegal extension | ||
451 | .long serror |$2c-5 ERROR - illegal extension | ||
452 | .long serror |$2c-6 ERROR - illegal extension | ||
453 | .long serror |$2c-7 ERROR - illegal extension | ||
454 | |||
455 | .long serror |$2d-0 ERROR - illegal extension | ||
456 | .long serror |$2d-1 ERROR - illegal extension | ||
457 | .long serror |$2d-2 ERROR - illegal extension | ||
458 | .long serror |$2d-3 ERROR - illegal extension | ||
459 | .long serror |$2d-4 ERROR - illegal extension | ||
460 | .long serror |$2d-5 ERROR - illegal extension | ||
461 | .long serror |$2d-6 ERROR - illegal extension | ||
462 | .long serror |$2d-7 ERROR - illegal extension | ||
463 | |||
464 | .long serror |$2e-0 ERROR - illegal extension | ||
465 | .long serror |$2e-1 ERROR - illegal extension | ||
466 | .long serror |$2e-2 ERROR - illegal extension | ||
467 | .long serror |$2e-3 ERROR - illegal extension | ||
468 | .long serror |$2e-4 ERROR - illegal extension | ||
469 | .long serror |$2e-5 ERROR - illegal extension | ||
470 | .long serror |$2e-6 ERROR - illegal extension | ||
471 | .long serror |$2e-7 ERROR - illegal extension | ||
472 | |||
473 | .long serror |$2f-0 ERROR - illegal extension | ||
474 | .long serror |$2f-1 ERROR - illegal extension | ||
475 | .long serror |$2f-2 ERROR - illegal extension | ||
476 | .long serror |$2f-3 ERROR - illegal extension | ||
477 | .long serror |$2f-4 ERROR - illegal extension | ||
478 | .long serror |$2f-5 ERROR - illegal extension | ||
479 | .long serror |$2f-6 ERROR - illegal extension | ||
480 | .long serror |$2f-7 ERROR - illegal extension | ||
481 | |||
482 | .long ssincos |$30-0 fsincos norm | ||
483 | .long ssincosz |$30-1 fsincos zero | ||
484 | .long ssincosi |$30-2 fsincos inf | ||
485 | .long ssincosnan |$30-3 fsincos nan | ||
486 | .long ssincosd |$30-4 fsincos denorm | ||
487 | .long serror |$30-5 fsincos ERROR | ||
488 | .long serror |$30-6 fsincos ERROR | ||
489 | .long serror |$30-7 fsincos ERROR | ||
490 | |||
491 | .long ssincos |$31-0 fsincos norm | ||
492 | .long ssincosz |$31-1 fsincos zero | ||
493 | .long ssincosi |$31-2 fsincos inf | ||
494 | .long ssincosnan |$31-3 fsincos nan | ||
495 | .long ssincosd |$31-4 fsincos denorm | ||
496 | .long serror |$31-5 fsincos ERROR | ||
497 | .long serror |$31-6 fsincos ERROR | ||
498 | .long serror |$31-7 fsincos ERROR | ||
499 | |||
500 | .long ssincos |$32-0 fsincos norm | ||
501 | .long ssincosz |$32-1 fsincos zero | ||
502 | .long ssincosi |$32-2 fsincos inf | ||
503 | .long ssincosnan |$32-3 fsincos nan | ||
504 | .long ssincosd |$32-4 fsincos denorm | ||
505 | .long serror |$32-5 fsincos ERROR | ||
506 | .long serror |$32-6 fsincos ERROR | ||
507 | .long serror |$32-7 fsincos ERROR | ||
508 | |||
509 | .long ssincos |$33-0 fsincos norm | ||
510 | .long ssincosz |$33-1 fsincos zero | ||
511 | .long ssincosi |$33-2 fsincos inf | ||
512 | .long ssincosnan |$33-3 fsincos nan | ||
513 | .long ssincosd |$33-4 fsincos denorm | ||
514 | .long serror |$33-5 fsincos ERROR | ||
515 | .long serror |$33-6 fsincos ERROR | ||
516 | .long serror |$33-7 fsincos ERROR | ||
517 | |||
518 | .long ssincos |$34-0 fsincos norm | ||
519 | .long ssincosz |$34-1 fsincos zero | ||
520 | .long ssincosi |$34-2 fsincos inf | ||
521 | .long ssincosnan |$34-3 fsincos nan | ||
522 | .long ssincosd |$34-4 fsincos denorm | ||
523 | .long serror |$34-5 fsincos ERROR | ||
524 | .long serror |$34-6 fsincos ERROR | ||
525 | .long serror |$34-7 fsincos ERROR | ||
526 | |||
527 | .long ssincos |$35-0 fsincos norm | ||
528 | .long ssincosz |$35-1 fsincos zero | ||
529 | .long ssincosi |$35-2 fsincos inf | ||
530 | .long ssincosnan |$35-3 fsincos nan | ||
531 | .long ssincosd |$35-4 fsincos denorm | ||
532 | .long serror |$35-5 fsincos ERROR | ||
533 | .long serror |$35-6 fsincos ERROR | ||
534 | .long serror |$35-7 fsincos ERROR | ||
535 | |||
536 | .long ssincos |$36-0 fsincos norm | ||
537 | .long ssincosz |$36-1 fsincos zero | ||
538 | .long ssincosi |$36-2 fsincos inf | ||
539 | .long ssincosnan |$36-3 fsincos nan | ||
540 | .long ssincosd |$36-4 fsincos denorm | ||
541 | .long serror |$36-5 fsincos ERROR | ||
542 | .long serror |$36-6 fsincos ERROR | ||
543 | .long serror |$36-7 fsincos ERROR | ||
544 | |||
545 | .long ssincos |$37-0 fsincos norm | ||
546 | .long ssincosz |$37-1 fsincos zero | ||
547 | .long ssincosi |$37-2 fsincos inf | ||
548 | .long ssincosnan |$37-3 fsincos nan | ||
549 | .long ssincosd |$37-4 fsincos denorm | ||
550 | .long serror |$37-5 fsincos ERROR | ||
551 | .long serror |$37-6 fsincos ERROR | ||
552 | .long serror |$37-7 fsincos ERROR | ||
553 | |||
554 | |end | ||
diff --git a/arch/m68k/fpsp040/util.S b/arch/m68k/fpsp040/util.S new file mode 100644 index 000000000000..452f3d65857b --- /dev/null +++ b/arch/m68k/fpsp040/util.S | |||
@@ -0,0 +1,748 @@ | |||
1 | | | ||
2 | | util.sa 3.7 7/29/91 | ||
3 | | | ||
4 | | This file contains routines used by other programs. | ||
5 | | | ||
6 | | ovf_res: used by overflow to force the correct | ||
7 | | result. ovf_r_k, ovf_r_x2, ovf_r_x3 are | ||
8 | | derivatives of this routine. | ||
9 | | get_fline: get user's opcode word | ||
10 | | g_dfmtou: returns the destination format. | ||
11 | | g_opcls: returns the opclass of the float instruction. | ||
12 | | g_rndpr: returns the rounding precision. | ||
13 | | reg_dest: write byte, word, or long data to Dn | ||
14 | | | ||
15 | | | ||
16 | | Copyright (C) Motorola, Inc. 1990 | ||
17 | | All Rights Reserved | ||
18 | | | ||
19 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
20 | | The copyright notice above does not evidence any | ||
21 | | actual or intended publication of such source code. | ||
22 | |||
23 | |UTIL idnt 2,1 | Motorola 040 Floating Point Software Package | ||
24 | |||
25 | |section 8 | ||
26 | |||
27 | #include "fpsp.h" | ||
28 | |||
29 | |xref mem_read | ||
30 | |||
31 | .global g_dfmtou | ||
32 | .global g_opcls | ||
33 | .global g_rndpr | ||
34 | .global get_fline | ||
35 | .global reg_dest | ||
36 | |||
37 | | Each entry below is four longwords; the upper half of the first longword holds the exponent with the sign bit clear. | ||
38 | | Final result table for ovf_res. Note that the negative counterparts | ||
39 | | are unnecessary as ovf_res always returns the sign separately from | ||
40 | | the exponent. | ||
41 | | ;+inf | ||
42 | EXT_PINF: .long 0x7fff0000,0x00000000,0x00000000,0x00000000 | ||
43 | | ;largest +ext | ||
44 | EXT_PLRG: .long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 | ||
45 | | ;largest magnitude +sgl in ext | ||
46 | SGL_PLRG: .long 0x407e0000,0xffffff00,0x00000000,0x00000000 | ||
47 | | ;largest magnitude +dbl in ext | ||
48 | DBL_PLRG: .long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 | ||
49 | | ;(no negative entries are defined -- see the note at the top of this table) | ||
50 | |||
51 | tblovfl: |jump table used by ovf_res; entry index = 4*precision + rounding mode | ||
52 | .long EXT_RN |index 0: precision 0 (ext), mode 0 | ||
53 | .long EXT_RZ |index 1: precision 0 (ext), mode 1 | ||
54 | .long EXT_RM |index 2: precision 0 (ext), mode 2 | ||
55 | .long EXT_RP |index 3: precision 0 (ext), mode 3 | ||
56 | .long SGL_RN |index 4: precision 1 (sgl), mode 0 | ||
57 | .long SGL_RZ |index 5: precision 1 (sgl), mode 1 | ||
58 | .long SGL_RM |index 6: precision 1 (sgl), mode 2 | ||
59 | .long SGL_RP |index 7: precision 1 (sgl), mode 3 | ||
60 | .long DBL_RN |index 8: precision 2 (dbl), mode 0 | ||
61 | .long DBL_RZ |index 9: precision 2 (dbl), mode 1 | ||
62 | .long DBL_RM |index 10: precision 2 (dbl), mode 2 | ||
63 | .long DBL_RP |index 11: precision 2 (dbl), mode 3 | ||
64 | .long error |index 12-15: precision 3 has no handler; `error` is not defined in this part of the file | ||
65 | .long error | ||
66 | .long error | ||
67 | .long error | ||
68 | |||
69 | |||
70 | | | ||
71 | | ovf_r_k --- overflow result calculation | ||
72 | | | ||
73 | | This entry point is used by kernel_ex. | ||
74 | | | ||
75 | | This forces the destination precision to be extended | ||
76 | | NOTE(review): no precision is set here; the code falls through into ovf_r_x2, which selects it -- confirm intent | ||
77 | | Input: operand in ETEMP | ||
78 | | Output: a result is in ETEMP (internal extended format) | ||
79 | | a0 is left pointing at ETEMP; there is no return here, execution continues in ovf_r_x2 | ||
80 | .global ovf_r_k | ||
81 | ovf_r_k: | ||
82 | lea ETEMP(%a6),%a0 |a0 points to source operand | ||
83 | bclrb #sign_bit,ETEMP_EX(%a6) |clear the sign bit; Z flag reflects its previous state | ||
84 | sne ETEMP_SGN(%a6) |convert to internal IEEE format (ETEMP_SGN = $ff if the sign was set, else 0) | ||
85 | |||
86 | | | ||
87 | | ovf_r_x2 --- overflow result calculation | ||
88 | | | ||
89 | | This entry point used by x_ovfl. (opclass 0 and 2) | ||
90 | | | ||
91 | | Input a0 points to an operand in the internal extended format | ||
92 | | Output a0 points to the result in the internal extended format | ||
93 | | d0 is scratch here; every path leaves the round precision (0, 1, 2 or the FPCR field) in d0 and branches to ovf_res | ||
94 | | This sets the round precision according to the user's FPCR unless the | ||
95 | | instruction is fsgldiv or fsglmul or fsadd, fdadd, fsub, fdsub, fsmul, | ||
96 | | fdmul, fsdiv, fddiv, fssqrt, fsmove, fdmove, fsabs, fdabs, fsneg, fdneg. | ||
97 | | If the instruction is fsgldiv or fsglmul, the rounding precision must be | ||
98 | | extended. If the instruction is not fsgldiv or fsglmul but a force- | ||
99 | | precision instruction, the rounding precision is then set to the force | ||
100 | | precision. | ||
101 | |||
102 | .global ovf_r_x2 | ||
103 | ovf_r_x2: | ||
104 | btstb #E3,E_BYTE(%a6) |check for nu exception | ||
105 | beql ovf_e1_exc |it is cu exception | ||
106 | ovf_e3_exc: | ||
107 | movew CMDREG3B(%a6),%d0 |get the command word | ||
108 | andil #0x00000060,%d0 |clear all bits except 6 and 5 (long form also zeroes the upper word of d0 for the cmpil tests) | ||
109 | cmpil #0x00000040,%d0 | ||
110 | beql ovff_sgl |force precision is single | ||
111 | cmpil #0x00000060,%d0 | ||
112 | beql ovff_dbl |force precision is double | ||
113 | movew CMDREG3B(%a6),%d0 |get the command word again | ||
114 | andil #0x7f,%d0 |clear all except operation | ||
115 | cmpil #0x33,%d0 | ||
116 | beql ovf_fsgl |fsglmul or fsgldiv | ||
117 | cmpil #0x30,%d0 | ||
118 | beql ovf_fsgl | ||
119 | bra ovf_fpcr |instruction is none of the above | ||
120 | | ;use FPCR | ||
121 | ovf_e1_exc: | ||
122 | movew CMDREG1B(%a6),%d0 |get command word | ||
123 | andil #0x00000044,%d0 |clear all bits except 6 and 2 | ||
124 | cmpil #0x00000040,%d0 | ||
125 | beql ovff_sgl |the instruction is force single | ||
126 | cmpil #0x00000044,%d0 | ||
127 | beql ovff_dbl |the instruction is force double | ||
128 | movew CMDREG1B(%a6),%d0 |again get the command word | ||
129 | andil #0x0000007f,%d0 |clear all except the op code | ||
130 | cmpil #0x00000027,%d0 | ||
131 | beql ovf_fsgl |fsglmul | ||
132 | cmpil #0x00000024,%d0 | ||
133 | beql ovf_fsgl |fsgldiv | ||
134 | bra ovf_fpcr |none of the above, use FPCR | ||
135 | | | ||
136 | | | ||
137 | | Inst is either fsgldiv or fsglmul. Force extended precision. | ||
138 | | | ||
139 | ovf_fsgl: | ||
140 | clrl %d0 |precision code 0 = extended | ||
141 | bra ovf_res | ||
142 | |||
143 | ovff_sgl: | ||
144 | movel #0x00000001,%d0 |set single | ||
145 | bra ovf_res | ||
146 | ovff_dbl: | ||
147 | movel #0x00000002,%d0 |set double | ||
148 | bra ovf_res | ||
149 | | | ||
150 | | The precision is in the fpcr. | ||
151 | | | ||
152 | ovf_fpcr: | ||
153 | bfextu FPCR_MODE(%a6){#0:#2},%d0 |set round precision | ||
154 | bra ovf_res | ||
155 | |||
156 | | | ||
157 | | | ||
158 | | ovf_r_x3 --- overflow result calculation | ||
159 | | | ||
160 | | This entry point used by x_ovfl. (opclass 3 only) | ||
161 | | | ||
162 | | Input a0 points to an operand in the internal extended format | ||
163 | | Output a0 points to the result in the internal extended format | ||
164 | | | ||
165 | | This sets the round precision according to the destination size. | ||
166 | | There is no branch after the call: execution falls through into ovf_res with the format in d0. | ||
167 | .global ovf_r_x3 | ||
168 | ovf_r_x3: | ||
169 | bsr g_dfmtou |get dest fmt in d0{1:0} | ||
170 | | ;for fmovout, the destination format | ||
171 | | ;is the rounding precision | ||
172 | |||
173 | | | ||
174 | | ovf_res --- overflow result calculation | ||
175 | | | ||
176 | | Input: | ||
177 | | a0 points to operand in internal extended format | ||
178 | | Output: | ||
179 | | a0 points to result in internal extended format | ||
180 | | d0 must hold the round precision on entry; d0, d1 and a1 are overwritten by the dispatch below | ||
181 | .global ovf_res | ||
182 | ovf_res: | ||
183 | lsll #2,%d0 |move round precision to d0{3:2} | ||
184 | bfextu FPCR_MODE(%a6){#2:#2},%d1 |set round mode | ||
185 | orl %d1,%d0 |index is fmt:mode in d0{3:0} | ||
186 | leal tblovfl,%a1 |load a1 with table address | ||
187 | movel %a1@(%d0:l:4),%a1 |use d0 as index to the table (scaled by 4, one longword per entry) | ||
188 | jmp (%a1) |go to the correct routine | ||
189 | | | ||
190 | |case DEST_FMT = EXT | ||
191 | | Each handler loads a1 with the address of the magnitude constant to return, then branches to set_sign or end_ovfr. | ||
192 | EXT_RN: | ||
193 | leal EXT_PINF,%a1 |answer is +/- infinity | ||
194 | bsetb #inf_bit,FPSR_CC(%a6) |flag infinity in the condition code byte | ||
195 | bra set_sign |now go set the sign | ||
196 | EXT_RZ: | ||
197 | leal EXT_PLRG,%a1 |answer is +/- large number | ||
198 | bra set_sign |now go set the sign | ||
199 | EXT_RM: | ||
200 | tstb LOCAL_SGN(%a0) |if negative overflow | ||
201 | beqs e_rm_pos |sign byte is zero: operand is positive | ||
202 | e_rm_neg: | ||
203 | leal EXT_PINF,%a1 |answer is negative infinity | ||
204 | orl #neginf_mask,USER_FPSR(%a6) |set the neginf_mask bits in the user FPSR | ||
205 | bra end_ovfr |goes to end_ovfr rather than set_sign | ||
206 | e_rm_pos: | ||
207 | leal EXT_PLRG,%a1 |answer is large positive number | ||
208 | bra end_ovfr | ||
209 | EXT_RP: | ||
210 | tstb LOCAL_SGN(%a0) |if negative overflow | ||
211 | beqs e_rp_pos |sign byte is zero: operand is positive | ||
212 | e_rp_neg: | ||
213 | leal EXT_PLRG,%a1 |answer is large negative number | ||
214 | bsetb #neg_bit,FPSR_CC(%a6) |flag negative in the condition code byte | ||
215 | bra end_ovfr | ||
216 | e_rp_pos: | ||
217 | leal EXT_PINF,%a1 |answer is positive infinity | ||
218 | bsetb #inf_bit,FPSR_CC(%a6) |flag infinity in the condition code byte | ||
219 | bra end_ovfr | ||
220 | | | ||
221 | |case DEST_FMT = DBL | ||
222 | | | ||
223 | DBL_RN: |dbl precision, mode RN | ||
224 | leal EXT_PINF,%a1 |answer is +/- infinity | ||
225 | bsetb #inf_bit,FPSR_CC(%a6) |flag infinity in the condition codes | ||
226 | bra set_sign |now go set the sign | ||
227 | DBL_RZ: |dbl precision, mode RZ | ||
228 | leal DBL_PLRG,%a1 |answer is +/- large number | ||
229 | bra set_sign |now go set the sign | ||
230 | DBL_RM: |dbl precision, mode RM | ||
231 | tstb LOCAL_SGN(%a0) |if negative overflow | ||
232 | beqs d_rm_pos |sign byte zero: positive operand | ||
233 | d_rm_neg: | ||
234 | leal EXT_PINF,%a1 |answer is negative infinity | ||
235 | orl #neginf_mask,USER_FPSR(%a6) |apply neginf_mask to the user FPSR | ||
236 | bra end_ovfr |inf is same for all precisions (ext,dbl,sgl) | ||
237 | d_rm_pos: | ||
238 | leal DBL_PLRG,%a1 |answer is large positive number | ||
239 | bra end_ovfr |no condition code bits set here | ||
240 | DBL_RP: |dbl precision, mode RP | ||
241 | tstb LOCAL_SGN(%a0) |if negative overflow | ||
242 | beqs d_rp_pos |sign byte zero: positive operand | ||
243 | d_rp_neg: | ||
244 | leal DBL_PLRG,%a1 |answer is large negative number | ||
245 | bsetb #neg_bit,FPSR_CC(%a6) |flag negative in the condition codes | ||
246 | bra end_ovfr | ||
247 | d_rp_pos: | ||
248 | leal EXT_PINF,%a1 |answer is positive infinity | ||
249 | bsetb #inf_bit,FPSR_CC(%a6) |flag infinity in the condition codes | ||
250 | bra end_ovfr | ||
251 | | | ||
252 | |case DEST_FMT = SGL | ||
253 | | | ||
254 | SGL_RN: |sgl precision, mode RN | ||
255 | leal EXT_PINF,%a1 |answer is +/- infinity | ||
256 | bsetb #inf_bit,FPSR_CC(%a6) |flag infinity in the condition codes | ||
257 | bras set_sign |now go set the sign | ||
258 | SGL_RZ: |sgl precision, mode RZ | ||
259 | leal SGL_PLRG,%a1 |answer is +/- large number | ||
260 | bras set_sign |now go set the sign | ||
261 | SGL_RM: |sgl precision, mode RM | ||
262 | tstb LOCAL_SGN(%a0) |if negative overflow | ||
263 | beqs s_rm_pos |sign byte zero: positive operand | ||
264 | s_rm_neg: | ||
265 | leal EXT_PINF,%a1 |answer is negative infinity | ||
266 | orl #neginf_mask,USER_FPSR(%a6) |apply neginf_mask to the user FPSR | ||
267 | bras end_ovfr | ||
268 | s_rm_pos: | ||
269 | leal SGL_PLRG,%a1 |answer is large positive number | ||
270 | bras end_ovfr |no condition code bits set here | ||
271 | SGL_RP: |sgl precision, mode RP | ||
272 | tstb LOCAL_SGN(%a0) |if negative overflow | ||
273 | beqs s_rp_pos |sign byte zero: positive operand | ||
274 | s_rp_neg: | ||
275 | leal SGL_PLRG,%a1 |answer is large negative number | ||
276 | bsetb #neg_bit,FPSR_CC(%a6) |flag negative in the condition codes | ||
277 | bras end_ovfr | ||
278 | s_rp_pos: | ||
279 | leal EXT_PINF,%a1 |answer is positive infinity | ||
280 | bsetb #inf_bit,FPSR_CC(%a6) |flag infinity in the condition codes | ||
281 | bras end_ovfr | ||
282 | |||
283 | set_sign: |common tail for the RN/RZ overflow cases | ||
284 | tstb LOCAL_SGN(%a0) |if negative overflow | ||
285 | beqs end_ovfr |positive: leave the neg bit alone | ||
286 | neg_sign: | ||
287 | bsetb #neg_bit,FPSR_CC(%a6) |flag negative in the condition codes | ||
288 | |||
289 | end_ovfr: |copy the table entry at (a1) over the operand at (a0) | ||
290 | movew LOCAL_EX(%a1),LOCAL_EX(%a0) |do not overwrite sign | ||
291 | movel LOCAL_HI(%a1),LOCAL_HI(%a0) |copy the LOCAL_HI lword | ||
292 | movel LOCAL_LO(%a1),LOCAL_LO(%a0) |copy the LOCAL_LO lword | ||
293 | rts | ||
294 | |||
295 | |||
296 | | | ||
297 | | ERROR | ||
298 | | | ||
299 | error: |placeholder error exit: does nothing | ||
300 | rts |return straight to the caller | ||
301 | | | ||
302 | | get_fline --- get f-line opcode of interrupted instruction | ||
303 | | | ||
304 | | Returns opcode in the low word of d0. | ||
305 | | | ||
306 | get_fline: |overwrites d0, a0, a1 (plus whatever mem_read changes) | ||
307 | movel USER_FPIAR(%a6),%a0 |opcode address | ||
308 | movel #0,-(%a7) |push a zeroed longword as temporary | ||
309 | leal 2(%a7),%a1 |point to low word of temporary | ||
310 | movel #2,%d0 |count: one 2-byte opcode word | ||
311 | bsrl mem_read |a0=source, a1=destination, d0=count | ||
312 | movel (%a7)+,%d0 |pop temporary: opcode in low word of d0 | ||
313 | rts | ||
314 | | | ||
315 | | g_rndpr --- put rounding precision in d0{1:0} | ||
316 | | | ||
317 | | valid return codes are: | ||
318 | | 00 - extended | ||
319 | | 01 - single | ||
320 | | 10 - double | ||
321 | | | ||
322 | | begin | ||
323 | | get rounding precision (cmdreg3b{6:5}) | ||
324 | | begin | ||
325 | | case opclass = 011 (move out) | ||
326 | | get destination format - this is also the rounding precision | ||
327 | | | ||
328 | | case opclass = 0x0 | ||
329 | | if E3 | ||
330 | | *case RndPr(from cmdreg3b{6:5} = 11 then RND_PREC = DBL | ||
331 | | *case RndPr(from cmdreg3b{6:5} = 10 then RND_PREC = SGL | ||
332 | | case RndPr(from cmdreg3b{6:5} = 00 | 01 | ||
333 | | use precision from FPCR{7:6} | ||
334 | | case 00 then RND_PREC = EXT | ||
335 | | case 01 then RND_PREC = SGL | ||
336 | | case 10 then RND_PREC = DBL | ||
337 | | else E1 | ||
338 | | use precision in FPCR{7:6} | ||
339 | | case 00 then RND_PREC = EXT | ||
340 | | case 01 then RND_PREC = SGL | ||
341 | | case 10 then RND_PREC = DBL | ||
342 | | end | ||
343 | | | ||
344 | g_rndpr: |returns 0=ext, 1=sgl, 2=dbl in d0 | ||
345 | bsr g_opcls |get opclass in d0{2:0} | ||
346 | cmpw #0x0003,%d0 |check for opclass 011 | ||
347 | bnes op_0x0 |not a move out: decode the command register | ||
348 | |||
349 | | | ||
350 | | For move out instructions (opclass 011) the destination format | ||
351 | | is the same as the rounding precision. Pass results from g_dfmtou. | ||
352 | | | ||
353 | bsr g_dfmtou |d0 = destination format code | ||
354 | rts |return it unchanged | ||
355 | op_0x0: | ||
356 | btstb #E3,E_BYTE(%a6) |test the E3 bit | ||
357 | beql unf_e1_exc |E3 clear: take the E1 (cmdreg1b) path | ||
358 | unf_e3_exc: | ||
359 | movel CMDREG3B(%a6),%d0 |rounding precision at bit-field offsets 9-10 | ||
360 | bfextu %d0{#9:#2},%d0 |move the rounding prec bits to d0{1:0} | ||
361 | cmpil #0x2,%d0 |10 = forced single | ||
362 | beql unff_sgl |force precision is single | ||
363 | cmpil #0x3,%d0 |force precision is double | ||
364 | beql unff_dbl |11 = forced double | ||
365 | movew CMDREG3B(%a6),%d0 |get the command word again | ||
366 | andil #0x7f,%d0 |clear all except operation | ||
367 | cmpil #0x33,%d0 | ||
368 | beql unf_fsgl |fsglmul or fsgldiv | ||
369 | cmpil #0x30,%d0 | ||
370 | beql unf_fsgl |fsgldiv or fsglmul | ||
371 | bra unf_fpcr |otherwise use the precision in the FPCR | ||
372 | unf_e1_exc: | ||
373 | movel CMDREG1B(%a6),%d0 |get 32 bits off the stack, 1st 16 bits | ||
374 | | ;are the command word | ||
375 | andil #0x00440000,%d0 |clear all bits except bits 6 and 2 | ||
376 | cmpil #0x00400000,%d0 |bit 6 set, bit 2 clear | ||
377 | beql unff_sgl |force single | ||
378 | cmpil #0x00440000,%d0 |force double | ||
379 | beql unff_dbl |bits 6 and 2 both set | ||
380 | movel CMDREG1B(%a6),%d0 |get the command word again | ||
381 | andil #0x007f0000,%d0 |clear all bits except the operation | ||
382 | cmpil #0x00270000,%d0 | ||
383 | beql unf_fsgl |fsglmul | ||
384 | cmpil #0x00240000,%d0 | ||
385 | beql unf_fsgl |fsgldiv | ||
386 | bra unf_fpcr |otherwise use the precision in the FPCR | ||
387 | |||
388 | | | ||
389 | | Convert to return format. The values from cmdreg3b and the return | ||
390 | | values are: | ||
391 | | cmdreg3b return precision | ||
392 | | -------- ------ --------- | ||
393 | | 00,01 0 ext | ||
394 | | 10 1 sgl | ||
395 | | 11 2 dbl | ||
396 | | Force single | ||
397 | | | ||
398 | unff_sgl: | ||
399 | movel #1,%d0 |return 1 | ||
400 | rts | ||
401 | | | ||
402 | | Force double | ||
403 | | | ||
404 | unff_dbl: | ||
405 | movel #2,%d0 |return 2 | ||
406 | rts | ||
407 | | | ||
408 | | Force extended | ||
409 | | | ||
410 | unf_fsgl: | ||
411 | movel #0,%d0 |return 0 | ||
412 | rts | ||
413 | | | ||
414 | | Get rounding precision set in FPCR{7:6}. | ||
415 | | | ||
416 | unf_fpcr: | ||
417 | movel USER_FPCR(%a6),%d0 |rounding precision bits in d0{7:6} | ||
418 | bfextu %d0{#24:#2},%d0 |move the rounding prec bits to d0{1:0} | ||
419 | rts | ||
420 | | | ||
421 | | g_opcls --- put opclass in d0{2:0} | ||
422 | | | ||
423 | g_opcls: | ||
424 | btstb #E3,E_BYTE(%a6) |test the E3 bit | ||
425 | beqs opc_1b |if E3 is clear, opclass comes from cmdreg1b | ||
426 | opc_3b: | ||
427 | clrl %d0 |if E3, only opclass 0x0 is possible | ||
428 | rts | ||
429 | opc_1b: | ||
430 | movel CMDREG1B(%a6),%d0 | ||
431 | bfextu %d0{#0:#3},%d0 |shift opclass bits d0{31:29} to d0{2:0} | ||
432 | rts | ||
433 | | | ||
434 | | g_dfmtou --- put destination format in d0{1:0} | ||
435 | | | ||
436 | | If E1, the format is from cmdreg1b{12:10} | ||
437 | | If E3, the format is extended. | ||
438 | | | ||
439 | | Dest. Fmt. | ||
440 | | extended 010 -> 00 | ||
441 | | single 001 -> 01 | ||
442 | | double 101 -> 10 | ||
443 | | | ||
444 | g_dfmtou: |returns 0=ext, 1=sgl, 2=dbl in d0 | ||
445 | btstb #E3,E_BYTE(%a6) |test the E3 bit | ||
446 | beqs op011 |E3 clear: decode format from cmdreg1b | ||
447 | clrl %d0 |if E3, size is always ext | ||
448 | rts | ||
449 | op011: | ||
450 | movel CMDREG1B(%a6),%d0 | ||
451 | bfextu %d0{#3:#3},%d0 |dest fmt from cmdreg1b{12:10} | ||
452 | cmpb #1,%d0 |check for single | ||
453 | bnes not_sgl | ||
454 | movel #1,%d0 |001 -> return 1 (single) | ||
455 | rts | ||
456 | not_sgl: | ||
457 | cmpb #5,%d0 |check for double | ||
458 | bnes not_dbl | ||
459 | movel #2,%d0 |101 -> return 2 (double) | ||
460 | rts | ||
461 | not_dbl: | ||
462 | clrl %d0 |any other code is treated as extended | ||
463 | rts | ||
464 | |||
465 | | | ||
466 | | | ||
467 | | Final result table for unf_sub. Note that the negative counterparts | ||
468 | | are unnecessary as unf_sub always returns the sign separately from | ||
469 | | the exponent. | ||
470 | | ;+zero, ext | ||
471 | EXT_PZRO: .long 0x00000000,0x00000000,0x00000000,0x00000000 | ||
472 | | ;+zero, sgl (first word 0x3f81) | ||
473 | SGL_PZRO: .long 0x3f810000,0x00000000,0x00000000,0x00000000 | ||
474 | | ;+zero, dbl (first word 0x3c01) | ||
475 | DBL_PZRO: .long 0x3c010000,0x00000000,0x00000000,0x00000000 | ||
476 | | ;smallest +ext denorm | ||
477 | EXT_PSML: .long 0x00000000,0x00000000,0x00000001,0x00000000 | ||
478 | | ;smallest +sgl denorm | ||
479 | SGL_PSML: .long 0x3f810000,0x00000100,0x00000000,0x00000000 | ||
480 | | ;smallest +dbl denorm | ||
481 | DBL_PSML: .long 0x3c010000,0x00000000,0x00000800,0x00000000 | ||
482 | | | ||
483 | | UNF_SUB --- underflow result calculation | ||
484 | | | ||
485 | | Input: | ||
486 | | d0 contains round precision | ||
487 | | a0 points to input operand in the internal extended format | ||
488 | | | ||
489 | | Output: | ||
490 | | a0 points to correct internal extended precision result. | ||
491 | | | ||
492 | |||
493 | tblunf: |indexed by (precision << 2) | round mode, see unf_sub | ||
494 | .long uEXT_RN |0-3: precision 00 (ext), modes RN,RZ,RM,RP | ||
495 | .long uEXT_RZ | ||
496 | .long uEXT_RM | ||
497 | .long uEXT_RP | ||
498 | .long uSGL_RN |4-7: precision 01 (sgl) | ||
499 | .long uSGL_RZ | ||
500 | .long uSGL_RM | ||
501 | .long uSGL_RP | ||
502 | .long uDBL_RN |8-11: precision 10 (dbl) | ||
503 | .long uDBL_RZ | ||
504 | .long uDBL_RM | ||
505 | .long uDBL_RP | ||
506 | .long uDBL_RN |12-15: precision 11 also uses the dbl handlers | ||
507 | .long uDBL_RZ | ||
508 | .long uDBL_RM | ||
509 | .long uDBL_RP | ||
510 | |||
511 | .global unf_sub | ||
512 | unf_sub: |overwrites d0, d1, a1 | ||
513 | lsll #2,%d0 |move round precision to d0{3:2} | ||
514 | bfextu FPCR_MODE(%a6){#2:#2},%d1 |get round mode in d1{1:0} | ||
515 | orl %d1,%d0 |index is fmt:mode in d0{3:0} | ||
516 | leal tblunf,%a1 |load a1 with table address | ||
517 | movel %a1@(%d0:l:4),%a1 |fetch handler address tblunf[d0] | ||
518 | jmp (%a1) |go to the correct routine | ||
519 | | | ||
520 | |case DEST_FMT = EXT | ||
521 | | | ||
522 | uEXT_RN: |ext precision, mode RN | ||
523 | leal EXT_PZRO,%a1 |answer is +/- zero | ||
524 | bsetb #z_bit,FPSR_CC(%a6) |flag zero in the condition codes | ||
525 | bra uset_sign |now go set the sign | ||
526 | uEXT_RZ: |ext precision, mode RZ | ||
527 | leal EXT_PZRO,%a1 |answer is +/- zero | ||
528 | bsetb #z_bit,FPSR_CC(%a6) |flag zero in the condition codes | ||
529 | bra uset_sign |now go set the sign | ||
530 | uEXT_RM: |ext precision, mode RM | ||
531 | tstb LOCAL_SGN(%a0) |if negative underflow | ||
532 | beqs ue_rm_pos |sign byte zero: positive operand | ||
533 | ue_rm_neg: | ||
534 | leal EXT_PSML,%a1 |answer is negative smallest denorm | ||
535 | bsetb #neg_bit,FPSR_CC(%a6) |flag negative in the condition codes | ||
536 | bra end_unfr | ||
537 | ue_rm_pos: | ||
538 | leal EXT_PZRO,%a1 |answer is positive zero | ||
539 | bsetb #z_bit,FPSR_CC(%a6) |flag zero in the condition codes | ||
540 | bra end_unfr | ||
541 | uEXT_RP: |ext precision, mode RP | ||
542 | tstb LOCAL_SGN(%a0) |if negative underflow | ||
543 | beqs ue_rp_pos |sign byte zero: positive operand | ||
544 | ue_rp_neg: | ||
545 | leal EXT_PZRO,%a1 |answer is negative zero | ||
546 | oril #negz_mask,USER_FPSR(%a6) |apply negz_mask to the user FPSR | ||
547 | bra end_unfr | ||
548 | ue_rp_pos: | ||
549 | leal EXT_PSML,%a1 |answer is positive smallest denorm | ||
550 | bra end_unfr |no condition code bits set here | ||
551 | | | ||
552 | |case DEST_FMT = DBL | ||
553 | | | ||
554 | uDBL_RN: |dbl precision, mode RN | ||
555 | leal DBL_PZRO,%a1 |answer is +/- zero | ||
556 | bsetb #z_bit,FPSR_CC(%a6) |flag zero in the condition codes | ||
557 | bra uset_sign |now go set the sign | ||
558 | uDBL_RZ: |dbl precision, mode RZ | ||
559 | leal DBL_PZRO,%a1 |answer is +/- zero | ||
560 | bsetb #z_bit,FPSR_CC(%a6) |flag zero in the condition codes | ||
561 | bra uset_sign |now go set the sign | ||
562 | uDBL_RM: |dbl precision, mode RM | ||
563 | tstb LOCAL_SGN(%a0) |if negative underflow | ||
564 | beqs ud_rm_pos |sign byte zero: positive operand | ||
565 | ud_rm_neg: | ||
566 | leal DBL_PSML,%a1 |answer is smallest denormalized negative | ||
567 | bsetb #neg_bit,FPSR_CC(%a6) |flag negative in the condition codes | ||
568 | bra end_unfr | ||
569 | ud_rm_pos: | ||
570 | leal DBL_PZRO,%a1 |answer is positive zero | ||
571 | bsetb #z_bit,FPSR_CC(%a6) |flag zero in the condition codes | ||
572 | bra end_unfr | ||
573 | uDBL_RP: |dbl precision, mode RP | ||
574 | tstb LOCAL_SGN(%a0) |if negative underflow | ||
575 | beqs ud_rp_pos |sign byte zero: positive operand | ||
576 | ud_rp_neg: | ||
577 | leal DBL_PZRO,%a1 |answer is negative zero | ||
578 | oril #negz_mask,USER_FPSR(%a6) |apply negz_mask to the user FPSR | ||
579 | bra end_unfr | ||
580 | ud_rp_pos: | ||
581 | leal DBL_PSML,%a1 |answer is smallest denormalized positive | ||
582 | bra end_unfr |no condition code bits set here | ||
583 | | | ||
584 | |case DEST_FMT = SGL | ||
585 | | | ||
586 | uSGL_RN: |sgl precision, mode RN | ||
587 | leal SGL_PZRO,%a1 |answer is +/- zero | ||
588 | bsetb #z_bit,FPSR_CC(%a6) |flag zero in the condition codes | ||
589 | bras uset_sign |now go set the sign | ||
590 | uSGL_RZ: |sgl precision, mode RZ | ||
591 | leal SGL_PZRO,%a1 |answer is +/- zero | ||
592 | bsetb #z_bit,FPSR_CC(%a6) |flag zero in the condition codes | ||
593 | bras uset_sign |now go set the sign | ||
594 | uSGL_RM: |sgl precision, mode RM | ||
595 | tstb LOCAL_SGN(%a0) |if negative underflow | ||
596 | beqs us_rm_pos |sign byte zero: positive operand | ||
597 | us_rm_neg: | ||
598 | leal SGL_PSML,%a1 |answer is smallest denormalized negative | ||
599 | bsetb #neg_bit,FPSR_CC(%a6) |flag negative in the condition codes | ||
600 | bras end_unfr | ||
601 | us_rm_pos: | ||
602 | leal SGL_PZRO,%a1 |answer is positive zero | ||
603 | bsetb #z_bit,FPSR_CC(%a6) |flag zero in the condition codes | ||
604 | bras end_unfr | ||
605 | uSGL_RP: |sgl precision, mode RP | ||
606 | tstb LOCAL_SGN(%a0) |if negative underflow | ||
607 | beqs us_rp_pos |sign byte zero: positive operand | ||
608 | us_rp_neg: | ||
609 | leal SGL_PZRO,%a1 |answer is negative zero | ||
610 | oril #negz_mask,USER_FPSR(%a6) |apply negz_mask to the user FPSR | ||
611 | bras end_unfr | ||
612 | us_rp_pos: | ||
613 | leal SGL_PSML,%a1 |answer is smallest denormalized positive | ||
614 | bras end_unfr |no condition code bits set here | ||
615 | |||
616 | uset_sign: |common tail for the RN/RZ underflow cases | ||
617 | tstb LOCAL_SGN(%a0) |if negative underflow | ||
618 | beqs end_unfr |positive: leave the neg bit alone | ||
619 | uneg_sign: | ||
620 | bsetb #neg_bit,FPSR_CC(%a6) |flag negative in the condition codes | ||
621 | |||
622 | end_unfr: |copy the table entry at (a1) over the operand at (a0) | ||
623 | movew LOCAL_EX(%a1),LOCAL_EX(%a0) |be careful not to overwrite sign | ||
624 | movel LOCAL_HI(%a1),LOCAL_HI(%a0) |copy the LOCAL_HI lword | ||
625 | movel LOCAL_LO(%a1),LOCAL_LO(%a0) |copy the LOCAL_LO lword | ||
626 | rts | ||
627 | | | ||
628 | | reg_dest --- write byte, word, or long data to Dn | ||
629 | | | ||
630 | | | ||
631 | | Input: | ||
632 | | L_SCR1: Data | ||
633 | | d1: data size and dest register number formatted as: | ||
634 | | | ||
635 | | 32 5 4 3 2 1 0 | ||
636 | | ----------------------------------------------- | ||
637 | | | 0 | Size | Dest Reg # | | ||
638 | | ----------------------------------------------- | ||
639 | | | ||
640 | | Size is: | ||
641 | | 0 - Byte | ||
642 | | 1 - Word | ||
643 | | 2 - Long/Single | ||
644 | | | ||
645 | pregdst: |jump table indexed by d1 = (size << 3) | register | ||
646 | .long byte_d0 |0-7: size 0, byte writes to d0-d7 | ||
647 | .long byte_d1 | ||
648 | .long byte_d2 | ||
649 | .long byte_d3 | ||
650 | .long byte_d4 | ||
651 | .long byte_d5 | ||
652 | .long byte_d6 | ||
653 | .long byte_d7 | ||
654 | .long word_d0 |8-15: size 1, word writes to d0-d7 | ||
655 | .long word_d1 | ||
656 | .long word_d2 | ||
657 | .long word_d3 | ||
658 | .long word_d4 | ||
659 | .long word_d5 | ||
660 | .long word_d6 | ||
661 | .long word_d7 | ||
662 | .long long_d0 |16-23: size 2, long/single writes to d0-d7 | ||
663 | .long long_d1 | ||
664 | .long long_d2 | ||
665 | .long long_d3 | ||
666 | .long long_d4 | ||
667 | .long long_d5 | ||
668 | .long long_d6 | ||
669 | .long long_d7 | ||
670 | |||
671 | reg_dest: |overwrites a0; d1 is not range checked | ||
672 | leal pregdst,%a0 |a0 = base of the jump table | ||
673 | movel %a0@(%d1:l:4),%a0 |a0 = pregdst[d1] | ||
674 | jmp (%a0) |the handler's rts returns to reg_dest's caller | ||
675 | |||
676 | byte_d0: | ||
677 | moveb L_SCR1(%a6),USER_D0+3(%a6) |d0: write low byte of the USER_D0 frame slot | ||
678 | rts | ||
679 | byte_d1: | ||
680 | moveb L_SCR1(%a6),USER_D1+3(%a6) |d1: write low byte of the USER_D1 frame slot | ||
681 | rts | ||
682 | byte_d2: | ||
683 | moveb L_SCR1(%a6),%d2 |d2-d7: write the live register directly | ||
684 | rts | ||
685 | byte_d3: | ||
686 | moveb L_SCR1(%a6),%d3 | ||
687 | rts | ||
688 | byte_d4: | ||
689 | moveb L_SCR1(%a6),%d4 | ||
690 | rts | ||
691 | byte_d5: | ||
692 | moveb L_SCR1(%a6),%d5 | ||
693 | rts | ||
694 | byte_d6: | ||
695 | moveb L_SCR1(%a6),%d6 | ||
696 | rts | ||
697 | byte_d7: | ||
698 | moveb L_SCR1(%a6),%d7 | ||
699 | rts | ||
700 | word_d0: | ||
701 | movew L_SCR1(%a6),USER_D0+2(%a6) |d0: write low word of the USER_D0 frame slot | ||
702 | rts | ||
703 | word_d1: | ||
704 | movew L_SCR1(%a6),USER_D1+2(%a6) |d1: write low word of the USER_D1 frame slot | ||
705 | rts | ||
706 | word_d2: | ||
707 | movew L_SCR1(%a6),%d2 |d2-d7: write the live register directly | ||
708 | rts | ||
709 | word_d3: | ||
710 | movew L_SCR1(%a6),%d3 | ||
711 | rts | ||
712 | word_d4: | ||
713 | movew L_SCR1(%a6),%d4 | ||
714 | rts | ||
715 | word_d5: | ||
716 | movew L_SCR1(%a6),%d5 | ||
717 | rts | ||
718 | word_d6: | ||
719 | movew L_SCR1(%a6),%d6 | ||
720 | rts | ||
721 | word_d7: | ||
722 | movew L_SCR1(%a6),%d7 | ||
723 | rts | ||
724 | long_d0: | ||
725 | movel L_SCR1(%a6),USER_D0(%a6) |d0: write the whole USER_D0 frame slot | ||
726 | rts | ||
727 | long_d1: | ||
728 | movel L_SCR1(%a6),USER_D1(%a6) |d1: write the whole USER_D1 frame slot | ||
729 | rts | ||
730 | long_d2: | ||
731 | movel L_SCR1(%a6),%d2 |d2-d7: write the live register directly | ||
732 | rts | ||
733 | long_d3: | ||
734 | movel L_SCR1(%a6),%d3 | ||
735 | rts | ||
736 | long_d4: | ||
737 | movel L_SCR1(%a6),%d4 | ||
738 | rts | ||
739 | long_d5: | ||
740 | movel L_SCR1(%a6),%d5 | ||
741 | rts | ||
742 | long_d6: | ||
743 | movel L_SCR1(%a6),%d6 | ||
744 | rts | ||
745 | long_d7: | ||
746 | movel L_SCR1(%a6),%d7 | ||
747 | rts | ||
748 | |end | ||
diff --git a/arch/m68k/fpsp040/x_bsun.S b/arch/m68k/fpsp040/x_bsun.S new file mode 100644 index 000000000000..039247b09c8b --- /dev/null +++ b/arch/m68k/fpsp040/x_bsun.S | |||
@@ -0,0 +1,47 @@ | |||
1 | | | ||
2 | | x_bsun.sa 3.3 7/1/91 | ||
3 | | | ||
4 | | fpsp_bsun --- FPSP handler for branch/set on unordered exception | ||
5 | | | ||
6 | | Copy the PC to FPIAR to maintain 881/882 compatibility | ||
7 | | | ||
8 | | The real_bsun handler will need to perform further corrective | ||
9 | | measures as outlined in the 040 User's Manual on pages | ||
10 | | 9-41f, section 9.8.3. | ||
11 | | | ||
12 | |||
13 | | Copyright (C) Motorola, Inc. 1990 | ||
14 | | All Rights Reserved | ||
15 | | | ||
16 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
17 | | The copyright notice above does not evidence any | ||
18 | | actual or intended publication of such source code. | ||
19 | |||
20 | X_BSUN: |idnt 2,1 | Motorola 040 Floating Point Software Package | ||
21 | |||
22 | |section 8 | ||
23 | |||
24 | #include "fpsp.h" | ||
25 | |||
26 | |xref real_bsun | ||
27 | |||
28 | .global fpsp_bsun | ||
29 | fpsp_bsun: | ||
30 | | Build the local frame and save the FPU and integer state | ||
31 | link %a6,#-LOCAL_SIZE | ||
32 | fsave -(%a7) | ||
33 | moveml %d0-%d1/%a0-%a1,USER_DA(%a6) | ||
34 | fmovemx %fp0-%fp3,USER_FP0(%a6) | ||
35 | fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) | ||
36 | |||
37 | | Copy the stacked exception PC into the saved FPIAR | ||
38 | movel EXC_PC(%a6),USER_FPIAR(%a6) | ||
39 | | Restore everything (FPIAR now holds the PC) and chain to real_bsun | ||
40 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 | ||
41 | fmovemx USER_FP0(%a6),%fp0-%fp3 | ||
42 | fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar | ||
43 | frestore (%a7)+ | ||
44 | unlk %a6 | ||
45 | bral real_bsun | ||
46 | | | ||
47 | |end | ||
diff --git a/arch/m68k/fpsp040/x_fline.S b/arch/m68k/fpsp040/x_fline.S new file mode 100644 index 000000000000..3917710b0fde --- /dev/null +++ b/arch/m68k/fpsp040/x_fline.S | |||
@@ -0,0 +1,104 @@ | |||
1 | | | ||
2 | | x_fline.sa 3.3 1/10/91 | ||
3 | | | ||
4 | | fpsp_fline --- FPSP handler for fline exception | ||
5 | | | ||
6 | | First determine if the exception is one of the unimplemented | ||
7 | | floating point instructions. If so, let fpsp_unimp handle it. | ||
8 | | Next, determine if the instruction is an fmovecr with a non-zero | ||
9 | | <ea> field. If so, handle here and return. Otherwise, it | ||
10 | | must be a real F-line exception. | ||
11 | | | ||
12 | |||
13 | | Copyright (C) Motorola, Inc. 1990 | ||
14 | | All Rights Reserved | ||
15 | | | ||
16 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
17 | | The copyright notice above does not evidence any | ||
18 | | actual or intended publication of such source code. | ||
19 | |||
20 | X_FLINE: |idnt 2,1 | Motorola 040 Floating Point Software Package | ||
21 | |||
22 | |section 8 | ||
23 | |||
24 | #include "fpsp.h" | ||
25 | |||
26 | |xref real_fline | ||
27 | |xref fpsp_unimp | ||
28 | |xref uni_2 | ||
29 | |xref mem_read | ||
30 | |xref fpsp_fmt_error | ||
31 | |||
32 | .global fpsp_fline | ||
33 | fpsp_fline: | ||
34 | | | ||
35 | | check for unimplemented vector first. Use EXC_VEC-4 because | ||
36 | | the equate is valid only after a 'link a6' has pushed one more | ||
37 | | long onto the stack. | ||
38 | | | ||
39 | cmpw #UNIMP_VEC,EXC_VEC-4(%a7) | ||
40 | beql fpsp_unimp |unimplemented vector: let fpsp_unimp handle it | ||
41 | |||
42 | | | ||
43 | | fmovecr with non-zero <ea> handling here | ||
44 | | | ||
45 | subl #4,%a7 |4 accounts for 2-word difference | ||
46 | | ;between six word frame (unimp) and | ||
47 | | ;four word frame | ||
48 | link %a6,#-LOCAL_SIZE | ||
49 | fsave -(%a7) | ||
50 | moveml %d0-%d1/%a0-%a1,USER_DA(%a6) | ||
51 | moveal EXC_PC+4(%a6),%a0 |get address of fline instruction | ||
52 | leal L_SCR1(%a6),%a1 |use L_SCR1 as scratch | ||
53 | movel #4,%d0 |count: fline word plus command word | ||
54 | addl #4,%a6 |to offset the sub.l #4,a7 above so that | ||
55 | | ;a6 can point correctly to the stack frame | ||
56 | | ;before branching to mem_read | ||
57 | bsrl mem_read | ||
58 | subl #4,%a6 |undo the a6 adjustment made for mem_read | ||
59 | movel L_SCR1(%a6),%d0 |d0 contains the fline and command word | ||
60 | bfextu %d0{#4:#3},%d1 |extract coprocessor id | ||
61 | cmpib #1,%d1 |check if cpid=1 | ||
62 | bne not_mvcr |exit if not | ||
63 | bfextu %d0{#16:#6},%d1 |top 6 bits of the command word | ||
64 | cmpib #0x17,%d1 |check if it is an FMOVECR encoding | ||
65 | bne not_mvcr | ||
66 | | ;if an FMOVECR instruction, fix stack | ||
67 | | ;and enter the unimp handler at uni_2 | ||
68 | fix_stack: | ||
69 | cmpib #VER_40,(%a7) |test for orig unimp frame | ||
70 | bnes ck_rev | ||
71 | subl #UNIMP_40_SIZE-4,%a7 |emulate an orig fsave | ||
72 | moveb #VER_40,(%a7) |frame byte 0: version | ||
73 | moveb #UNIMP_40_SIZE-4,1(%a7) |frame byte 1: size | ||
74 | clrw 2(%a7) |frame bytes 2-3: zero | ||
75 | bras fix_con | ||
76 | ck_rev: | ||
77 | cmpib #VER_41,(%a7) |test for rev unimp frame | ||
78 | bnel fpsp_fmt_error |if not $40 or $41, exit with error | ||
79 | subl #UNIMP_41_SIZE-4,%a7 |emulate a rev fsave | ||
80 | moveb #VER_41,(%a7) |frame byte 0: version | ||
81 | moveb #UNIMP_41_SIZE-4,1(%a7) |frame byte 1: size | ||
82 | clrw 2(%a7) |frame bytes 2-3: zero | ||
83 | fix_con: | ||
84 | movew EXC_SR+4(%a6),EXC_SR(%a6) |move stacked sr to new position | ||
85 | movel EXC_PC+4(%a6),EXC_PC(%a6) |move stacked pc to new position | ||
86 | fmovel EXC_PC(%a6),%FPIAR |point FPIAR to fline inst | ||
87 | movel #4,%d1 |instruction length in bytes | ||
88 | addl %d1,EXC_PC(%a6) |increment stacked pc value to next inst | ||
89 | movew #0x202c,EXC_VEC(%a6) |reformat vector to unimp | ||
90 | clrl EXC_EA(%a6) |clear the EXC_EA field | ||
91 | movew %d0,CMDREG1B(%a6) |move the lower word into CMDREG1B | ||
92 | clrl E_BYTE(%a6) |clear the longword starting at E_BYTE | ||
93 | bsetb #UFLAG,T_BYTE(%a6) |set UFLAG in T_BYTE | ||
94 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore d0-d1/a0-a1 | ||
95 | bral uni_2 | ||
96 | |||
97 | not_mvcr: | ||
98 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore d0-d1/a0-a1 | ||
99 | frestore (%a7)+ | ||
100 | unlk %a6 | ||
101 | addl #4,%a7 |undo the subl #4,%a7 done at entry | ||
102 | bral real_fline | ||
103 | |||
104 | |end | ||
diff --git a/arch/m68k/fpsp040/x_operr.S b/arch/m68k/fpsp040/x_operr.S new file mode 100644 index 000000000000..b0f54bcb49a7 --- /dev/null +++ b/arch/m68k/fpsp040/x_operr.S | |||
@@ -0,0 +1,356 @@ | |||
1 | | | ||
2 | | x_operr.sa 3.5 7/1/91 | ||
3 | | | ||
4 | | fpsp_operr --- FPSP handler for operand error exception | ||
5 | | | ||
6 | | See 68040 User's Manual pp. 9-44f | ||
7 | | | ||
8 | | Note 1: For trap disabled 040 does the following: | ||
9 | | If the dest is a fp reg, then an extended precision non_signaling | ||
10 | | NAN is stored in the dest reg. If the dest format is b, w, or l and | ||
11 | | the source op is a NAN, then garbage is stored as the result (actually | ||
12 | | the upper 32 bits of the mantissa are sent to the integer unit). If | ||
13 | | the dest format is integer (b, w, l) and the operr is caused by | ||
14 | | integer overflow, or the source op is inf, then the result stored is | ||
15 | | garbage. | ||
16 | | There are three cases in which operr is incorrectly signaled on the | ||
17 | | 040. This occurs for move_out of format b, w, or l for the largest | ||
18 | | negative integer (-2^7 for b, -2^15 for w, -2^31 for l). | ||
19 | | | ||
20 | | On opclass = 011 fmove.(b,w,l) that causes a conversion | ||
21 | | overflow -> OPERR, the exponent in wbte (and fpte) is: | ||
22 | | byte 56 - (62 - exp) | ||
23 | | word 48 - (62 - exp) | ||
24 | | long 32 - (62 - exp) | ||
25 | | | ||
26 | | where exp = (true exp) - 1 | ||
27 | | | ||
28 | | So, wbtemp and fptemp will contain the following on erroneously | ||
29 | | signalled operr: | ||
30 | | fpts = 1 | ||
31 | | fpte = $4000 (15 bit externally) | ||
32 | | byte fptm = $ffffffff ffffff80 | ||
33 | | word fptm = $ffffffff ffff8000 | ||
34 | | long fptm = $ffffffff 80000000 | ||
35 | | | ||
36 | | Note 2: For trap enabled 040 does the following: | ||
37 | | If the inst is move_out, then same as Note 1. | ||
38 | | If the inst is not move_out, the dest is not modified. | ||
39 | | The exceptional operand is not defined for integer overflow | ||
40 | | during a move_out. | ||
41 | | | ||
42 | |||
43 | | Copyright (C) Motorola, Inc. 1990 | ||
44 | | All Rights Reserved | ||
45 | | | ||
46 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
47 | | The copyright notice above does not evidence any | ||
48 | | actual or intended publication of such source code. | ||
49 | |||
50 | X_OPERR: |idnt 2,1 | Motorola 040 Floating Point Software Package | ||
51 | |||
52 | |section 8 | ||
53 | |||
54 | #include "fpsp.h" | ||
55 | |||
56 | |xref mem_write | ||
57 | |xref real_operr | ||
58 | |xref real_inex | ||
59 | |xref get_fline | ||
60 | |xref fpsp_done | ||
61 | |xref reg_dest | ||
62 | |||
63 | .global fpsp_operr | ||
64 | fpsp_operr: | ||
65 | | Build the local frame and save the FPU and integer state | ||
66 | link %a6,#-LOCAL_SIZE | ||
67 | fsave -(%a7) | ||
68 | moveml %d0-%d1/%a0-%a1,USER_DA(%a6) | ||
69 | fmovemx %fp0-%fp3,USER_FP0(%a6) | ||
70 | fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) | ||
71 | |||
72 | | | ||
73 | | Check if this is an opclass 3 instruction. | ||
74 | | If so, fall through, else branch to operr_end | ||
75 | | | ||
76 | btstb #TFLAG,T_BYTE(%a6) | ||
77 | beqs operr_end |TFLAG clear: not opclass 3 | ||
78 | |||
79 | | | ||
80 | | If the destination size is B,W,or L, the operr must be | ||
81 | | handled here. | ||
82 | | | ||
83 | movel CMDREG1B(%a6),%d0 | ||
84 | bfextu %d0{#3:#3},%d0 |0=long, 4=word, 6=byte | ||
85 | cmpib #0,%d0 |determine size; check long | ||
86 | beq operr_long | ||
87 | cmpib #4,%d0 |check word | ||
88 | beq operr_word | ||
89 | cmpib #6,%d0 |check byte | ||
90 | beq operr_byte |any other format falls into operr_end | ||
91 | |||
92 | | | ||
93 | | The size is not B,W,or L, so the operr is handled by the | ||
94 | | kernel handler. Set the operr bits and clean up, leaving | ||
95 | | only the integer exception frame on the stack, and the | ||
96 | | fpu in the original exceptional state. | ||
97 | | | ||
98 | operr_end: | ||
99 | bsetb #operr_bit,FPSR_EXCEPT(%a6) |set operr | ||
100 | bsetb #aiop_bit,FPSR_AEXCEPT(%a6) |set aiop | ||
101 | |||
102 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 | ||
103 | fmovemx USER_FP0(%a6),%fp0-%fp3 | ||
104 | fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar | ||
105 | frestore (%a7)+ | ||
106 | unlk %a6 | ||
107 | bral real_operr | ||
108 | |||
109 | operr_long: | ||
110 | moveql #4,%d1 |write size to d1 | ||
111 | moveb STAG(%a6),%d0 |test stag for nan | ||
112 | andib #0xe0,%d0 |clr all but tag | ||
113 | cmpib #0x60,%d0 |check for nan | ||
114 | beq operr_nan | ||
115 | cmpil #0x80000000,FPTEMP_LO(%a6) |test if ls lword is special | ||
116 | bnes chklerr |if not equal, check for incorrect operr | ||
117 | bsr check_upper |check if exp and ms mant are special | ||
118 | tstl %d0 |d0 clear means the upper part is special too | ||
119 | bnes chklerr |if d0 is true, check for incorrect operr | ||
120 | movel #0x80000000,%d0 |store special case result | ||
121 | bsr operr_store | ||
122 | bra not_enabled |clean and exit | ||
123 | | | ||
124 | | CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE | ||
125 | | | ||
126 | chklerr: | ||
127 | movew FPTEMP_EX(%a6),%d0 | ||
128 | andw #0x7FFF,%d0 |ignore sign bit | ||
129 | cmpw #0x3FFE,%d0 |this is the only possible exponent value | ||
130 | bnes chklerr2 | ||
131 | fixlong: | ||
132 | movel FPTEMP_LO(%a6),%d0 |result is the ls lword of FPTEMP | ||
133 | bsr operr_store | ||
134 | bra not_enabled | ||
135 | chklerr2: | ||
136 | movew FPTEMP_EX(%a6),%d0 | ||
137 | andw #0x7FFF,%d0 |ignore sign bit | ||
138 | cmpw #0x4000,%d0 | ||
139 | bcc store_max |exponent >= 0x4000 (unsigned): out of range | ||
140 | |||
141 | movel FPTEMP_LO(%a6),%d0 | ||
142 | andl #0x7FFF0000,%d0 |look for all 1's on bits 30-16 | ||
143 | cmpl #0x7FFF0000,%d0 | ||
144 | beqs fixlong | ||
145 | |||
146 | tstl FPTEMP_LO(%a6) | ||
147 | bpls chklepos |LO non-negative: HI must be zero | ||
148 | cmpl #0xFFFFFFFF,FPTEMP_HI(%a6) |LO negative: HI must be all ones | ||
149 | beqs fixlong | ||
150 | bra store_max | ||
151 | chklepos: | ||
152 | tstl FPTEMP_HI(%a6) | ||
153 | beqs fixlong | ||
154 | bra store_max | ||
155 | |||
156 | operr_word: | ||
157 | moveql #2,%d1 |write size to d1 | ||
158 | moveb STAG(%a6),%d0 |test stag for nan | ||
159 | andib #0xe0,%d0 |clr all but tag | ||
160 | cmpib #0x60,%d0 |check for nan | ||
161 | beq operr_nan | ||
162 | cmpil #0xffff8000,FPTEMP_LO(%a6) |test if ls lword is special | ||
163 | bnes chkwerr |if not equal, check for incorrect operr | ||
164 | bsr check_upper |check if exp and ms mant are special | ||
165 | tstl %d0 |d0 clear means the upper part is special too | ||
166 | bnes chkwerr |if d0 is true, check for incorrect operr | ||
167 | movel #0x80000000,%d0 |special case result, 0x8000 in the upper word | ||
168 | bsr operr_store | ||
169 | bra not_enabled |clean and exit | ||
170 | | | ||
171 | | CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE | ||
172 | | | ||
173 | chkwerr: | ||
174 | movew FPTEMP_EX(%a6),%d0 | ||
175 | andw #0x7FFF,%d0 |ignore sign bit | ||
176 | cmpw #0x3FFE,%d0 |this is the only possible exponent value | ||
177 | bnes store_max | ||
178 | movel FPTEMP_LO(%a6),%d0 | ||
179 | swap %d0 |move the low word into the upper word | ||
180 | bsr operr_store | ||
181 | bra not_enabled | ||
182 | |||
183 | operr_byte: | ||
184 | moveql #1,%d1 |write size to d1 | ||
185 | moveb STAG(%a6),%d0 |test stag for nan | ||
186 | andib #0xe0,%d0 |clr all but tag | ||
187 | cmpib #0x60,%d0 |check for nan | ||
188 | beqs operr_nan | ||
189 | cmpil #0xffffff80,FPTEMP_LO(%a6) |test if ls lword is special | ||
190 | bnes chkberr |if not equal, check for incorrect operr | ||
191 | bsr check_upper |check if exp and ms mant are special | ||
192 | tstl %d0 |d0 clear means the upper part is special too | ||
193 | bnes chkberr |if d0 is true, check for incorrect operr | ||
194 | movel #0x80000000,%d0 |special case result, 0x80 in the top byte | ||
195 | bsr operr_store | ||
196 | bra not_enabled |clean and exit | ||
197 | | | ||
198 | | CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE | ||
199 | | | ||
200 | chkberr: | ||
201 | movew FPTEMP_EX(%a6),%d0 | ||
202 | andw #0x7FFF,%d0 |ignore sign bit | ||
203 | cmpw #0x3FFE,%d0 |this is the only possible exponent value | ||
204 | bnes store_max | ||
205 | movel FPTEMP_LO(%a6),%d0 | ||
206 | asll #8,%d0 |shift left 8, then swap: | ||
207 | swap %d0 |the low byte ends up in d0{31:24} | ||
208 | bsr operr_store | ||
209 | bra not_enabled | ||
210 | |||
211 | | | ||
212 | | This operr condition is not of the special case. Set operr | ||
213 | | and aiop and write the portion of the nan to memory for the | ||
214 | | given size. | ||
215 | | | ||
216 | operr_nan: |entered with the write size already in d1 | ||
217 | orl #opaop_mask,USER_FPSR(%a6) |set operr & aiop | ||
218 | |||
219 | movel ETEMP_HI(%a6),%d0 |output will be from upper 32 bits | ||
220 | bsr operr_store |store d0 using the size in d1 | ||
221 | bra end_operr | ||
222 | | | ||
223 | | Store_max loads the max pos or negative for the size, sets | ||
224 | | the operr and aiop bits, and clears inex and ainex, incorrectly | ||
225 | | set by the 040. | ||
226 | | | ||
227 | store_max: |entered with the write size already in d1 | ||
228 | orl #opaop_mask,USER_FPSR(%a6) |set operr & aiop | ||
229 | bclrb #inex2_bit,FPSR_EXCEPT(%a6) |clear inex2 | ||
230 | bclrb #ainex_bit,FPSR_AEXCEPT(%a6) |clear ainex | ||
231 | fmovel #0,%FPSR |zero the live FPSR | ||
232 | |||
233 | tstw FPTEMP_EX(%a6) |check sign | ||
234 | blts load_neg |exponent word negative: sign bit set | ||
235 | movel #0x7fffffff,%d0 |max positive pattern | ||
236 | bsr operr_store | ||
237 | bra end_operr | ||
238 | load_neg: | ||
239 | movel #0x80000000,%d0 |max negative pattern | ||
240 | bsr operr_store | ||
241 | bra end_operr | ||
242 | |||
243 | | | ||
244 | | This routine stores the data in d0, for the given size in d1, | ||
245 | | to memory or data register as required. A read of the fline | ||
246 | | is required to determine the destination. | ||
247 | | | ||
248 | operr_store: | ||
249 | movel %d0,L_SCR1(%a6) |move write data to L_SCR1 | ||
250 | movel %d1,-(%a7) |save operand size across get_fline | ||
251 | bsrl get_fline |fline returned in d0 | ||
252 | movel (%a7)+,%d1 |restore operand size | ||
253 | bftst %d0{#26:#3} |if mode is zero, dest is Dn | ||
254 | bnes dest_mem | ||
255 | | | ||
256 | | Destination is Dn. Get register number from d0. Data is in | ||
257 | | L_SCR1(a6). D1 has size: 1=byte,2=word,4=long/single | ||
258 | | | ||
259 | andil #7,%d0 |isolate register number | ||
260 | cmpil #4,%d1 | ||
261 | beqs op_long |the most frequent case | ||
262 | cmpil #2,%d1 | ||
263 | bnes op_con |not word: byte, no size bits added | ||
264 | orl #8,%d0 |word: add 0x08 to the register number | ||
265 | bras op_con | ||
266 | op_long: | ||
267 | orl #0x10,%d0 |long: add 0x10 to the register number | ||
268 | op_con: | ||
269 | movel %d0,%d1 |format size:reg for reg_dest | ||
270 | bral reg_dest |call to reg_dest returns to caller | ||
271 | | ;of operr_store | ||
272 | | | ||
273 | | Destination is memory. Get <ea> from integer exception frame | ||
274 | | and call mem_write. | ||
275 | | | ||
276 | dest_mem: | ||
277 | leal L_SCR1(%a6),%a0 |put ptr to write data in a0 | ||
278 | movel EXC_EA(%a6),%a1 |put user destination address in a1 | ||
279 | movel %d1,%d0 |put size in d0 | ||
280 | bsrl mem_write |write d0 bytes from (a0) to (a1) | ||
281 | rts | ||
282 | | | ||
283 | | Check the sign/exponent word for $c000 or $bfff and the upper | ||
284 | | 32 bits of the mantissa for $ffffffff. If both hold, return d0 clr | ||
285 | | (operr was incorrectly signalled; the caller decides what to write | ||
286 | | out). If not, it is a real operr, and d0 is set to 1. | ||
287 | | | ||
288 | check_upper: | ||
289 | cmpil #0xffffffff,FPTEMP_HI(%a6) |check if upper mantissa lword is all 1's | ||
290 | bnes true_operr |if not all 1's then was true operr | ||
291 | cmpiw #0xc000,FPTEMP_EX(%a6) |check if incorrectly signalled | ||
292 | beqs not_true_operr |branch if not true operr | ||
293 | cmpiw #0xbfff,FPTEMP_EX(%a6) |check if incorrectly signalled | ||
294 | beqs not_true_operr |branch if not true operr | ||
295 | true_operr: | ||
296 | movel #1,%d0 |signal real operr | ||
297 | rts | ||
298 | not_true_operr: | ||
299 | clrl %d0 |signal no real operr | ||
300 | rts | ||
301 | |||
302 | | | ||
303 | | End_operr tests for operr enabled. If not, it continues at | ||
304 | | not_enabled (inexact check, then exit). If enabled, it cleans up the stack and branches | ||
305 | | to the kernel operr handler with only the integer exception | ||
306 | | frame on the stack and the fpu in the original exceptional state | ||
307 | | with correct data written to the destination. | ||
308 | | | ||
309 | end_operr: | ||
310 | btstb #operr_bit,FPCR_ENABLE(%a6) |is the operr trap enabled? | ||
311 | beqs not_enabled |no: check for inexact, then exit | ||
312 | enabled: | ||
313 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore d0-d1/a0-a1 | ||
314 | fmovemx USER_FP0(%a6),%fp0-%fp3 |restore fp0-fp3 | ||
315 | fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar |restore fp control registers | ||
316 | frestore (%a7)+ |reload the fpu state frame from the stack | ||
317 | unlk %a6 | ||
318 | bral real_operr |continue in the real operr handler | ||
319 | |||
320 | not_enabled: | ||
321 | | | ||
322 | | It is possible to have either inex2 or inex1 exceptions with the | ||
323 | | operr. If the inex enable bit is set in the FPCR, and either | ||
324 | | inex2 or inex1 occurred, we must clean up and branch to the | ||
325 | | real inex handler. | ||
326 | | | ||
327 | ck_inex: | ||
328 | moveb FPCR_ENABLE(%a6),%d0 |get the exception enable byte | ||
329 | andb FPSR_EXCEPT(%a6),%d0 |keep exceptions that are enabled and reported | ||
330 | andib #0x3,%d0 |isolate the two low bits (inex2/inex1) | ||
331 | beq operr_exit |neither: plain exit | ||
332 | | | ||
333 | | Inexact enabled and reported, and we must take an inexact exception. | ||
334 | | | ||
335 | take_inex: | ||
336 | moveb #INEX_VEC,EXC_VEC+1(%a6) |put the inexact vector into the exception frame | ||
337 | movel USER_FPSR(%a6),FPSR_SHADOW(%a6) |copy the saved FPSR to FPSR_SHADOW | ||
338 | orl #sx_mask,E_BYTE(%a6) |set the sx_mask bits at E_BYTE | ||
339 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore d0-d1/a0-a1 | ||
340 | fmovemx USER_FP0(%a6),%fp0-%fp3 |restore fp0-fp3 | ||
341 | fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar |restore fp control registers | ||
342 | frestore (%a7)+ |reload the fpu state frame from the stack | ||
343 | unlk %a6 | ||
344 | bral real_inex |continue in the real inex handler | ||
345 | | | ||
346 | | Since operr is only an E1 exception, there is no need to frestore | ||
347 | | any state back to the fpu. | ||
348 | | | ||
349 | operr_exit: | ||
350 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore d0-d1/a0-a1 | ||
351 | fmovemx USER_FP0(%a6),%fp0-%fp3 |restore fp0-fp3 | ||
352 | fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar |restore fp control registers | ||
353 | unlk %a6 |no frestore on this path; unlk resets a7 from a6 | ||
354 | bral fpsp_done | ||
355 | |||
356 | |end | ||
diff --git a/arch/m68k/fpsp040/x_ovfl.S b/arch/m68k/fpsp040/x_ovfl.S new file mode 100644 index 000000000000..22cb8b42c7b6 --- /dev/null +++ b/arch/m68k/fpsp040/x_ovfl.S | |||
@@ -0,0 +1,186 @@ | |||
1 | | | ||
2 | | x_ovfl.sa 3.5 7/1/91 | ||
3 | | | ||
4 | | fpsp_ovfl --- FPSP handler for overflow exception | ||
5 | | | ||
6 | | Overflow occurs when a floating-point intermediate result is | ||
7 | | too large to be represented in a floating-point data register, | ||
8 | | or when storing to memory, the contents of a floating-point | ||
9 | | data register are too large to be represented in the | ||
10 | | destination format. | ||
11 | | | ||
12 | | Trap disabled results | ||
13 | | | ||
14 | | If the instruction is move_out, then garbage is stored in the | ||
15 | | destination. If the instruction is not move_out, then the | ||
16 | | destination is not affected. For 68881 compatibility, the | ||
17 | | following values should be stored at the destination, based | ||
18 | | on the current rounding mode: | ||
19 | | | ||
20 | | RN Infinity with the sign of the intermediate result. | ||
21 | | RZ Largest magnitude number, with the sign of the | ||
22 | | intermediate result. | ||
23 | | RM For pos overflow, the largest pos number. For neg overflow, | ||
24 | | -infinity | ||
25 | | RP For pos overflow, +infinity. For neg overflow, the largest | ||
26 | | neg number | ||
27 | | | ||
28 | | Trap enabled results | ||
29 | | All trap disabled code applies. In addition the exceptional | ||
30 | | operand needs to be made available to the users exception handler | ||
31 | | with a bias of $6000 subtracted from the exponent. | ||
32 | | | ||
33 | | | ||
34 | |||
35 | | Copyright (C) Motorola, Inc. 1990 | ||
36 | | All Rights Reserved | ||
37 | | | ||
38 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
39 | | The copyright notice above does not evidence any | ||
40 | | actual or intended publication of such source code. | ||
41 | |||
42 | X_OVFL: |idnt 2,1 | Motorola 040 Floating Point Software Package | ||
43 | |||
44 | |section 8 | ||
45 | |||
46 | #include "fpsp.h" | ||
47 | |||
48 | |xref ovf_r_x2 | ||
49 | |xref ovf_r_x3 | ||
50 | |xref store | ||
51 | |xref real_ovfl | ||
52 | |xref real_inex | ||
53 | |xref fpsp_done | ||
54 | |xref g_opcls | ||
55 | |xref b1238_fix | ||
56 | |||
57 | .global fpsp_ovfl | ||
58 | fpsp_ovfl: | ||
59 | link %a6,#-LOCAL_SIZE |allocate the local work area | ||
60 | fsave -(%a7) |push the fpu state frame | ||
61 | moveml %d0-%d1/%a0-%a1,USER_DA(%a6) |save d0-d1/a0-a1 | ||
62 | fmovemx %fp0-%fp3,USER_FP0(%a6) |save fp0-fp3 | ||
63 | fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) |save fp control registers | ||
64 | |||
65 | | | ||
66 | | The 040 doesn't set the AINEX bit in the FPSR, the following | ||
67 | | line temporarily rectifies this error. | ||
68 | | | ||
69 | bsetb #ainex_bit,FPSR_AEXCEPT(%a6) | ||
70 | | | ||
71 | bsrl ovf_adj |build the overflow result & store it | ||
72 | | | ||
73 | | if overflow traps not enabled check for inexact exception | ||
74 | | | ||
75 | btstb #ovfl_bit,FPCR_ENABLE(%a6) | ||
76 | beqs ck_inex | ||
77 | | | ||
78 | btstb #E3,E_BYTE(%a6) |E3 exception? | ||
79 | beqs no_e3_1 |no: skip the E3-only fixups | ||
80 | bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no | ||
81 | bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit | ||
82 | bsrl b1238_fix |test for bug1238 case | ||
83 | movel USER_FPSR(%a6),FPSR_SHADOW(%a6) |copy the saved FPSR to FPSR_SHADOW | ||
84 | orl #sx_mask,E_BYTE(%a6) |set the sx_mask bits at E_BYTE | ||
85 | no_e3_1: | ||
86 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore d0-d1/a0-a1 | ||
87 | fmovemx USER_FP0(%a6),%fp0-%fp3 |restore fp0-fp3 | ||
88 | fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar |restore fp control registers | ||
89 | frestore (%a7)+ |reload the fpu state frame from the stack | ||
90 | unlk %a6 | ||
91 | bral real_ovfl |continue in the real ovfl handler | ||
92 | | | ||
93 | | It is possible to have either inex2 or inex1 exceptions with the | ||
94 | | ovfl. Only the inex2 enable bit in the FPCR is tested here (the | ||
95 | | enabled-and-reported test is left commented out below); if it is | ||
96 | | set we must clean up and branch to the real inex handler. | ||
97 | | | ||
98 | ck_inex: | ||
99 | | move.b FPCR_ENABLE(%a6),%d0 | ||
100 | | and.b FPSR_EXCEPT(%a6),%d0 | ||
101 | | andi.b #$3,%d0 | ||
102 | btstb #inex2_bit,FPCR_ENABLE(%a6) |is inex2 enabled? | ||
103 | beqs ovfl_exit |no: normal exit | ||
104 | | | ||
105 | | Inexact enabled, and we must take an inexact exception. | ||
106 | | | ||
107 | take_inex: | ||
108 | btstb #E3,E_BYTE(%a6) |E3 exception? | ||
109 | beqs no_e3_2 |no: skip the E3-only fixups | ||
110 | bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no | ||
111 | bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit | ||
112 | bsrl b1238_fix |test for bug1238 case | ||
113 | movel USER_FPSR(%a6),FPSR_SHADOW(%a6) |copy the saved FPSR to FPSR_SHADOW | ||
114 | orl #sx_mask,E_BYTE(%a6) |set the sx_mask bits at E_BYTE | ||
115 | no_e3_2: | ||
116 | moveb #INEX_VEC,EXC_VEC+1(%a6) |put the inexact vector into the exception frame | ||
117 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore d0-d1/a0-a1 | ||
118 | fmovemx USER_FP0(%a6),%fp0-%fp3 |restore fp0-fp3 | ||
119 | fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar |restore fp control registers | ||
120 | frestore (%a7)+ |reload the fpu state frame from the stack | ||
121 | unlk %a6 | ||
122 | bral real_inex |continue in the real inex handler | ||
123 | |||
124 | ovfl_exit: | ||
125 | bclrb #E3,E_BYTE(%a6) |test and clear E3 bit | ||
126 | beqs e1_set |E3 was clear: E1 exit, no frestore | ||
127 | | | ||
128 | | Clear dirty bit on dest register in the frame before branching | ||
129 | | to b1238_fix. | ||
130 | | | ||
131 | bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no | ||
132 | bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit | ||
133 | bsrl b1238_fix |test for bug1238 case | ||
134 | |||
135 | movel USER_FPSR(%a6),FPSR_SHADOW(%a6) |copy the saved FPSR to FPSR_SHADOW | ||
136 | orl #sx_mask,E_BYTE(%a6) |set the sx_mask bits at E_BYTE | ||
137 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore d0-d1/a0-a1 | ||
138 | fmovemx USER_FP0(%a6),%fp0-%fp3 |restore fp0-fp3 | ||
139 | fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar |restore fp control registers | ||
140 | frestore (%a7)+ |reload the fpu state frame from the stack | ||
141 | unlk %a6 | ||
142 | bral fpsp_done | ||
143 | e1_set: | ||
144 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore d0-d1/a0-a1 | ||
145 | fmovemx USER_FP0(%a6),%fp0-%fp3 |restore fp0-fp3 | ||
146 | fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar |restore fp control registers | ||
147 | unlk %a6 |no frestore; unlk drops the fsave frame pushed after link | ||
148 | bral fpsp_done | ||
149 | |||
150 | | | ||
151 | | ovf_adj --- pick the operand (WBTEMP for E3, else ETEMP), move its sign | ||
152 | | to LOCAL_SGN, get the overflow result and pass it to store. | ||
153 | ovf_adj: | ||
154 | | | ||
155 | | Have a0 point to the correct operand. | ||
156 | | | ||
157 | btstb #E3,E_BYTE(%a6) |test E3 bit | ||
158 | beqs ovf_e1 | ||
159 | |||
160 | lea WBTEMP(%a6),%a0 |E3: operand is WBTEMP | ||
161 | bras ovf_com | ||
162 | ovf_e1: | ||
163 | lea ETEMP(%a6),%a0 |E1: operand is ETEMP | ||
164 | |||
165 | ovf_com: | ||
166 | bclrb #sign_bit,LOCAL_EX(%a0) |test and clear the sign bit in the exponent | ||
167 | sne LOCAL_SGN(%a0) |LOCAL_SGN = $ff if the sign was set, else 0 | ||
168 | |||
169 | bsrl g_opcls |returns opclass in d0 | ||
170 | cmpiw #3,%d0 |check for opclass3 | ||
171 | bnes not_opc011 | ||
172 | |||
173 | | | ||
174 | | FPSR_CC is saved and restored because ovf_r_x3 affects it. The | ||
175 | | CCs are defined to be 'not affected' for the opclass3 instruction. | ||
176 | | | ||
177 | moveb FPSR_CC(%a6),L_SCR1(%a6) |save the condition code byte | ||
178 | bsrl ovf_r_x3 |returns a0 pointing to result | ||
179 | moveb L_SCR1(%a6),FPSR_CC(%a6) |put the condition code byte back | ||
180 | bral store |stores to memory or register | ||
181 | |||
182 | not_opc011: | ||
183 | bsrl ovf_r_x2 |returns a0 pointing to result | ||
184 | bral store |stores to memory or register | ||
185 | |||
186 | |end | ||
diff --git a/arch/m68k/fpsp040/x_snan.S b/arch/m68k/fpsp040/x_snan.S new file mode 100644 index 000000000000..039af573312e --- /dev/null +++ b/arch/m68k/fpsp040/x_snan.S | |||
@@ -0,0 +1,277 @@ | |||
1 | | | ||
2 | | x_snan.sa 3.3 7/1/91 | ||
3 | | | ||
4 | | fpsp_snan --- FPSP handler for signalling NAN exception | ||
5 | | | ||
6 | | SNAN for float -> integer conversions (integer conversion of | ||
7 | | an SNAN) is a non-maskable run-time exception. | ||
8 | | | ||
9 | | For trap disabled the 040 does the following: | ||
10 | | If the dest data format is s, d, or x, then the SNAN bit in the NAN | ||
11 | | is set to one and the resulting non-signaling NAN (truncated if | ||
12 | | necessary) is transferred to the dest. If the dest format is b, w, | ||
13 | | or l, then garbage is written to the dest (actually the upper 32 bits | ||
14 | | of the mantissa are sent to the integer unit). | ||
15 | | | ||
16 | | For trap enabled the 040 does the following: | ||
17 | | If the inst is move_out, then the results are the same as for trap | ||
18 | | disabled with the exception posted. If the instruction is not move_ | ||
19 | | out, the dest. is not modified, and the exception is posted. | ||
20 | | | ||
21 | |||
22 | | Copyright (C) Motorola, Inc. 1990 | ||
23 | | All Rights Reserved | ||
24 | | | ||
25 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
26 | | The copyright notice above does not evidence any | ||
27 | | actual or intended publication of such source code. | ||
28 | |||
29 | X_SNAN: |idnt 2,1 | Motorola 040 Floating Point Software Package | ||
30 | |||
31 | |section 8 | ||
32 | |||
33 | #include "fpsp.h" | ||
34 | |||
35 | |xref get_fline | ||
36 | |xref mem_write | ||
37 | |xref real_snan | ||
38 | |xref real_inex | ||
39 | |xref fpsp_done | ||
40 | |xref reg_dest | ||
41 | |||
42 | .global fpsp_snan | ||
43 | fpsp_snan: | ||
44 | link %a6,#-LOCAL_SIZE |allocate the local work area | ||
45 | fsave -(%a7) |push the fpu state frame | ||
46 | moveml %d0-%d1/%a0-%a1,USER_DA(%a6) |save d0-d1/a0-a1 | ||
47 | fmovemx %fp0-%fp3,USER_FP0(%a6) |save fp0-fp3 | ||
48 | fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) |save fp control registers | ||
49 | |||
50 | | | ||
51 | | Check if trap enabled | ||
52 | | | ||
53 | btstb #snan_bit,FPCR_ENABLE(%a6) | ||
54 | bnes ena |If enabled, then branch | ||
55 | |||
56 | bsrl move_out |else SNAN disabled | ||
57 | | | ||
58 | | It is possible to have an inex1 exception with the | ||
59 | | snan. If the inex enable bit is set in the FPCR, and either | ||
60 | | inex2 or inex1 occurred, we must clean up and branch to the | ||
61 | | real inex handler. | ||
62 | | | ||
63 | ck_inex: | ||
64 | moveb FPCR_ENABLE(%a6),%d0 |get the exception enable byte | ||
65 | andb FPSR_EXCEPT(%a6),%d0 |keep exceptions that are enabled and reported | ||
66 | andib #0x3,%d0 |isolate the two low bits (inex2/inex1) | ||
67 | beq end_snan |neither: normal exit | ||
68 | | | ||
69 | | Inexact enabled and reported, and we must take an inexact exception. | ||
70 | | | ||
71 | take_inex: | ||
72 | moveb #INEX_VEC,EXC_VEC+1(%a6) |put the inexact vector into the exception frame | ||
73 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore d0-d1/a0-a1 | ||
74 | fmovemx USER_FP0(%a6),%fp0-%fp3 |restore fp0-fp3 | ||
75 | fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar |restore fp control registers | ||
76 | frestore (%a7)+ |reload the fpu state frame from the stack | ||
77 | unlk %a6 | ||
78 | bral real_inex |continue in the real inex handler | ||
79 | | | ||
80 | | SNAN is enabled. Check if inst is move_out. | ||
81 | | Make any corrections to the 040 output as necessary. | ||
82 | | | ||
83 | ena: | ||
84 | btstb #5,CMDREG1B(%a6) |if set, inst is move out | ||
85 | beq not_out | ||
86 | |||
87 | bsrl move_out |write the nan to the b/w/l destination | ||
88 | |||
89 | report_snan: | ||
90 | moveb (%a7),VER_TMP(%a6) |save the first byte of the fsave frame | ||
91 | cmpib #VER_40,(%a7) |test for orig unimp frame | ||
92 | bnes ck_rev | ||
93 | moveql #13,%d0 |need to zero 14 lwords | ||
94 | bras rep_con | ||
95 | ck_rev: | ||
96 | moveql #11,%d0 |need to zero 12 lwords | ||
97 | rep_con: | ||
98 | clrl (%a7) |zero the lword at the top of the stack | ||
99 | loop1: | ||
100 | clrl -(%a7) |clear and dec a7 | ||
101 | dbra %d0,loop1 |runs d0+1 times | ||
102 | moveb VER_TMP(%a6),(%a7) |format a busy frame | ||
103 | moveb #BUSY_SIZE-4,1(%a7) |write busy size | ||
104 | movel USER_FPSR(%a6),FPSR_SHADOW(%a6) |copy the saved FPSR to FPSR_SHADOW | ||
105 | orl #sx_mask,E_BYTE(%a6) |set the sx_mask bits at E_BYTE | ||
106 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore d0-d1/a0-a1 | ||
107 | fmovemx USER_FP0(%a6),%fp0-%fp3 |restore fp0-fp3 | ||
108 | fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar |restore fp control registers | ||
109 | frestore (%a7)+ |reload the fpu from the expanded frame | ||
110 | unlk %a6 | ||
111 | bral real_snan |continue in the real snan handler | ||
112 | | | ||
113 | | Exit snan handler by expanding the unimp frame into a busy frame | ||
114 | | | ||
115 | end_snan: | ||
116 | bclrb #E1,E_BYTE(%a6) |clear the E1 bit | ||
117 | |||
118 | moveb (%a7),VER_TMP(%a6) |save the first byte of the fsave frame | ||
119 | cmpib #VER_40,(%a7) |test for orig unimp frame | ||
120 | bnes ck_rev2 | ||
121 | moveql #13,%d0 |need to zero 14 lwords | ||
122 | bras rep_con2 | ||
123 | ck_rev2: | ||
124 | moveql #11,%d0 |need to zero 12 lwords | ||
125 | rep_con2: | ||
126 | clrl (%a7) |zero the lword at the top of the stack | ||
127 | loop2: | ||
128 | clrl -(%a7) |clear and dec a7 | ||
129 | dbra %d0,loop2 |runs d0+1 times | ||
130 | moveb VER_TMP(%a6),(%a7) |format a busy frame | ||
131 | moveb #BUSY_SIZE-4,1(%a7) |write busy size | ||
132 | movel USER_FPSR(%a6),FPSR_SHADOW(%a6) |copy the saved FPSR to FPSR_SHADOW | ||
133 | orl #sx_mask,E_BYTE(%a6) |set the sx_mask bits at E_BYTE | ||
134 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore d0-d1/a0-a1 | ||
135 | fmovemx USER_FP0(%a6),%fp0-%fp3 |restore fp0-fp3 | ||
136 | fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar |restore fp control registers | ||
137 | frestore (%a7)+ |reload the fpu from the expanded frame | ||
138 | unlk %a6 | ||
139 | bral fpsp_done | ||
140 | |||
141 | | | ||
142 | | Move_out --- dispatch on the destination format (long, word or byte) | ||
143 | | | ||
144 | move_out: | ||
145 | movel EXC_EA(%a6),%a0 |get <ea> from exc frame | ||
146 | |||
147 | bfextu CMDREG1B(%a6){#3:#3},%d0 |move rx field to d0{2:0} | ||
148 | cmpil #0,%d0 |check for long | ||
149 | beqs sto_long |branch if move_out long | ||
150 | |||
151 | cmpil #4,%d0 |check for word | ||
152 | beqs sto_word |branch if move_out word | ||
153 | |||
154 | cmpil #6,%d0 |check for byte | ||
155 | beqs sto_byte |branch if move_out byte | ||
156 | |||
157 | | | ||
158 | | Not byte, word or long: nothing is written here | ||
159 | | | ||
160 | rts | ||
161 | | | ||
162 | | Get the 32 most significant bits of etemp mantissa | ||
163 | | | ||
164 | sto_long: | ||
165 | movel ETEMP_HI(%a6),%d1 | ||
166 | movel #4,%d0 |load byte count | ||
167 | | | ||
168 | | Set signalling nan bit | ||
169 | | | ||
170 | bsetl #30,%d1 | ||
171 | | | ||
172 | | Store to the users destination address | ||
173 | | | ||
174 | tstl %a0 |check if <ea> is 0 | ||
175 | beqs wrt_dn |destination is a data register | ||
176 | |||
177 | movel %d1,-(%a7) |move the snan onto the stack | ||
178 | movel %a0,%a1 |load dest addr into a1 | ||
179 | movel %a7,%a0 |load src addr of snan into a0 | ||
180 | bsrl mem_write |write snan to user memory | ||
181 | movel (%a7)+,%d1 |clear off stack | ||
182 | rts | ||
183 | | | ||
184 | | Get the 16 most significant bits of etemp mantissa | ||
185 | | | ||
186 | sto_word: | ||
187 | movel ETEMP_HI(%a6),%d1 | ||
188 | movel #2,%d0 |load byte count | ||
189 | | | ||
190 | | Set signalling nan bit | ||
191 | | | ||
192 | bsetl #30,%d1 | ||
193 | | | ||
194 | | Store to the users destination address | ||
195 | | | ||
196 | tstl %a0 |check if <ea> is 0 | ||
197 | beqs wrt_dn |destination is a data register | ||
198 | |||
199 | movel %d1,-(%a7) |move the snan onto the stack | ||
200 | movel %a0,%a1 |load dest addr into a1 | ||
201 | movel %a7,%a0 |point to start of pushed lword (its ms word) | ||
202 | bsrl mem_write |write snan to user memory | ||
203 | movel (%a7)+,%d1 |clear off stack | ||
204 | rts | ||
205 | | | ||
206 | | Get the 8 most significant bits of etemp mantissa | ||
207 | | | ||
208 | sto_byte: | ||
209 | movel ETEMP_HI(%a6),%d1 | ||
210 | movel #1,%d0 |load byte count | ||
211 | | | ||
212 | | Set signalling nan bit | ||
213 | | | ||
214 | bsetl #30,%d1 | ||
215 | | | ||
216 | | Store to the users destination address | ||
217 | | | ||
218 | tstl %a0 |check if <ea> is 0 | ||
219 | beqs wrt_dn |destination is a data register | ||
220 | movel %d1,-(%a7) |move the snan onto the stack | ||
221 | movel %a0,%a1 |load dest addr into a1 | ||
222 | movel %a7,%a0 |point to start of pushed lword (its ms byte) | ||
223 | bsrl mem_write |write snan to user memory | ||
224 | movel (%a7)+,%d1 |clear off stack | ||
225 | rts | ||
226 | |||
227 | | | ||
228 | | wrt_dn --- write to a data register | ||
229 | | | ||
230 | | We get here with D1 containing the data to write and D0 the | ||
231 | | number of bytes to write: 1=byte,2=word,4=long. | ||
232 | | | ||
233 | wrt_dn: | ||
234 | movel %d1,L_SCR1(%a6) |data | ||
235 | movel %d0,-(%a7) |size | ||
236 | bsrl get_fline |returns fline word in d0 | ||
237 | movel %d0,%d1 | ||
238 | andil #0x7,%d1 |d1 now holds register number | ||
239 | movel (%sp)+,%d0 |get original size | ||
240 | cmpil #4,%d0 | ||
241 | beqs wrt_long | ||
242 | cmpil #2,%d0 | ||
243 | bnes wrt_byte | ||
244 | wrt_word: | ||
245 | orl #0x8,%d1 |word: add 0x08 to the register number | ||
246 | bral reg_dest |reg_dest returns to caller of wrt_dn | ||
247 | wrt_long: | ||
248 | orl #0x10,%d1 |long: add 0x10 to the register number | ||
249 | bral reg_dest |reg_dest returns to caller of wrt_dn | ||
250 | wrt_byte: | ||
251 | bral reg_dest |byte: register number only | ||
252 | | | ||
253 | | Check if it is a src nan or dst nan, then set or clear neg_bit in FPSR_CC from its sign | ||
254 | | | ||
255 | not_out: | ||
256 | movel DTAG(%a6),%d0 | ||
257 | bfextu %d0{#0:#3},%d0 |isolate dtag in lsbs | ||
258 | |||
259 | cmpib #3,%d0 |check for nan in destination | ||
260 | bnes issrc |dest is not a nan: use the source | ||
261 | dst_nan: | ||
262 | btstb #6,FPTEMP_HI(%a6) |check if dest nan is an snan | ||
263 | bnes issrc |no, so check source for snan | ||
264 | movew FPTEMP_EX(%a6),%d0 |sign/exponent word of the dest snan | ||
265 | bras cont | ||
266 | issrc: | ||
267 | movew ETEMP_EX(%a6),%d0 |sign/exponent word of the source | ||
268 | cont: | ||
269 | btstl #15,%d0 |test for sign of snan | ||
270 | beqs clr_neg | ||
271 | bsetb #neg_bit,FPSR_CC(%a6) |negative: set neg_bit | ||
272 | bra report_snan | ||
273 | clr_neg: | ||
274 | bclrb #neg_bit,FPSR_CC(%a6) |positive: clear neg_bit | ||
275 | bra report_snan | ||
276 | |||
277 | |end | ||
diff --git a/arch/m68k/fpsp040/x_store.S b/arch/m68k/fpsp040/x_store.S new file mode 100644 index 000000000000..4282fa67d449 --- /dev/null +++ b/arch/m68k/fpsp040/x_store.S | |||
@@ -0,0 +1,256 @@ | |||
1 | | | ||
2 | | x_store.sa 3.2 1/24/91 | ||
3 | | | ||
4 | | store --- store operand to memory or register | ||
5 | | | ||
6 | | Used by underflow and overflow handlers. | ||
7 | | | ||
8 | | a0 = points to fp value to be stored. | ||
9 | | | ||
10 | |||
11 | | Copyright (C) Motorola, Inc. 1990 | ||
12 | | All Rights Reserved | ||
13 | | | ||
14 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
15 | | The copyright notice above does not evidence any | ||
16 | | actual or intended publication of such source code. | ||
17 | |||
18 | X_STORE: |idnt 2,1 | Motorola 040 Floating Point Software Package | ||
19 | |||
20 | |section 8 | ||
21 | |||
22 | fpreg_mask: |fmovem dynamic register masks, indexed by fp reg number | ||
23 | .byte 0x80,0x40,0x20,0x10,0x08,0x04,0x02,0x01 |index 0 (fp0) = 0x80 ... index 7 = 0x01 | ||
24 | |||
25 | #include "fpsp.h" | ||
26 | |||
27 | |xref mem_write | ||
28 | |xref get_fline | ||
29 | |xref g_opcls | ||
30 | |xref g_dfmtou | ||
31 | |xref reg_dest | ||
32 | |||
33 | .global dest_ext | ||
34 | .global dest_dbl | ||
35 | .global dest_sgl | ||
36 | |||
37 | .global store | ||
38 | store: | ||
39 | btstb #E3,E_BYTE(%a6) |E3 exception? | ||
40 | beqs E1_sto |no: handle as E1 | ||
41 | E3_sto: | ||
42 | movel CMDREG3B(%a6),%d0 | ||
43 | bfextu %d0{#6:#3},%d0 |isolate dest. reg from cmdreg3b | ||
44 | sto_fp: | ||
45 | lea fpreg_mask,%a1 | ||
46 | moveb (%a1,%d0.w),%d0 |convert reg# to dynamic register mask | ||
47 | tstb LOCAL_SGN(%a0) |was the value negative? | ||
48 | beqs is_pos | ||
49 | bsetb #sign_bit,LOCAL_EX(%a0) |yes: put the sign back into the exponent word | ||
50 | is_pos: | ||
51 | fmovemx (%a0),%d0 |load the fp register selected by the mask in d0 | ||
52 | | | ||
53 | | if fp0-fp3 is being modified, we must put a copy | ||
54 | | in the USER_FPn variable on the stack because all exception | ||
55 | | handlers restore fp0-fp3 from there. | ||
56 | | | ||
57 | cmpb #0x80,%d0 |mask for fp0? | ||
58 | bnes not_fp0 | ||
59 | fmovemx %fp0-%fp0,USER_FP0(%a6) | ||
60 | rts | ||
61 | not_fp0: | ||
62 | cmpb #0x40,%d0 |mask for fp1? | ||
63 | bnes not_fp1 | ||
64 | fmovemx %fp1-%fp1,USER_FP1(%a6) | ||
65 | rts | ||
66 | not_fp1: | ||
67 | cmpb #0x20,%d0 |mask for fp2? | ||
68 | bnes not_fp2 | ||
69 | fmovemx %fp2-%fp2,USER_FP2(%a6) | ||
70 | rts | ||
71 | not_fp2: | ||
72 | cmpb #0x10,%d0 |mask for fp3? | ||
73 | bnes not_fp3 | ||
74 | fmovemx %fp3-%fp3,USER_FP3(%a6) | ||
75 | rts | ||
76 | not_fp3: | ||
77 | rts |other mask: no saved copy to update | ||
78 | |||
79 | E1_sto: | ||
80 | bsrl g_opcls |returns opclass in d0 | ||
81 | cmpib #3,%d0 | ||
82 | beq opc011 |branch if opclass 3 | ||
83 | movel CMDREG1B(%a6),%d0 | ||
84 | bfextu %d0{#6:#3},%d0 |extract destination register | ||
85 | bras sto_fp |store to that fp register | ||
86 | |||
87 | opc011: | ||
88 | bsrl g_dfmtou |returns dest format in d0 | ||
89 | | ;ext=00, sgl=01, dbl=10 | ||
90 | movel %a0,%a1 |save source addr in a1 | ||
91 | movel EXC_EA(%a6),%a0 |get the address | ||
92 | cmpil #0,%d0 |if dest format is extended | ||
93 | beq dest_ext |then branch | ||
94 | cmpil #1,%d0 |if dest format is single | ||
95 | beq dest_sgl |then branch | ||
96 | | | ||
97 | | fall through to dest_dbl (any other format value) | ||
98 | | | ||
99 | |||
100 | | | ||
101 | | dest_dbl --- write double precision value to user space | ||
102 | | | ||
103 | |Input | ||
104 | | a0 -> destination address | ||
105 | | a1 -> source in extended precision | ||
106 | |Output | ||
107 | | a0 -> destroyed | ||
108 | | a1 -> destroyed | ||
109 | | d0 -> 0 | ||
110 | | | ||
111 | |Changes extended precision to double precision. | ||
112 | | Note: no attempt is made to round the extended value to double. | ||
113 | | dbl_sign = ext_sign | ||
114 | | dbl_exp = ext_exp - $3fff(ext bias) + $3ff(dbl bias) | ||
115 | | get rid of ext integer bit | ||
116 | | dbl_mant = ext_mant{62:11} | ||
117 | | | ||
118 | | --------------- --------------- --------------- | ||
119 | | extended -> |s| exp | |1| ms mant | | ls mant | | ||
120 | | --------------- --------------- --------------- | ||
121 | | 95 64 63 62 32 31 11 0 | ||
122 | | | | | ||
123 | | | | | ||
124 | | | | | ||
125 | | v v | ||
126 | | --------------- --------------- | ||
127 | | double -> |s|exp| mant | | mant | | ||
128 | | --------------- --------------- | ||
129 | | 63 51 32 31 0 | ||
130 | | | ||
131 | dest_dbl: | ||
132 | clrl %d0 |clear d0 | ||
133 | movew LOCAL_EX(%a1),%d0 |get exponent | ||
134 | subw #0x3fff,%d0 |subtract extended precision bias | ||
135 | cmpw #0x4000,%d0 |check if inf | ||
136 | beqs inf |if so, special case | ||
137 | addw #0x3ff,%d0 |add double precision bias | ||
138 | swap %d0 |d0 now in upper word | ||
139 | lsll #4,%d0 |d0 now in proper place for dbl prec exp | ||
140 | tstb LOCAL_SGN(%a1) | ||
141 | beqs get_mant |if positive, go process mantissa | ||
142 | bsetl #31,%d0 |if negative, put in sign information | ||
143 | | ; before continuing | ||
144 | bras get_mant |go process mantissa | ||
145 | inf: | ||
146 | movel #0x7ff00000,%d0 |load dbl inf exponent | ||
147 | clrl LOCAL_HI(%a1) |clear msb | ||
148 | tstb LOCAL_SGN(%a1) | ||
149 | beqs dbl_inf |if positive, go ahead and write it | ||
150 | bsetl #31,%d0 |if negative put in sign information | ||
151 | dbl_inf: | ||
152 | movel %d0,LOCAL_EX(%a1) |store upper lword of the double at LOCAL_EX | ||
153 | bras dbl_wrt | ||
154 | get_mant: | ||
155 | movel LOCAL_HI(%a1),%d1 |get ms mantissa | ||
156 | bfextu %d1{#1:#20},%d1 |get upper 20 bits of ms (integer bit skipped) | ||
157 | orl %d1,%d0 |put these bits in ms word of double | ||
158 | movel %d0,LOCAL_EX(%a1) |store upper lword of the double at LOCAL_EX | ||
159 | movel LOCAL_HI(%a1),%d1 |get ms mantissa | ||
160 | movel #21,%d0 |load shift count | ||
161 | lsll %d0,%d1 |put lower 11 bits in upper bits | ||
162 | movel %d1,LOCAL_HI(%a1) |build lower lword in memory | ||
163 | movel LOCAL_LO(%a1),%d1 |get ls mantissa | ||
164 | bfextu %d1{#0:#21},%d0 |get upper 21 bits of ls mantissa | ||
165 | orl %d0,LOCAL_HI(%a1) |put them in double result | ||
166 | dbl_wrt: | ||
167 | movel #0x8,%d0 |byte count for double precision number | ||
168 | exg %a0,%a1 |a0=supervisor source, a1=user dest | ||
169 | bsrl mem_write |move the number to the user's memory | ||
170 | rts | ||
171 | | | ||
172 | | dest_sgl --- write single precision value to user space | ||
173 | | | ||
174 | |Input | ||
175 | | a0 -> destination address | ||
176 | | a1 -> source in extended precision | ||
177 | | | ||
178 | |Output | ||
179 | | a0 -> destroyed | ||
180 | | a1 -> destroyed | ||
181 | | d0 -> 0 | ||
182 | | | ||
183 | |Changes extended precision to single precision. | ||
184 | | sgl_sign = ext_sign | ||
185 | | sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias) | ||
186 | | get rid of ext integer bit | ||
187 | | sgl_mant = ext_mant{62:40} | ||
188 | | | ||
189 | | --------------- --------------- --------------- | ||
190 | | extended -> |s| exp | |1| ms mant | | ls mant | | ||
191 | | --------------- --------------- --------------- | ||
192 | | 95 64 63 62 40 32 31 12 0 | ||
193 | | | | | ||
194 | | | | | ||
195 | | | | | ||
196 | | v v | ||
197 | | --------------- | ||
198 | | single -> |s|exp| mant | | ||
199 | | --------------- | ||
200 | | 31 22 0 | ||
201 | | | ||
202 | dest_sgl: | ||
203 | clrl %d0 | ||
204 | movew LOCAL_EX(%a1),%d0 |get exponent | ||
205 | subw #0x3fff,%d0 |subtract extended precision bias | ||
206 | cmpw #0x4000,%d0 |check if inf | ||
207 | beqs sinf |if so, special case | ||
208 | addw #0x7f,%d0 |add single precision bias | ||
209 | swap %d0 |put exp in upper word of d0 | ||
210 | lsll #7,%d0 |shift it into single exp bits | ||
211 | tstb LOCAL_SGN(%a1) | ||
212 | beqs get_sman |if positive, continue | ||
213 | bsetl #31,%d0 |if negative, put in sign first | ||
214 | bras get_sman |get mantissa | ||
215 | sinf: | ||
216 | movel #0x7f800000,%d0 |load single inf exp to d0 | ||
217 | tstb LOCAL_SGN(%a1) | ||
218 | beqs sgl_wrt |if positive, continue | ||
219 | bsetl #31,%d0 |if negative, put in sign info | ||
220 | bras sgl_wrt | ||
221 | |||
222 | get_sman: | ||
223 | movel LOCAL_HI(%a1),%d1 |get ms mantissa | ||
224 | bfextu %d1{#1:#23},%d1 |get upper 23 bits of ms (integer bit skipped) | ||
225 | orl %d1,%d0 |put these bits in ms word of single | ||
226 | |||
227 | sgl_wrt: | ||
228 | movel %d0,L_SCR1(%a6) |stage the single result in L_SCR1 | ||
229 | movel #0x4,%d0 |byte count for single precision number | ||
230 | tstl %a0 |users destination address | ||
231 | beqs sgl_Dn |destination is a data register | ||
232 | exg %a0,%a1 |a1=user dest (a0 is reloaded on the next line) | ||
233 | leal L_SCR1(%a6),%a0 |point a0 to data | ||
234 | bsrl mem_write |move the number to the user's memory | ||
235 | rts | ||
236 | sgl_Dn: | ||
237 | bsrl get_fline |returns fline word in d0 | ||
238 | andw #0x7,%d0 |isolate register number | ||
239 | movel %d0,%d1 |d1 has size:reg formatted for reg_dest | ||
240 | orl #0x10,%d1 |reg_dest wants size added to reg# | ||
241 | bral reg_dest |size code is 0x10; rts in reg_dest will | ||
242 | | ;return to caller of dest_sgl | ||
243 | |||
244 | dest_ext: | ||
245 | tstb LOCAL_SGN(%a1) |put back sign into exponent word | ||
246 | beqs dstx_cont |positive: nothing to set | ||
247 | bsetb #sign_bit,LOCAL_EX(%a1) |negative: set the sign bit | ||
248 | dstx_cont: | ||
249 | clrb LOCAL_SGN(%a1) |clear out the sign byte | ||
250 | |||
251 | movel #0x0c,%d0 |byte count for extended number | ||
252 | exg %a0,%a1 |a0=supervisor source, a1=user dest | ||
253 | bsrl mem_write |move the number to the user's memory | ||
254 | rts | ||
255 | |||
256 | |end | ||
diff --git a/arch/m68k/fpsp040/x_unfl.S b/arch/m68k/fpsp040/x_unfl.S new file mode 100644 index 000000000000..077fcc230fcc --- /dev/null +++ b/arch/m68k/fpsp040/x_unfl.S | |||
@@ -0,0 +1,269 @@ | |||
1 | | | ||
2 | | x_unfl.sa 3.4 7/1/91 | ||
3 | | | ||
4 | | fpsp_unfl --- FPSP handler for underflow exception | ||
5 | | | ||
6 | | Trap disabled results | ||
7 | | For 881/2 compatibility, sw must denormalize the intermediate | ||
8 | | result, then store the result. Denormalization is accomplished | ||
9 | | by taking the intermediate result (which is always normalized) and | ||
10 | | shifting the mantissa right while incrementing the exponent until | ||
11 | | it is equal to the denormalized exponent for the destination | ||
12 | | format. After denormalization, the result is rounded to the | ||
13 | | destination format. | ||
14 | | | ||
15 | | Trap enabled results | ||
16 | | All trap disabled code applies. In addition the exceptional | ||
17 | | operand needs to be made available to the user with a bias of $6000 | ||
18 | | added to the exponent. | ||
19 | | | ||
20 | |||
21 | | Copyright (C) Motorola, Inc. 1990 | ||
22 | | All Rights Reserved | ||
23 | | | ||
24 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
25 | | The copyright notice above does not evidence any | ||
26 | | actual or intended publication of such source code. | ||
27 | |||
28 | X_UNFL: |idnt 2,1 | Motorola 040 Floating Point Software Package | ||
29 | |||
30 | |section 8 | ||
31 | |||
32 | #include "fpsp.h" | ||
33 | |||
34 | |xref denorm | ||
35 | |xref round | ||
36 | |xref store | ||
37 | |xref g_rndpr | ||
38 | |xref g_opcls | ||
39 | |xref g_dfmtou | ||
40 | |xref real_unfl | ||
41 | |xref real_inex | ||
42 | |xref fpsp_done | ||
43 | |xref b1238_fix | ||
44 | |||
45 | .global fpsp_unfl | ||
46 | fpsp_unfl: |FPSP handler entry for underflow exception | ||
47 | link %a6,#-LOCAL_SIZE | ||
48 | fsave -(%a7) | ||
49 | moveml %d0-%d1/%a0-%a1,USER_DA(%a6) | ||
50 | fmovemx %fp0-%fp3,USER_FP0(%a6) | ||
51 | fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) | ||
52 | |||
53 | | | ||
54 | bsrl unf_res |denormalize, round & store interm op | ||
55 | | | ||
56 | | If underflow exceptions are not enabled, check for inexact | ||
57 | | exception | ||
58 | | | ||
59 | btstb #unfl_bit,FPCR_ENABLE(%a6) | ||
60 | beqs ck_inex |unfl trap not enabled, check inexact | ||
61 | |||
62 | btstb #E3,E_BYTE(%a6) | ||
63 | beqs no_e3_1 |E3 clear, skip the bug1238 fixup | ||
64 | | | ||
65 | | Clear dirty bit on dest register in the frame before branching | ||
66 | | to b1238_fix. | ||
67 | | | ||
68 | bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no | ||
69 | bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit | ||
70 | bsrl b1238_fix |test for bug1238 case | ||
71 | movel USER_FPSR(%a6),FPSR_SHADOW(%a6) | ||
72 | orl #sx_mask,E_BYTE(%a6) | ||
73 | no_e3_1: | ||
74 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 | ||
75 | fmovemx USER_FP0(%a6),%fp0-%fp3 | ||
76 | fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar | ||
77 | frestore (%a7)+ | ||
78 | unlk %a6 | ||
79 | bral real_unfl |exit to the real underflow handler | ||
80 | | | ||
81 | | It is possible to have either inex2 or inex1 exceptions with the | ||
82 | | unfl. If the inex enable bit is set in the FPCR, and either | ||
83 | | inex2 or inex1 occurred, we must clean up and branch to the | ||
84 | | real inex handler. | ||
85 | | | ||
86 | ck_inex: | ||
87 | moveb FPCR_ENABLE(%a6),%d0 | ||
88 | andb FPSR_EXCEPT(%a6),%d0 |enabled AND reported exceptions | ||
89 | andib #0x3,%d0 |keep only the inex2/inex1 bits | ||
90 | beqs unfl_done |none enabled and reported, finish | ||
91 | |||
92 | | | ||
93 | | Inexact enabled and reported, and we must take an inexact exception | ||
94 | | | ||
95 | take_inex: | ||
96 | btstb #E3,E_BYTE(%a6) | ||
97 | beqs no_e3_2 |E3 clear, skip the bug1238 fixup | ||
98 | | | ||
99 | | Clear dirty bit on dest register in the frame before branching | ||
100 | | to b1238_fix. | ||
101 | | | ||
102 | bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no | ||
103 | bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit | ||
104 | bsrl b1238_fix |test for bug1238 case | ||
105 | movel USER_FPSR(%a6),FPSR_SHADOW(%a6) | ||
106 | orl #sx_mask,E_BYTE(%a6) | ||
107 | no_e3_2: | ||
108 | moveb #INEX_VEC,EXC_VEC+1(%a6) | ||
109 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 | ||
110 | fmovemx USER_FP0(%a6),%fp0-%fp3 | ||
111 | fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar | ||
112 | frestore (%a7)+ | ||
113 | unlk %a6 | ||
114 | bral real_inex |exit to the real inexact handler | ||
115 | |||
116 | unfl_done: | ||
117 | bclrb #E3,E_BYTE(%a6) |test old E3 state, then clear E3 | ||
118 | beqs e1_set |E3 was clear (Z set), take e1_set exit | ||
119 | | | ||
120 | | Clear dirty bit on dest register in the frame before branching | ||
121 | | to b1238_fix. | ||
122 | | | ||
123 | bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no | ||
124 | bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit | ||
125 | bsrl b1238_fix |test for bug1238 case | ||
126 | movel USER_FPSR(%a6),FPSR_SHADOW(%a6) | ||
127 | orl #sx_mask,E_BYTE(%a6) | ||
128 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 | ||
129 | fmovemx USER_FP0(%a6),%fp0-%fp3 | ||
130 | fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar | ||
131 | frestore (%a7)+ | ||
132 | unlk %a6 | ||
133 | bral fpsp_done | ||
134 | e1_set: |exit without frestore of the fsave frame | ||
135 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 | ||
136 | fmovemx USER_FP0(%a6),%fp0-%fp3 | ||
137 | fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar | ||
138 | unlk %a6 |reloads a7 from a6, dropping the frame | ||
139 | bral fpsp_done | ||
140 | | | ||
141 | | unf_res --- underflow result calculation | ||
142 | | | ||
143 | unf_res: |denormalize, round and store the operand | ||
144 | bsrl g_rndpr |returns RND_PREC in d0 0=ext, | ||
145 | | ;1=sgl, 2=dbl | ||
146 | | ;we need the RND_PREC in the | ||
147 | | ;upper word for round | ||
148 | movew #0,-(%a7) |lower word of the PREC/MODE long | ||
149 | movew %d0,-(%a7) |copy RND_PREC to stack | ||
150 | | | ||
151 | | | ||
152 | | If the exception bit set is E3, the exceptional operand from the | ||
153 | | fpu is in WBTEMP; else it is in FPTEMP. | ||
154 | | | ||
155 | btstb #E3,E_BYTE(%a6) | ||
156 | beqs unf_E1 |E3 clear, operand is in FPTEMP | ||
157 | unf_E3: | ||
158 | lea WBTEMP(%a6),%a0 |a0 now points to operand | ||
159 | | | ||
160 | | Test for fsgldiv and fsglmul. If the inst was one of these, then | ||
161 | | force the precision to extended for the denorm routine and to | ||
162 | | single for the round routine; otherwise use the user's precision. | ||
163 | | | ||
164 | movew CMDREG3B(%a6),%d1 |check for fsgldiv or fsglmul | ||
165 | andiw #0x7f,%d1 | ||
166 | cmpiw #0x30,%d1 |check for sgldiv | ||
167 | beqs unf_sgl | ||
168 | cmpiw #0x33,%d1 |check for sglmul | ||
169 | bnes unf_cont |if not, use fpcr prec in round | ||
170 | unf_sgl: | ||
171 | clrl %d0 |d0=0 (ext) going into denorm | ||
172 | movew #0x1,(%a7) |override g_rndpr precision | ||
173 | | ;force single | ||
174 | bras unf_cont | ||
175 | unf_E1: | ||
176 | lea FPTEMP(%a6),%a0 |a0 now points to operand | ||
177 | unf_cont: | ||
178 | bclrb #sign_bit,LOCAL_EX(%a0) |clear sign bit | ||
179 | sne LOCAL_SGN(%a0) |store sign | ||
180 | |||
181 | bsrl denorm |returns denorm, a0 points to it | ||
182 | | | ||
183 | | WARNING: | ||
184 | | ;d0 has guard,round sticky bit | ||
185 | | ;make sure that it is not corrupted | ||
186 | | ;before it reaches the round subroutine | ||
187 | | ;also ensure that a0 isn't corrupted | ||
188 | |||
189 | | | ||
190 | | Set up d1 for round subroutine d1 contains the PREC/MODE | ||
191 | | information respectively on upper/lower register halves. | ||
192 | | | ||
193 | bfextu FPCR_MODE(%a6){#2:#2},%d1 |get mode from FPCR | ||
194 | | ;mode in lower d1 | ||
195 | addl (%a7)+,%d1 |merge PREC/MODE | ||
196 | | | ||
197 | | WARNING: a0 and d0 are assumed to be intact between the denorm and | ||
198 | | round subroutines. All code between these two subroutines | ||
199 | | must not corrupt a0 and d0. | ||
200 | | | ||
201 | | | ||
202 | | Perform Round | ||
203 | | Input: a0 points to input operand | ||
204 | | d0{31:29} has guard, round, sticky | ||
205 | | d1{01:00} has rounding mode | ||
206 | | d1{17:16} has rounding precision | ||
207 | | Output: a0 points to rounded operand | ||
208 | | | ||
209 | |||
210 | bsrl round |returns rounded denorm at (a0) | ||
211 | | | ||
212 | | Differentiate between store to memory vs. store to register | ||
213 | | | ||
214 | unf_store: | ||
215 | bsrl g_opcls |returns opclass in d0{2:0} | ||
216 | cmpib #0x3,%d0 |opclass 011 means store to memory | ||
217 | bnes not_opc011 | ||
218 | | | ||
219 | | At this point, a store to memory is pending | ||
220 | | | ||
221 | opc011: | ||
222 | bsrl g_dfmtou | ||
223 | tstb %d0 |zero from g_dfmtou means extended | ||
224 | beqs ext_opc011 |If extended, do not subtract | ||
225 | | ;If destination format is sgl/dbl, | ||
226 | tstb LOCAL_HI(%a0) |If rounded result is normal,don't | ||
227 | | ;subtract | ||
228 | bmis ext_opc011 |top mantissa bit set, skip the adjust | ||
229 | subqw #1,LOCAL_EX(%a0) |account for denorm bias vs. | ||
230 | | ;normalized bias | ||
231 | | ; normalized denormalized | ||
232 | | ;single $7f $7e | ||
233 | | ;double $3ff $3fe | ||
234 | | | ||
235 | ext_opc011: | ||
236 | bsrl store |stores to memory | ||
237 | bras unf_done |finish up | ||
238 | |||
239 | | | ||
240 | | At this point, a store to a float register is pending | ||
241 | | | ||
242 | not_opc011: | ||
243 | bsrl store |stores to float register | ||
244 | | ;a0 is not corrupted on a store to a | ||
245 | | ;float register. | ||
246 | | | ||
247 | | Set the condition codes according to result | ||
248 | | | ||
249 | tstl LOCAL_HI(%a0) |check upper mantissa | ||
250 | bnes ck_sgn | ||
251 | tstl LOCAL_LO(%a0) |check lower mantissa | ||
252 | bnes ck_sgn | ||
253 | bsetb #z_bit,FPSR_CC(%a6) |set condition codes if zero | ||
254 | ck_sgn: | ||
255 | btstb #sign_bit,LOCAL_EX(%a0) |check the sign bit | ||
256 | beqs unf_done | ||
257 | bsetb #neg_bit,FPSR_CC(%a6) |sign bit set, flag result negative | ||
258 | |||
259 | | | ||
260 | | Finish. | ||
261 | | | ||
262 | unf_done: | ||
263 | btstb #inex2_bit,FPSR_EXCEPT(%a6) | ||
264 | beqs no_aunfl |inex2 clear, leave aunfl untouched | ||
265 | bsetb #aunfl_bit,FPSR_AEXCEPT(%a6) |inex2 set, set accrued unfl | ||
266 | no_aunfl: | ||
267 | rts | ||
268 | |||
269 | |end | ||
diff --git a/arch/m68k/fpsp040/x_unimp.S b/arch/m68k/fpsp040/x_unimp.S new file mode 100644 index 000000000000..920cb9410e9e --- /dev/null +++ b/arch/m68k/fpsp040/x_unimp.S | |||
@@ -0,0 +1,77 @@ | |||
1 | | | ||
2 | | x_unimp.sa 3.3 7/1/91 | ||
3 | | | ||
4 | | fpsp_unimp --- FPSP handler for unimplemented instruction | ||
5 | | exception. | ||
6 | | | ||
7 | | Invoked when the user program encounters a floating-point | ||
8 | | op-code that hardware does not support. Trap vector# 11 | ||
9 | | (See table 8-1 MC68030 User's Manual). | ||
10 | | | ||
11 | | | ||
12 | | Note: An fsave for an unimplemented inst. will create a short | ||
13 | | fsave stack. | ||
14 | | | ||
15 | | Input: 1. Six word stack frame for unimplemented inst, four word | ||
16 | | for illegal | ||
17 | | (See table 8-7 MC68030 User's Manual). | ||
18 | | 2. Unimp (short) fsave state frame created here by fsave | ||
19 | | instruction. | ||
20 | | | ||
21 | | | ||
22 | | Copyright (C) Motorola, Inc. 1990 | ||
23 | | All Rights Reserved | ||
24 | | | ||
25 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
26 | | The copyright notice above does not evidence any | ||
27 | | actual or intended publication of such source code. | ||
28 | |||
29 | X_UNIMP: |idnt 2,1 | Motorola 040 Floating Point Software Package | ||
30 | |||
31 | |section 8 | ||
32 | |||
33 | #include "fpsp.h" | ||
34 | |||
35 | |xref get_op | ||
36 | |xref do_func | ||
37 | |xref sto_res | ||
38 | |xref gen_except | ||
39 | |xref fpsp_fmt_error | ||
40 | |||
41 | .global fpsp_unimp | ||
42 | .global uni_2 | ||
43 | fpsp_unimp: |FPSP handler entry for unimplemented inst | ||
44 | link %a6,#-LOCAL_SIZE | ||
45 | fsave -(%a7) | ||
46 | uni_2: |exported label placed after the link/fsave | ||
47 | moveml %d0-%d1/%a0-%a1,USER_DA(%a6) | ||
48 | fmovemx %fp0-%fp3,USER_FP0(%a6) | ||
49 | fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) | ||
50 | moveb (%a7),%d0 |test for valid version num | ||
51 | andib #0xf0,%d0 |test for $4x | ||
52 | cmpib #VER_4,%d0 |must be $4x or exit | ||
53 | bnel fpsp_fmt_error | ||
54 | | | ||
55 | | Temporary D25B Fix | ||
56 | | The following lines are used to ensure that the FPSR | ||
57 | | exception byte and condition codes are clear before proceeding | ||
58 | | | ||
59 | movel USER_FPSR(%a6),%d0 | ||
60 | andl #0xFF00FF,%d0 |clear all but aexcs and qbyte | ||
61 | movel %d0,USER_FPSR(%a6) | ||
62 | fmovel #0,%FPSR |clear all user bits | ||
63 | fmovel #0,%FPCR |clear all user exceptions for FPSP | ||
64 | |||
65 | clrb UFLG_TMP(%a6) |clr flag for unsupp data | ||
66 | |||
67 | bsrl get_op |go get operand(s) | ||
68 | clrb STORE_FLG(%a6) |flag clear means sto_res will run below | ||
69 | bsrl do_func |do the function | ||
70 | fsave -(%a7) |capture possible exc state | ||
71 | tstb STORE_FLG(%a6) | ||
72 | bnes no_store |if STORE_FLG is set, no store | ||
73 | bsrl sto_res |store the result in user space | ||
74 | no_store: | ||
75 | bral gen_except |post any exceptions and return | ||
76 | |||
77 | |end | ||
diff --git a/arch/m68k/fpsp040/x_unsupp.S b/arch/m68k/fpsp040/x_unsupp.S new file mode 100644 index 000000000000..4ec57285b683 --- /dev/null +++ b/arch/m68k/fpsp040/x_unsupp.S | |||
@@ -0,0 +1,83 @@ | |||
1 | | | ||
2 | | x_unsupp.sa 3.3 7/1/91 | ||
3 | | | ||
4 | | fpsp_unsupp --- FPSP handler for unsupported data type exception | ||
5 | | | ||
6 | | Trap vector #55 (See table 8-1 MC68030 User's Manual). | ||
7 | | Invoked when the user program encounters a data format (packed) that | ||
8 | | hardware does not support or a data type (denormalized numbers or un- | ||
9 | | normalized numbers). | ||
10 | | Normalizes denorms and unnorms, unpacks packed numbers then stores | ||
11 | | them back into the machine to let the 040 finish the operation. | ||
12 | | | ||
13 | | Unsupp calls two routines: | ||
14 | | 1. get_op - gets the operand(s) | ||
15 | | 2. res_func - restore the function back into the 040 or | ||
16 | | if fmove.p fpm,<ea> then pack source (fpm) | ||
17 | | and store in user's memory <ea>. | ||
18 | | | ||
19 | | Input: Long fsave stack frame | ||
20 | | | ||
21 | | | ||
22 | |||
23 | | Copyright (C) Motorola, Inc. 1990 | ||
24 | | All Rights Reserved | ||
25 | | | ||
26 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
27 | | The copyright notice above does not evidence any | ||
28 | | actual or intended publication of such source code. | ||
29 | |||
30 | X_UNSUPP: |idnt 2,1 | Motorola 040 Floating Point Software Package | ||
31 | |||
32 | |section 8 | ||
33 | |||
34 | #include "fpsp.h" | ||
35 | |||
36 | |xref get_op | ||
37 | |xref res_func | ||
38 | |xref gen_except | ||
39 | |xref fpsp_fmt_error | ||
40 | |||
41 | .global fpsp_unsupp | ||
42 | fpsp_unsupp: |FPSP handler entry for unsupported data type | ||
43 | | | ||
44 | link %a6,#-LOCAL_SIZE | ||
45 | fsave -(%a7) | ||
46 | moveml %d0-%d1/%a0-%a1,USER_DA(%a6) | ||
47 | fmovemx %fp0-%fp3,USER_FP0(%a6) | ||
48 | fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) | ||
49 | |||
50 | |||
51 | moveb (%a7),VER_TMP(%a6) |save version number | ||
52 | moveb (%a7),%d0 |test for valid version num | ||
53 | andib #0xf0,%d0 |test for $4x | ||
54 | cmpib #VER_4,%d0 |must be $4x or exit | ||
55 | bnel fpsp_fmt_error | ||
56 | |||
57 | fmovel #0,%FPSR |clear all user status bits | ||
58 | fmovel #0,%FPCR |clear all user control bits | ||
59 | | | ||
60 | | The following lines are used to ensure that the FPSR | ||
61 | | exception byte and condition codes are clear before proceeding, | ||
62 | | except in the case of fmove, which leaves the cc's intact. | ||
63 | | | ||
64 | unsupp_con: | ||
65 | movel USER_FPSR(%a6),%d1 | ||
66 | btst #5,CMDREG1B(%a6) |looking for fmove out | ||
67 | bne fmove_con |bit set, use the mask that keeps cc's | ||
68 | andl #0xFF00FF,%d1 |clear all but aexcs and qbyte | ||
69 | bras end_fix | ||
70 | fmove_con: | ||
71 | andl #0x0FFF40FF,%d1 |clear all but cc's, snan bit, aexcs, and qbyte | ||
72 | end_fix: | ||
73 | movel %d1,USER_FPSR(%a6) | ||
74 | |||
75 | st UFLG_TMP(%a6) |set flag for unsupp data | ||
76 | |||
77 | bsrl get_op |everything okay, go get operand(s) | ||
78 | bsrl res_func |fix up stack frame so can restore it | ||
79 | clrl -(%a7) |push a zeroed long for the fmt word | ||
80 | moveb VER_TMP(%a6),(%a7) |move idle fmt word to top of stack | ||
81 | bral gen_except | ||
82 | | | ||
83 | |end | ||