aboutsummaryrefslogtreecommitdiffstats
path: root/arch/m68k/fpsp040
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 18:20:36 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 18:20:36 -0400
commit1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/m68k/fpsp040
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'arch/m68k/fpsp040')
-rw-r--r--arch/m68k/fpsp040/Makefile16
-rw-r--r--arch/m68k/fpsp040/README30
-rw-r--r--arch/m68k/fpsp040/bindec.S920
-rw-r--r--arch/m68k/fpsp040/binstr.S140
-rw-r--r--arch/m68k/fpsp040/bugfix.S496
-rw-r--r--arch/m68k/fpsp040/decbin.S506
-rw-r--r--arch/m68k/fpsp040/do_func.S559
-rw-r--r--arch/m68k/fpsp040/fpsp.h348
-rw-r--r--arch/m68k/fpsp040/gen_except.S468
-rw-r--r--arch/m68k/fpsp040/get_op.S676
-rw-r--r--arch/m68k/fpsp040/kernel_ex.S494
-rw-r--r--arch/m68k/fpsp040/res_func.S2040
-rw-r--r--arch/m68k/fpsp040/round.S649
-rw-r--r--arch/m68k/fpsp040/sacos.S115
-rw-r--r--arch/m68k/fpsp040/sasin.S104
-rw-r--r--arch/m68k/fpsp040/satan.S478
-rw-r--r--arch/m68k/fpsp040/satanh.S104
-rw-r--r--arch/m68k/fpsp040/scale.S371
-rw-r--r--arch/m68k/fpsp040/scosh.S132
-rw-r--r--arch/m68k/fpsp040/setox.S865
-rw-r--r--arch/m68k/fpsp040/sgetem.S141
-rw-r--r--arch/m68k/fpsp040/sint.S247
-rw-r--r--arch/m68k/fpsp040/skeleton.S516
-rw-r--r--arch/m68k/fpsp040/slog2.S188
-rw-r--r--arch/m68k/fpsp040/slogn.S592
-rw-r--r--arch/m68k/fpsp040/smovecr.S162
-rw-r--r--arch/m68k/fpsp040/srem_mod.S422
-rw-r--r--arch/m68k/fpsp040/ssin.S746
-rw-r--r--arch/m68k/fpsp040/ssinh.S135
-rw-r--r--arch/m68k/fpsp040/stan.S455
-rw-r--r--arch/m68k/fpsp040/stanh.S185
-rw-r--r--arch/m68k/fpsp040/sto_res.S98
-rw-r--r--arch/m68k/fpsp040/stwotox.S427
-rw-r--r--arch/m68k/fpsp040/tbldo.S554
-rw-r--r--arch/m68k/fpsp040/util.S748
-rw-r--r--arch/m68k/fpsp040/x_bsun.S47
-rw-r--r--arch/m68k/fpsp040/x_fline.S104
-rw-r--r--arch/m68k/fpsp040/x_operr.S356
-rw-r--r--arch/m68k/fpsp040/x_ovfl.S186
-rw-r--r--arch/m68k/fpsp040/x_snan.S277
-rw-r--r--arch/m68k/fpsp040/x_store.S256
-rw-r--r--arch/m68k/fpsp040/x_unfl.S269
-rw-r--r--arch/m68k/fpsp040/x_unimp.S77
-rw-r--r--arch/m68k/fpsp040/x_unsupp.S83
44 files changed, 16782 insertions, 0 deletions
diff --git a/arch/m68k/fpsp040/Makefile b/arch/m68k/fpsp040/Makefile
new file mode 100644
index 000000000000..0214d2f6f8b0
--- /dev/null
+++ b/arch/m68k/fpsp040/Makefile
@@ -0,0 +1,16 @@
1#
2# Makefile for Linux arch/m68k/fpsp040 source directory (Motorola M68040 floating point software package)
3# NOTE(review): OS_OBJS, the target of the last rule, is not assigned anywhere in this Makefile - confirm the fpsp.h dependency is still effective
4
5obj-y := bindec.o binstr.o decbin.o do_func.o gen_except.o get_op.o \
6 kernel_ex.o res_func.o round.o sacos.o sasin.o satan.o satanh.o \
7 scosh.o setox.o sgetem.o sint.o slog2.o slogn.o \
8 smovecr.o srem_mod.o scale.o \
9 ssin.o ssinh.o stan.o stanh.o sto_res.o stwotox.o tbldo.o util.o \
10 x_bsun.o x_fline.o x_operr.o x_ovfl.o x_snan.o x_store.o \
11 x_unfl.o x_unimp.o x_unsupp.o bugfix.o skeleton.o
12
13EXTRA_AFLAGS := -traditional
14EXTRA_LDFLAGS := -x
15
16$(OS_OBJS): fpsp.h
diff --git a/arch/m68k/fpsp040/README b/arch/m68k/fpsp040/README
new file mode 100644
index 000000000000..f5749446033e
--- /dev/null
+++ b/arch/m68k/fpsp040/README
@@ -0,0 +1,30 @@
1
2MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
3M68000 Hi-Performance Microprocessor Division
4M68040 Software Package
5
6M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
7All rights reserved.
8
9THE SOFTWARE is provided on an "AS IS" basis and without warranty.
10To the maximum extent permitted by applicable law,
11MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
12INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
13PARTICULAR PURPOSE and any warranty against infringement with
14regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
15and any accompanying written materials.
16
17To the maximum extent permitted by applicable law,
18IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
19(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
20PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
21OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
22SOFTWARE. Motorola assumes no responsibility for the maintenance
23and support of the SOFTWARE.
24
25You are hereby granted a copyright license to use, modify, and
26distribute the SOFTWARE so long as this entire notice is retained
27without alteration in any modified and/or redistributed versions,
28and that such modified versions are clearly identified as such.
29No licenses are granted by implication, estoppel or otherwise
30under any patents or trademarks of Motorola, Inc.
diff --git a/arch/m68k/fpsp040/bindec.S b/arch/m68k/fpsp040/bindec.S
new file mode 100644
index 000000000000..3ba446a99a12
--- /dev/null
+++ b/arch/m68k/fpsp040/bindec.S
@@ -0,0 +1,920 @@
1|
2| bindec.sa 3.4 1/3/91
3|
4| bindec
5|
6| Description:
7| Converts an input in extended precision format
8| to bcd format.
9|
10| Input:
11| a0 points to the input extended precision value
12| value in memory; d0 contains the k-factor sign-extended
13| to 32-bits. The input may be either normalized,
14| unnormalized, or denormalized.
15|
16| Output: result in the FP_SCR1 space on the stack.
17|
18| Saves and Modifies: D2-D7,A2,FP0-FP2
19|
20| Algorithm:
21|
22| A1. Set RM and size ext; Set SIGMA = sign of input.
23| The k-factor is saved for use in d7. Clear the
24| BINDEC_FLG for separating normalized/denormalized
25| input. If input is unnormalized or denormalized,
26| normalize it.
27|
28| A2. Set X = abs(input).
29|
30| A3. Compute ILOG.
31| ILOG is the log base 10 of the input value. It is
32| approximated by adding e + 0.f when the original
33| value is viewed as 2^^e * 1.f in extended precision.
34| This value is stored in d6.
35|
36| A4. Clr INEX bit.
37| The operation in A3 above may have set INEX2.
38|
39| A5. Set ICTR = 0;
40| ICTR is a flag used in A13. It must be set before the
41| loop entry A6.
42|
43| A6. Calculate LEN.
44| LEN is the number of digits to be displayed. The
45| k-factor can dictate either the total number of digits,
46| if it is a positive number, or the number of digits
47| after the decimal point which are to be included as
48| significant. See the 68882 manual for examples.
49| If LEN is computed to be greater than 17, set OPERR in
50| USER_FPSR. LEN is stored in d4.
51|
52| A7. Calculate SCALE.
53| SCALE is equal to 10^ISCALE, where ISCALE is the number
54| of decimal places needed to insure LEN integer digits
55| in the output before conversion to bcd. LAMBDA is the
56| sign of ISCALE, used in A9. Fp1 contains
57| 10^^(abs(ISCALE)) using a rounding mode which is a
58| function of the original rounding mode and the signs
59| of ISCALE and X. A table is given in the code.
60|
61| A8. Clr INEX; Force RZ.
62| The operation in A7 above may have set INEX2.
63| RZ mode is forced for the scaling operation to insure
64| only one rounding error. The grs bits are collected in
65| the INEX flag for use in A10.
66|
67| A9. Scale X -> Y.
68| The mantissa is scaled to the desired number of
69| significant digits. The excess digits are collected
70| in INEX2.
71|
72| A10. Or in INEX.
73| If INEX is set, round error occurred. This is
74| compensated for by 'or-ing' in the INEX2 flag to
75| the lsb of Y.
76|
77| A11. Restore original FPCR; set size ext.
78| Perform FINT operation in the user's rounding mode.
79| Keep the size to extended.
80|
81| A12. Calculate YINT = FINT(Y) according to user's rounding
82| mode. The FPSP routine sintdo is used. The output
83| is in fp0.
84|
85| A13. Check for LEN digits.
86| If the int operation results in more than LEN digits,
87| or less than LEN -1 digits, adjust ILOG and repeat from
88| A6. This test occurs only on the first pass. If the
89| result is exactly 10^LEN, increment ILOG and divide
90| the mantissa by 10.
91|
92| A14. Convert the mantissa to bcd.
93| The binstr routine is used to convert the LEN digit
94| mantissa to bcd in memory. The input to binstr is
95| to be a fraction; i.e. (mantissa)/10^LEN and adjusted
96| such that the decimal point is to the left of bit 63.
97| The bcd digits are stored in the correct position in
98| the final string area in memory.
99|
100| A15. Convert the exponent to bcd.
101| As in A14 above, the exp is converted to bcd and the
102| digits are stored in the final string.
103| Test the length of the final exponent string. If the
104| length is 4, set operr.
105|
106| A16. Write sign bits to final string.
107|
108| Implementation Notes:
109|
110| The registers are used as follows:
111|
112| d0: scratch; LEN input to binstr
113| d1: scratch
114| d2: upper 32-bits of mantissa for binstr
115| d3: scratch;lower 32-bits of mantissa for binstr
116| d4: LEN
117| d5: LAMBDA/ICTR
118| d6: ILOG
119| d7: k-factor
120| a0: ptr for original operand/final result
121| a1: scratch pointer
122| a2: pointer to FP_X; abs(original value) in ext
123| fp0: scratch
124| fp1: scratch
125| fp2: scratch
126| F_SCR1:
127| F_SCR2:
128| L_SCR1:
129| L_SCR2:
130
131| Copyright (C) Motorola, Inc. 1990
132| All Rights Reserved
133|
134| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
135| The copyright notice above does not evidence any
136| actual or intended publication of such source code.
137
138|BINDEC idnt 2,1 | Motorola 040 Floating Point Software Package
139
140#include "fpsp.h"
141
142 |section 8
143
144| Constants in extended precision (LOG2 is approximately log10(2); LOG2UP1 is LOG2 plus one in the last mantissa bit)
145LOG2: .long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
146LOG2UP1: .long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000
147
148| Constants in single precision (only the first lword is read by the single precision ops below)
149FONE: .long 0x3F800000,0x00000000,0x00000000,0x00000000 |1.0
150FTWO: .long 0x40000000,0x00000000,0x00000000,0x00000000 |2.0
151FTEN: .long 0x41200000,0x00000000,0x00000000,0x00000000 |10.0
152F4933: .long 0x459A2800,0x00000000,0x00000000,0x00000000 |4933.0
153| RBDTBL: rounding mode (0=RN, 2=RM, 3=RP) for computing 10^ISCALE, indexed in A7 by user rmode*4 + LAMBDA*2 + sign of X
154RBDTBL: .byte 0,0,0,0 |user RN: always RN
155 .byte 3,3,2,2 |user RZ: RP,RP,RM,RM
156 .byte 3,2,2,3 |user RM: RP,RM,RM,RP
157 .byte 2,3,3,2 |user RP: RM,RP,RP,RM
158
159 |xref binstr
160 |xref sintdo
161 |xref ptenrn,ptenrm,ptenrp
162
163 .global bindec
164 .global sc_mul
165bindec:
166 moveml %d2-%d7/%a2,-(%a7) |save d2-d7/a2 (restored before the rts)
167 fmovemx %fp0-%fp2,-(%a7) |save fp0-fp2 (restored before the rts)
168
169| A1. Set RM and size ext. Set SIGMA = sign input;
170| The k-factor is saved for use in d7. Clear BINDEC_FLG for
171| separating normalized/denormalized input. If the input
172| is a denormalized number, set the BINDEC_FLG memory byte
173| to signal denorm. If the input is unnormalized, normalize
174| the input and test for denormalized result.
175|
176 fmovel #rm_mode,%FPCR |set RM and ext
177 movel (%a0),L_SCR2(%a6) |save first lword of X (sign and exponent) for later sign checks
178 movel %d0,%d7 |move k-factor to d7
179 clrb BINDEC_FLG(%a6) |clr norm/denorm flag
180 movew STAG(%a6),%d0 |get stag
181 andiw #0xe000,%d0 |isolate stag bits
182 beq A2_str |if zero, input is norm
183|
184| Normalize the unnormalized or denormalized input
185|
186un_de_norm:
187 movew (%a0),%d0 |d0 = sign and exponent word of the input
188 andiw #0x7fff,%d0 |strip sign, keep the exponent
189 movel 4(%a0),%d1 |d1 = upper 32 bits of mantissa
190 movel 8(%a0),%d2 |d2 = lower 32 bits of mantissa
191norm_loop:
192 subw #1,%d0 |one less in the exponent for each shift
193 lsll #1,%d2 |shift d1:d2 left by one bit,
194 roxll #1,%d1 |moving the msb of d2 into d1
195 tstl %d1 |is the msb of the mantissa set yet
196 bges norm_loop |if not, shift again
197|
198| Test if the normalized input is denormalized
199|
200 tstw %d0 |check sign of the adjusted exponent
201 bgts pos_exp |if greater than zero, it is a norm
202 st BINDEC_FLG(%a6) |set flag for denorm
203pos_exp:
204 andiw #0x7fff,%d0 |keep 15 exponent bits; the stored sign bit becomes 0
205 movew %d0,(%a0) |write normalized exponent back to the operand
206 movel %d1,4(%a0) |write back upper mantissa
207 movel %d2,8(%a0) |write back lower mantissa
208
209| A2. Set X = abs(input).
210|
211A2_str:
212 movel (%a0),FP_SCR2(%a6) | copy sign/exponent lword of input to FP_SCR2
213 movel 4(%a0),FP_SCR2+4(%a6) | copy upper mantissa lword
214 movel 8(%a0),FP_SCR2+8(%a6) | copy lower mantissa lword
215 andil #0x7fffffff,FP_SCR2(%a6) |clear the sign bit to create abs(X)
216
217| A3. Compute ILOG.
218| ILOG is the log base 10 of the input value. It is approx-
219| imated by adding e + 0.f when the original value is viewed
220| as 2^^e * 1.f in extended precision. This value is stored
221| in d6.
222|
223| Register usage:
224| Input/Output
225| d0: k-factor/exponent
226| d2: x/x
227| d3: x/x
228| d4: x/x
229| d5: x/x
230| d6: x/ILOG
231| d7: k-factor/Unchanged
232| a0: ptr for original operand/final result
233| a1: x/x
234| a2: x/x
235| fp0: x/float(ILOG)
236| fp1: x/x
237| fp2: x/x
238| F_SCR1:x/x
239| F_SCR2:Abs(X)/Abs(X) with $3fff exponent
240| L_SCR1:x/x
241| L_SCR2:first word of X packed/Unchanged
242
243 tstb BINDEC_FLG(%a6) |check for denorm
244 beqs A3_cont |if clr, continue with norm
245 movel #-4933,%d6 |force ILOG = -4933
246 bras A4_str |denorm: skip the ILOG computation
247A3_cont:
248 movew FP_SCR2(%a6),%d0 |move exp to d0
249 movew #0x3fff,FP_SCR2(%a6) |replace exponent with 0x3fff
250 fmovex FP_SCR2(%a6),%fp0 |now fp0 has 1.f
251 subw #0x3fff,%d0 |strip off bias
252 faddw %d0,%fp0 |add in exp
253 fsubs FONE,%fp0 |subtract off 1.0
254 fbge pos_res |if pos or zero, branch
255 fmulx LOG2UP1,%fp0 |if neg, mul by LOG2UP1
256 fmovel %fp0,%d6 |put ILOG in d6 as a lword (FPCR was set to RM in A1)
257 bras A4_str |go move out ILOG
258pos_res:
259 fmulx LOG2,%fp0 |if pos, mul by LOG2
260 fmovel %fp0,%d6 |put ILOG in d6 as a lword (FPCR was set to RM in A1)
261
262
263| A4. Clr INEX bit.
264| The operation in A3 above may have set INEX2.
265
266A4_str:
267 fmovel #0,%FPSR |zero all of fpsr - none of its bits are needed past this point
268
269
270| A5. Set ICTR = 0;
271| ICTR is a flag used in A13. It must be set before the
272| loop entry A6. The lower word of d5 is used for ICTR.
273
274 clrw %d5 |clear ICTR (lower word of d5 only)
275
276
277| A6. Calculate LEN.
278| LEN is the number of digits to be displayed. The k-factor
279| can dictate either the total number of digits, if it is
280| a positive number, or the number of digits after the
281| original decimal point which are to be included as
282| significant. See the 68882 manual for examples.
283| If LEN is computed to be greater than 17, set OPERR in
284| USER_FPSR. LEN is stored in d4.
285|
286| Register usage:
287| Input/Output
288| d0: exponent/Unchanged
289| d2: x/x/scratch
290| d3: x/x
291| d4: exc picture/LEN
292| d5: ICTR/Unchanged
293| d6: ILOG/Unchanged
294| d7: k-factor/Unchanged
295| a0: ptr for original operand/final result
296| a1: x/x
297| a2: x/x
298| fp0: float(ILOG)/Unchanged
299| fp1: x/x
300| fp2: x/x
301| F_SCR1:x/x
302| F_SCR2:Abs(X) with $3fff exponent/Unchanged
303| L_SCR1:x/x
304| L_SCR2:first word of X packed/Unchanged
305
306A6_str:
307 tstl %d7 |branch on sign of k
308 bles k_neg |if k <= 0, LEN = ILOG + 1 - k
309 movel %d7,%d4 |if k > 0, LEN = k
310 bras len_ck |skip to LEN check
311k_neg:
312 movel %d6,%d4 |first load ILOG to d4
313 subl %d7,%d4 |subtract off k
314 addql #1,%d4 |add in the 1
315len_ck:
316 tstl %d4 |LEN check: branch on sign of LEN
317 bles LEN_ng |if LEN <= 0, set LEN = 1
318 cmpl #17,%d4 |test if LEN > 17
319 bles A7_str |if LEN <= 17, use it as is
320 movel #17,%d4 |set max LEN = 17
321 tstl %d7 |OPERR is only signaled for k > 0
322 bles A7_str |if k <= 0, clamp silently
323 orl #opaop_mask,USER_FPSR(%a6) |set OPERR & AIOP in USER_FPSR
324 bras A7_str |finished here
325LEN_ng:
326 moveql #1,%d4 |min LEN is 1
327
328
329| A7. Calculate SCALE.
330| SCALE is equal to 10^ISCALE, where ISCALE is the number
331| of decimal places needed to insure LEN integer digits
332| in the output before conversion to bcd. LAMBDA is the sign
333| of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
334| the rounding mode as given in the following table (see
335| Coonen, p. 7.23 as ref.; however, the SCALE variable is
336| of opposite sign in bindec.sa from Coonen).
337|
338| Initial USE
339| FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]
340| ----------------------------------------------
341| RN 00 0 0 00/0 RN
342| RN 00 0 1 00/0 RN
343| RN 00 1 0 00/0 RN
344| RN 00 1 1 00/0 RN
345| RZ 01 0 0 11/3 RP
346| RZ 01 0 1 11/3 RP
347| RZ 01 1 0 10/2 RM
348| RZ 01 1 1 10/2 RM
349| RM 10 0 0 11/3 RP
350| RM 10 0 1 10/2 RM
351| RM 10 1 0 10/2 RM
352| RM 10 1 1 11/3 RP
353| RP 11 0 0 10/2 RM
354| RP 11 0 1 11/3 RP
355| RP 11 1 0 11/3 RP
356| RP 11 1 1 10/2 RM
357|
358| Register usage:
359| Input/Output
360| d0: exponent/scratch - final is 0
361| d2: x/0 or 24 for A9
362| d3: x/scratch - offset ptr into PTENRM array
363| d4: LEN/Unchanged
364| d5: 0/ICTR:LAMBDA
365| d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
366| d7: k-factor/Unchanged
367| a0: ptr for original operand/final result
368| a1: x/ptr to PTENRM array
369| a2: x/x
370| fp0: float(ILOG)/Unchanged
371| fp1: x/10^ISCALE
372| fp2: x/x
373| F_SCR1:x/x
374| F_SCR2:Abs(X) with $3fff exponent/Unchanged
375| L_SCR1:x/x
376| L_SCR2:first word of X packed/Unchanged
377
378A7_str:
379 tstl %d7 |test sign of k
380 bgts k_pos |if k > 0, skip this
381 cmpl %d6,%d7 |test k - ILOG
382 blts k_pos |if k < ILOG, skip this
383 movel %d7,%d6 |if ((k<=0) & (ILOG <= k)) ILOG = k
384k_pos:
385 movel %d6,%d0 |calc ISCALE = ILOG + 1 - LEN in d0
386 addql #1,%d0 |add the 1
387 subl %d4,%d0 |sub off LEN
388 swap %d5 |use upper word of d5 for ICTR, lower word for LAMBDA
389 clrw %d5 |set LAMBDA to zero initially
390 clrw %d2 |d2 = 0: no extra 10^24 scaling in A9 unless set below
391 tstl %d0 |test sign of ISCALE
392 bges iscale |if pos or zero, skip the negative case handling
393 addqw #1,%d5 |if neg, set LAMBDA true
394 cmpl #0xffffecd4,%d0 |test iscale <= -4908
395 bgts no_inf |if false, skip rest
396 addil #24,%d0 |add in 24 to iscale
397 movel #24,%d2 |put 24 in d2 for A9
398no_inf:
399 negl %d0 |and take abs of ISCALE
400iscale:
401 fmoves FONE,%fp1 |init fp1 to 1
402 bfextu USER_FPCR(%a6){#26:#2},%d1 |get initial rmode bits
403 lslw #1,%d1 |put them in bits 2:1
404 addw %d5,%d1 |add in LAMBDA
405 lslw #1,%d1 |rmode now in bits 3:2, LAMBDA in bit 1
406 tstl L_SCR2(%a6) |test sign of original x
407 bges x_pos |if pos, don't set bit 0
408 addql #1,%d1 |if neg, set bit 0
409x_pos:
410 leal RBDTBL,%a2 |load rbdtbl base
411 moveb (%a2,%d1),%d3 |load low byte of d3 with new rmode | NOTE(review): upper bits of d3 are not cleared here - confirm they are zero before the fpcr load below
412 lsll #4,%d3 |shift rmode up into the FPCR mode field
413 fmovel %d3,%fpcr |load bits into fpu
414 lsrl #4,%d3 |shift back so the low byte of d3 is the plain rmode
415 tstb %d3 |decode new rmode for pten table
416 bnes not_rn |if zero, it is RN
417 leal PTENRN,%a1 |load a1 with RN table base
418 bras rmode |exit decode
419not_rn:
420 lsrb #1,%d3 |get lsb in carry
421 bccs not_rp |if carry clear, it is RM
422 leal PTENRP,%a1 |load a1 with RP table base
423 bras rmode |exit decode
424not_rp:
425 leal PTENRM,%a1 |load a1 with RM table base
426rmode:
427 clrl %d3 |clr table index
428e_loop:
429 lsrl #1,%d0 |shift next bit of abs(ISCALE) into carry
430 bccs e_next |if zero, skip the mul
431 fmulx (%a1,%d3),%fp1 |mul by the table entry for this bit: 10**(2**bit_no)
432e_next:
433 addl #12,%d3 |inc d3 to next pwrten table entry (12 bytes each)
434 tstl %d0 |test if any bits of ISCALE are left
435 bnes e_loop |if so, loop
436
437
438| A8. Clr INEX; Force RZ.
439| The operation in A7 above may have set INEX2.
440| RZ mode is forced for the scaling operation to insure
441| only one rounding error. The grs bits are collected in
442| the INEX flag for use in A10.
443|
444| Register usage:
445| Input/Output
446
447 fmovel #0,%FPSR |clr INEX (and the rest of the FPSR) before the scaling in A9
448 fmovel #rz_mode,%FPCR |set RZ rounding mode
449
450
451| A9. Scale X -> Y.
452| The mantissa is scaled to the desired number of significant
453| digits. The excess digits are collected in INEX2. If mul,
454| Check d2 for excess 10 exponential value. If not zero,
455| the iscale value would have caused the pwrten calculation
456| to overflow. Only a negative iscale can cause this, so
457| multiply by 10^(d2), which is now only allowed to be 24,
458| with a multiply by 10^8 and 10^16, which is exact since
459| 10^24 is exact. If the input was denormalized, we must
460| create a busy stack frame with the mul command and the
461| two operands, and allow the fpu to complete the multiply.
462|
463| Register usage:
464| Input/Output
465| d0: FPCR with RZ mode/Unchanged
466| d2: 0 or 24/unchanged
467| d3: x/x
468| d4: LEN/Unchanged
469| d5: ICTR:LAMBDA
470| d6: ILOG/Unchanged
471| d7: k-factor/Unchanged
472| a0: ptr for original operand/final result
473| a1: ptr to PTENRM array/Unchanged
474| a2: x/x
475| fp0: float(ILOG)/X adjusted for SCALE (Y)
476| fp1: 10^ISCALE/Unchanged
477| fp2: x/x
478| F_SCR1:x/x
479| F_SCR2:Abs(X) with $3fff exponent/Unchanged
480| L_SCR1:x/x
481| L_SCR2:first word of X packed/Unchanged
482
483A9_str:
484 fmovex (%a0),%fp0 |load X from memory
485 fabsx %fp0 |use abs(X)
486 tstw %d5 |LAMBDA is in lower word of d5
487 bne sc_mul |if neg (LAMBDA = 1), scale by mul
488 fdivx %fp1,%fp0 |calculate X / SCALE -> Y to fp0
489 bras A10_st |branch to A10
490
491sc_mul:
492 tstb BINDEC_FLG(%a6) |check for denorm
493 beqs A9_norm |if norm, continue with mul
494 fmovemx %fp1-%fp1,-(%a7) |load ETEMP with 10^ISCALE
495 movel 8(%a0),-(%a7) |load FPTEMP with input arg
496 movel 4(%a0),-(%a7) |(pushed low lword first,
497 movel (%a0),-(%a7) |exponent lword last)
498 movel #18,%d3 |load count for busy stack: dbf below clears 19 lwords
499A9_loop:
500 clrl -(%a7) |clear lword on stack
501 dbf %d3,A9_loop |repeat until the count in d3 runs out
502 moveb VER_TMP(%a6),(%a7) |write current version number
503 moveb #BUSY_SIZE-4,1(%a7) |write current busy size
504 moveb #0x10,0x44(%a7) |set fcefpte[15] bit
505 movew #0x0023,0x40(%a7) |load cmdreg1b with mul command
506 moveb #0xfe,0x8(%a7) |load all 1s to cu savepc
507 frestore (%a7)+ |restore frame to fpu for completion
508 fmulx 36(%a1),%fp0 |multiply fp0 by 10^8
509 fmulx 48(%a1),%fp0 |multiply fp0 by 10^16
510 bras A10_st |denorm path done, go to A10
511A9_norm:
512 tstw %d2 |test for small exp case (d2 = 24 set in A7)
513 beqs A9_con |if zero, continue as normal
514 fmulx 36(%a1),%fp0 |multiply fp0 by 10^8
515 fmulx 48(%a1),%fp0 |multiply fp0 by 10^16
516A9_con:
517 fmulx %fp1,%fp0 |calculate X * SCALE -> Y to fp0
518
519
520| A10. Or in INEX.
521| If INEX is set, round error occurred. This is compensated
522| for by 'or-ing' in the INEX2 flag to the lsb of Y.
523|
524| Register usage:
525| Input/Output
526| d0: FPCR with RZ mode/FPSR with INEX2 isolated
527| d2: x/x
528| d3: x/x
529| d4: LEN/Unchanged
530| d5: ICTR:LAMBDA
531| d6: ILOG/Unchanged
532| d7: k-factor/Unchanged
533| a0: ptr for original operand/final result
534| a1: ptr to PTENxx array/Unchanged
535| a2: x/ptr to FP_SCR2(a6)
536| fp0: Y/Y with lsb adjusted
537| fp1: 10^ISCALE/Unchanged
538| fp2: x/x
539
540A10_st:
541 fmovel %FPSR,%d0 |get FPSR left by the scaling in A9
542 fmovex %fp0,FP_SCR2(%a6) |move Y to memory
543 leal FP_SCR2(%a6),%a2 |load a2 with ptr to FP_SCR2
544 btstl #9,%d0 |check if INEX2 set
545 beqs A11_st |if clear, Y is exact, skip rest
546 oril #1,8(%a2) |or in 1 to lsb of mantissa
547 fmovex FP_SCR2(%a6),%fp0 |write adjusted Y back to fpu
548
549
550| A11. Restore original FPCR; set size ext.
551| Perform FINT operation in the user's rounding mode. Keep
552| the size to extended. The sintdo entry point in the sint
553| routine expects the FPCR value to be in USER_FPCR for
554| mode and precision. The original FPCR is saved in L_SCR1.
555
556A11_st:
557 movel USER_FPCR(%a6),L_SCR1(%a6) |save the original USER_FPCR in L_SCR1 for later
558 andil #0x00000030,USER_FPCR(%a6) |keep only the rounding mode bits: sets size to ext,
559| ;block exceptions
560
561
562| A12. Calculate YINT = FINT(Y) according to user's rounding mode.
563| The FPSP routine sintdo is used. The output is in fp0.
564|
565| Register usage:
566| Input/Output
567| d0: FPSR with AINEX cleared/FPCR with size set to ext
568| d2: x/x/scratch
569| d3: x/x
570| d4: LEN/Unchanged
571| d5: ICTR:LAMBDA/Unchanged
572| d6: ILOG/Unchanged
573| d7: k-factor/Unchanged
574| a0: ptr for original operand/src ptr for sintdo
575| a1: ptr to PTENxx array/Unchanged
576| a2: ptr to FP_SCR2(a6)/Unchanged
577| a6: temp pointer to FP_SCR2(a6) - orig value saved and restored
578| fp0: Y/YINT
579| fp1: 10^ISCALE/Unchanged
580| fp2: x/x
581| F_SCR1:x/x
582| F_SCR2:Y adjusted for inex/Y with original exponent
583| L_SCR1:x/original USER_FPCR
584| L_SCR2:first word of X packed/Unchanged
585
586A12_st:
587 moveml %d0-%d1/%a0-%a1,-(%a7) |save regs used by sintdo
588 movel L_SCR1(%a6),-(%a7) |save L_SCR1 (original USER_FPCR) across the call
589 movel L_SCR2(%a6),-(%a7) |save L_SCR2 (first lword of X) across the call
590 leal FP_SCR2(%a6),%a0 |a0 is ptr to F_SCR2(a6)
591 fmovex %fp0,(%a0) |move Y to memory at FP_SCR2(a6)
592 tstl L_SCR2(%a6) |test sign of original operand
593 bges do_fint |if pos, use Y
594 orl #0x80000000,(%a0) |if neg, use -Y
595do_fint:
596 movel USER_FPSR(%a6),-(%a7) |save USER_FPSR across the call
597 bsr sintdo |sint routine returns int in fp0
598 moveb (%a7),USER_FPSR(%a6) |restore only the first byte of the saved USER_FPSR
599 addl #4,%a7 |drop the saved USER_FPSR lword
600 movel (%a7)+,L_SCR2(%a6) |restore L_SCR2
601 movel (%a7)+,L_SCR1(%a6) |restore L_SCR1
602 moveml (%a7)+,%d0-%d1/%a0-%a1 |restore regs used by sint
603 movel L_SCR2(%a6),FP_SCR2(%a6) |restore original exponent
604 movel L_SCR1(%a6),USER_FPCR(%a6) |restore user's FPCR
605
606
607| A13. Check for LEN digits.
608| If the int operation results in more than LEN digits,
609| or less than LEN -1 digits, adjust ILOG and repeat from
610| A6. This test occurs only on the first pass. If the
611| result is exactly 10^LEN, increment ILOG and divide
612| the mantissa by 10. The calculation of 10^LEN cannot
613| be inexact, since all powers of ten up to 10^27 are exact
614| in extended precision, so the use of a previous power-of-ten
615| table will introduce no error.
616|
617|
618| Register usage:
619| Input/Output
620| d0: FPCR with size set to ext/scratch final = 0
621| d2: x/x
622| d3: x/scratch final = x
623| d4: LEN/LEN adjusted
624| d5: ICTR:LAMBDA/LAMBDA:ICTR
625| d6: ILOG/ILOG adjusted
626| d7: k-factor/Unchanged
627| a0: pointer into memory for packed bcd string formation
628| a1: ptr to PTENxx array/Unchanged
629| a2: ptr to FP_SCR2(a6)/Unchanged
630| fp0: int portion of Y/abs(YINT) adjusted
631| fp1: 10^ISCALE/Unchanged
632| fp2: x/10^LEN
633| F_SCR1:x/x
634| F_SCR2:Y with original exponent/Unchanged
635| L_SCR1:original USER_FPCR/Unchanged
636| L_SCR2:first word of X packed/Unchanged
637
638A13_st:
639 swap %d5 |put ICTR in lower word of d5
640 tstw %d5 |check if ICTR = 0
641 bne not_zr |if non-zero, go to second test
642|
643| Compute 10^(LEN-1)
644|
645 fmoves FONE,%fp2 |init fp2 to 1.0
646 movel %d4,%d0 |put LEN in d0
647 subql #1,%d0 |d0 = LEN -1
648 clrl %d3 |clr table index
649l_loop:
650 lsrl #1,%d0 |shift next bit of LEN-1 into carry
651 bccs l_next |if zero, skip the mul
652 fmulx (%a1,%d3),%fp2 |mul by the table entry for this bit: 10**(2**bit_no)
653l_next:
654 addl #12,%d3 |inc d3 to next pwrten table entry
655 tstl %d0 |test if any bits of LEN-1 are left
656 bnes l_loop |if so, loop
657|
658| 10^(LEN-1) is computed for this test and A14. If the input was
659| denormalized, check only the case in which YINT > 10^LEN.
660|
661 tstb BINDEC_FLG(%a6) |check if input was norm
662 beqs A13_con |if norm, continue with checking
663 fabsx %fp0 |take abs of YINT
664 bra test_2 |denorm: skip the 10^(LEN-1) test
665|
666| Compare abs(YINT) to 10^(LEN-1) and 10^LEN
667|
668A13_con:
669 fabsx %fp0 |take abs of YINT
670 fcmpx %fp2,%fp0 |compare abs(YINT) with 10^(LEN-1)
671 fbge test_2 |if greater or equal, do next test
672 subql #1,%d6 |too few digits: subtract 1 from ILOG
673 movew #1,%d5 |set ICTR
674 fmovel #rm_mode,%FPCR |set rmode to RM
675 fmuls FTEN,%fp2 |compute 10^LEN
676 bra A6_str |return to A6 and recompute YINT
677test_2:
678 fmuls FTEN,%fp2 |compute 10^LEN
679 fcmpx %fp2,%fp0 |compare abs(YINT) with 10^LEN
680 fblt A14_st |if less, all is ok, go to A14
681 fbgt fix_ex |if greater, fix and redo
682 fdivs FTEN,%fp0 |if equal, divide by 10
683 addql #1,%d6 | and inc ILOG
684 bras A14_st | and continue elsewhere
685fix_ex:
686 addql #1,%d6 |increment ILOG by 1
687 movew #1,%d5 |set ICTR
688 fmovel #rm_mode,%FPCR |set rmode to RM
689 bra A6_str |return to A6 and recompute YINT
690|
691| Since ICTR <> 0, we have already been through one adjustment,
692| and shouldn't have another; this is to check if abs(YINT) = 10^LEN
693| 10^LEN is again computed using whatever table is in a1 since the
694| value calculated cannot be inexact.
695|
696not_zr:
697 fmoves FONE,%fp2 |init fp2 to 1.0
698 movel %d4,%d0 |put LEN in d0
699 clrl %d3 |clr table index
700z_loop:
701 lsrl #1,%d0 |shift next bit of LEN into carry
702 bccs z_next |if zero, skip the mul
703 fmulx (%a1,%d3),%fp2 |mul by the table entry for this bit: 10**(2**bit_no)
704z_next:
705 addl #12,%d3 |inc d3 to next pwrten table entry
706 tstl %d0 |test if any bits of LEN are left
707 bnes z_loop |if so, loop
708 fabsx %fp0 |get abs(YINT)
709 fcmpx %fp2,%fp0 |check if abs(YINT) = 10^LEN
710 fbne A14_st |if not, skip this
711 fdivs FTEN,%fp0 |divide abs(YINT) by 10
712 addql #1,%d6 |and inc ILOG by 1
713 addql #1,%d4 | and inc LEN
714 fmuls FTEN,%fp2 | since LEN was incremented, get the new 10^^LEN
715
716
717| A14. Convert the mantissa to bcd.
718| The binstr routine is used to convert the LEN digit
719| mantissa to bcd in memory. The input to binstr is
720| to be a fraction; i.e. (mantissa)/10^LEN and adjusted
721| such that the decimal point is to the left of bit 63.
722| The bcd digits are stored in the correct position in
723| the final string area in memory.
724|
725|
726| Register usage:
727| Input/Output
728| d0: x/LEN call to binstr - final is 0
729| d1: x/0
730| d2: x/ms 32-bits of mant of abs(YINT)
731| d3: x/ls 32-bits of mant of abs(YINT)
732| d4: LEN/Unchanged
733| d5: ICTR:LAMBDA/LAMBDA:ICTR
734| d6: ILOG
735| d7: k-factor/Unchanged
736| a0: pointer into memory for packed bcd string formation
737| /ptr to first mantissa byte in result string
738| a1: ptr to PTENxx array/Unchanged
739| a2: ptr to FP_SCR2(a6)/Unchanged
740| fp0: int portion of Y/abs(YINT) adjusted
741| fp1: 10^ISCALE/Unchanged
742| fp2: 10^LEN/Unchanged
743| F_SCR1:x/Work area for final result
744| F_SCR2:Y with original exponent/Unchanged
745| L_SCR1:original USER_FPCR/Unchanged
746| L_SCR2:first word of X packed/Unchanged
747
748A14_st:
749 fmovel #rz_mode,%FPCR |force rz for conversion
750 fdivx %fp2,%fp0 |divide abs(YINT) by 10^LEN
751 leal FP_SCR1(%a6),%a0 |a0 is ptr to FP_SCR1, the result work area
752 fmovex %fp0,(%a0) |move abs(YINT)/10^LEN to memory
753 movel 4(%a0),%d2 |move 2nd lword of FP_SCR1 (upper mantissa) to d2
754 movel 8(%a0),%d3 |move 3rd lword of FP_SCR1 (lower mantissa) to d3
755 clrl 4(%a0) |zero lword 2 of FP_SCR1
756 clrl 8(%a0) |zero lword 3 of FP_SCR1
757 movel (%a0),%d0 |move exponent to d0
758 swap %d0 |put exponent in lower word
759 beqs no_sft |if zero, don't shift
760 subil #0x3ffd,%d0 |sub bias less 2 to make fract
761 tstl %d0 |check if > 1
762 bgts no_sft |if so, don't shift
763 negl %d0 |make exp positive
764m_loop:
765 lsrl #1,%d2 |shift d2:d3 right, add 0s
766 roxrl #1,%d3 |the number of places
767 dbf %d0,m_loop |given in d0
768no_sft:
769 tstl %d2 |check for mantissa of zero
770 bnes no_zr |if not, go on
771 tstl %d3 |continue zero check
772 beqs zer_m |if zero, go directly to binstr
773no_zr:
774 clrl %d1 |put zero in d1 for addx
775 addil #0x00000080,%d3 |inc at bit 7
776 addxl %d1,%d2 |continue inc: carry into the upper lword
777 andil #0xffffff80,%d3 |strip off lsb not used by 882
778zer_m:
779 movel %d4,%d0 |put LEN in d0 for binstr call
780 addql #3,%a0 |a0 points to M16 byte in result
781 bsr binstr |call binstr to convert mant
782
783
784| A15. Convert the exponent to bcd.
785| As in A14 above, the exp is converted to bcd and the
786| digits are stored in the final string.
787|
788| Digits are stored in L_SCR1(a6) on return from binstr as:
789|
790| 32 16 15 0
791| -----------------------------------------
792| | 0 | e3 | e2 | e1 | e4 | X | X | X |
793| -----------------------------------------
794|
795| And are moved into their proper places in FP_SCR1. If digit e4
796| is non-zero, OPERR is signaled. In all cases, all 4 digits are
797| written as specified in the 881/882 manual for packed decimal.
798|
799| Register usage:
800| Input/Output
801| d0: x/LEN call to binstr - final is 0
802| d1: x/scratch (0);shift count for final exponent packing
803| d2: x/ms 32-bits of exp fraction/scratch
804| d3: x/ls 32-bits of exp fraction
805| d4: LEN/Unchanged
806| d5: ICTR:LAMBDA/LAMBDA:ICTR
807| d6: ILOG
808| d7: k-factor/Unchanged
809| a0: ptr to result string/ptr to L_SCR1(a6)
810| a1: ptr to PTENxx array/Unchanged
811| a2: ptr to FP_SCR2(a6)/Unchanged
812| fp0: abs(YINT) adjusted/float(ILOG)
813| fp1: 10^ISCALE/Unchanged
814| fp2: 10^LEN/Unchanged
815| F_SCR1:Work area for final result/BCD result
816| F_SCR2:Y with original exponent/ILOG/10^4
817| L_SCR1:original USER_FPCR/Exponent digits on return from binstr
818| L_SCR2:first word of X packed/Unchanged
819
820A15_st:
821 tstb BINDEC_FLG(%a6) |check for denorm
822 beqs not_denorm |if clr, input was a norm
823 ftstx %fp0 |test scaled mantissa for zero
824 fbeq den_zero |if zero, use k-factor or 4933
825 fmovel %d6,%fp0 |float ILOG
826 fabsx %fp0 |get abs of ILOG
827 bras convrt |go convert the exponent
828den_zero:
829 tstl %d7 |check sign of the k-factor
830 blts use_ilog |if negative, use ILOG
831 fmoves F4933,%fp0 |force exponent to 4933
832 bras convrt |do it
833use_ilog:
834 fmovel %d6,%fp0 |float ILOG
835 fabsx %fp0 |get abs of ILOG
836 bras convrt |go convert the exponent
837not_denorm:
838 ftstx %fp0 |test scaled mantissa for zero
839 fbne not_zero |if not zero, use ILOG; otherwise force exponent
840 fmoves FONE,%fp0 |force exponent to 1
841 bras convrt |do it
842not_zero:
843 fmovel %d6,%fp0 |float ILOG
844 fabsx %fp0 |get abs of ILOG
845convrt:
846 fdivx 24(%a1),%fp0 |compute ILOG/10^4
847 fmovex %fp0,FP_SCR2(%a6) |store fp0 in memory
848 movel 4(%a2),%d2 |move lword 2 (upper mantissa) to d2
849 movel 8(%a2),%d3 |move lword 3 (lower mantissa) to d3
850 movew (%a2),%d0 |move exp to d0
851 beqs x_loop_fin |if zero, skip the shift
852 subiw #0x3ffd,%d0 |subtract off bias less 2, as in A14
853 negw %d0 |make exp positive
854x_loop:
855 lsrl #1,%d2 |shift d2:d3 right
856 roxrl #1,%d3 |the number of places
857 dbf %d0,x_loop |given in d0
858x_loop_fin:
859 clrl %d1 |put zero in d1 for addx
860 addil #0x00000080,%d3 |inc at bit 7
861 addxl %d1,%d2 |continue inc: carry into the upper lword
862 andil #0xffffff80,%d3 |strip off lsb not used by 882
863 movel #4,%d0 |put 4 in d0 for binstr call
864 leal L_SCR1(%a6),%a0 |a0 is ptr to L_SCR1 for exp digits
865 bsr binstr |call binstr to convert exp
866 movel L_SCR1(%a6),%d0 |load L_SCR1 lword to d0
867 movel #12,%d1 |use d1 for shift count
868 lsrl %d1,%d0 |shift d0 right by 12
869 bfins %d0,FP_SCR1(%a6){#4:#12} |put e3:e2:e1 in FP_SCR1
870 lsrl %d1,%d0 |shift d0 right by 12 again
871 bfins %d0,FP_SCR1(%a6){#16:#4} |put e4 in FP_SCR1
872 tstb %d0 |check if e4 is zero
873 beqs A16_st |if zero, skip rest
874 orl #opaop_mask,USER_FPSR(%a6) |set OPERR & AIOP in USER_FPSR
875
876
877| A16. Write sign bits to final string.
878| Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
879|
880| Register usage:
881| Input/Output
882| d0: x/scratch - final is x
883| d2: x/x
884| d3: x/x
885| d4: LEN/Unchanged
886| d5: ICTR:LAMBDA/LAMBDA:ICTR
887| d6: ILOG/ILOG adjusted
888| d7: k-factor/Unchanged
889| a0: ptr to L_SCR1(a6)/Unchanged
890| a1: ptr to PTENxx array/Unchanged
891| a2: ptr to FP_SCR2(a6)/Unchanged
892| fp0: float(ILOG)/Unchanged
893| fp1: 10^ISCALE/Unchanged
894| fp2: 10^LEN/Unchanged
895| F_SCR1:BCD result with correct signs
896| F_SCR2:ILOG/10^4
897| L_SCR1:Exponent digits on return from binstr
898| L_SCR2:first word of X packed/Unchanged
899
900A16_st:
901 clrl %d0 |clr d0 for collection of signs
902 andib #0x0f,FP_SCR1(%a6) |clear first nibble of FP_SCR1
903 tstl L_SCR2(%a6) |check sign of original mantissa
904 bges mant_p |if pos, don't set SM
905 moveql #2,%d0 |move 2 in to d0 for SM
906mant_p:
907 tstl %d6 |check sign of ILOG
908 bges wr_sgn |if pos, don't set SE
909 addql #1,%d0 |set bit 0 in d0 for SE
910wr_sgn:
911 bfins %d0,FP_SCR1(%a6){#0:#2} |insert SM and SE into FP_SCR1
912
913| Clean up and restore all registers used.
914
915 fmovel #0,%FPSR |clear possible inex2/ainex bits
916 fmovemx (%a7)+,%fp0-%fp2 |restore fp0-fp2 saved at entry
917 moveml (%a7)+,%d2-%d7/%a2 |restore d2-d7/a2 saved at entry
918 rts
919
920 |end
diff --git a/arch/m68k/fpsp040/binstr.S b/arch/m68k/fpsp040/binstr.S
new file mode 100644
index 000000000000..d53555c0a2b6
--- /dev/null
+++ b/arch/m68k/fpsp040/binstr.S
@@ -0,0 +1,140 @@
1|
2| binstr.sa 3.3 12/19/90
3|
4|
5| Description: Converts a 64-bit binary integer to bcd.
6|
7| Input: 64-bit binary integer in d2:d3, desired length (LEN) in
8| d0, and a pointer to start in memory for bcd characters
9| in a0. (This pointer must point to byte 4 of the first
10| lword of the packed decimal memory string.)
11|
12| Output: LEN bcd digits representing the 64-bit integer.
13|
14| Algorithm:
15| The 64-bit binary is assumed to have a decimal point before
16| bit 63. The fraction is multiplied by 10 using a mul by 2
17| shift and a mul by 8 shift. The bits shifted out of the
18| msb form a decimal digit. This process is iterated until
19| LEN digits are formed.
20|
21| A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the
22| digit formed will be assumed the least significant. This is
23| to force the first byte formed to have a 0 in the upper 4 bits.
24|
25| A2. Beginning of the loop:
26| Copy the fraction in d2:d3 to d4:d5.
27|
28| A3. Multiply the fraction in d2:d3 by 8 using bit-field
29| extracts and shifts. The three msbs from d2 will go into
30| d1.
31|
32| A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb
33| will be collected by the carry.
34|
35| A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5
36| into d2:d3. D1 will contain the bcd digit formed.
37|
38| A6. Test d7. If zero, the digit formed is the ms digit. If non-
39| zero, it is the ls digit. Put the digit in its place in the
40| upper word of d7. If it is the ls digit, write the byte
41| from d7 to memory.
42|
43| A7. Decrement d0 (LEN counter) and repeat the loop until zero.
44|
45| Implementation Notes:
46|
47| The registers are used as follows:
48|
49| d0: LEN counter
50| d1: temp used to form the digit
51| d2: upper 32-bits of fraction for mul by 8
52| d3: lower 32-bits of fraction for mul by 8
53| d4: upper 32-bits of fraction for mul by 2
54| d5: lower 32-bits of fraction for mul by 2
55| d6: temp for bit-field extracts
56| d7: byte digit formation word;digit count {0,1}
57| a0: pointer into memory for packed bcd string formation
58|
59
60| Copyright (C) Motorola, Inc. 1990
61| All Rights Reserved
62|
63| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
64| The copyright notice above does not evidence any
65| actual or intended publication of such source code.
66
67|BINSTR idnt 2,1 | Motorola 040 Floating Point Software Package
68
69 |section 8
70
71#include "fpsp.h"
72
73 .global binstr
74binstr:
75 moveml %d0-%d7,-(%a7) |save all data registers; restored at end_bstr
76| On entry: d2:d3 = 64-bit fraction, d0 = LEN, a0 = where the bcd bytes go.
77| A1: Init d7
78| NOTE(review): LEN = 0 is not guarded against here - confirm callers never pass it.
79 moveql #1,%d7 |d7 low word = 1: first digit formed is treated as an ls nibble
80 subql #1,%d0 |dbf makes count+1 passes, so loop on LEN-1
81|
82| A2. Copy d2:d3 to d4:d5. Start loop.
83|
84loop:
85 movel %d2,%d4 |copy the fraction before muls
86 movel %d3,%d5 |to d4:d5
87|
88| A3. Multiply d2:d3 by 8; extract msbs into d1.
89|
90 bfextu %d2{#0:#3},%d1 |copy 3 msbs of d2 into d1
91 asll #3,%d2 |shift d2 left by 3 places
92 bfextu %d3{#0:#3},%d6 |copy 3 msbs of d3 into d6
93 asll #3,%d3 |shift d3 left by 3 places
94 orl %d6,%d2 |or in msbs from d3 into d2
95|
96| A4. Multiply d4:d5 by 2; add carry out to d1.
97|
98 asll #1,%d5 |mul d5 by 2; msb goes to extend
99 roxll #1,%d4 |mul d4 by 2, rotating extend in; msb goes to extend
100 swap %d6 |d6 held only 3 bits, so its low word is now 0
101 addxw %d6,%d1 |d1 = d1 + 0 + extend from mul by 2
102|
103| A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
104| The adds below are chained through the extend bit; keep their order.
105 addl %d5,%d3 |add lower 32 bits
106 nop |ERRATA ; FIX #13 (Rev. 1.2 6/6/90)
107 addxl %d4,%d2 |add with extend upper 32 bits
108 nop |ERRATA ; FIX #13 (Rev. 1.2 6/6/90)
109 addxw %d6,%d1 |d1 = d1 + 0 + extend from the 64-bit add
110 swap %d6 |swap back: 3-bit value to low word, 0 to upper word
111|
112| A6. Test d7 and branch.
113|
114 tstw %d7 |d7 low word is the digit-count flag {0,1}
115 beqs first_d |zero: hold digit as ms nibble; else form byte & write
116sec_d:
117 swap %d7 |bring held ms digit to low word
118 aslw #4,%d7 |move it to the upper 4 bits
119 addw %d1,%d7 |add in the new (ls) digit
120 moveb %d7,(%a0)+ |store the packed byte, advance a0
121 swap %d7 |bring digit-count flag back to low word
122 clrw %d7 |flag = 0: no digit pending
123 dbf %d0,loop |do loop some more!
124 bras end_bstr |finished, so exit
125first_d:
126 swap %d7 |bring digit-holding word to low word
127 movew %d1,%d7 |hold the new digit there
128 swap %d7 |bring digit-count flag back to low word
129 addqw #1,%d7 |flag = 1: ms digit is pending
130 dbf %d0,loop |do loop some more!
131 swap %d7 |LEN exhausted with a digit pending:
132 lslw #4,%d7 |move it to upper 4 bits (lower 4 bits become 0)
133 moveb %d7,(%a0)+ |store it in memory string
134|
135| Clean up and return. The bcd digits are in memory; d0-d7 are restored.
136| a0 is not saved above, so it returns advanced past the last byte stored.
137end_bstr:
138 moveml (%a7)+,%d0-%d7
139 rts
140 |end
diff --git a/arch/m68k/fpsp040/bugfix.S b/arch/m68k/fpsp040/bugfix.S
new file mode 100644
index 000000000000..942c4f6f4fd1
--- /dev/null
+++ b/arch/m68k/fpsp040/bugfix.S
@@ -0,0 +1,496 @@
1|
2| bugfix.sa 3.2 1/31/91
3|
4|
5| This file contains workarounds for bugs in the 040
6| relating to the Floating-Point Software Package (FPSP)
7|
8| Fixes for bugs: 1238
9|
10| Bug: 1238
11|
12|
13| /* The following dirty_bit clear should be left in
14| * the handler permanently to improve throughput.
15| * The dirty_bits are located at bits [23:16] in
16| * longword $08 in the busy frame $4x60. Bit 16
17| * corresponds to FP0, bit 17 corresponds to FP1,
18| * and so on.
19| */
20| if (E3_exception_just_serviced) {
21| dirty_bit[cmdreg3b[9:7]] = 0;
22| }
23|
24| if (fsave_format_version != $40) {goto NOFIX}
25|
26| if !(E3_exception_just_serviced) {goto NOFIX}
27| if (cupc == 0000000) {goto NOFIX}
28| if ((cmdreg1b[15:13] != 000) &&
29| (cmdreg1b[15:10] != 010001)) {goto NOFIX}
30| if (((cmdreg1b[15:13] != 000) || ((cmdreg1b[12:10] != cmdreg2b[9:7]) &&
31| (cmdreg1b[12:10] != cmdreg3b[9:7])) ) &&
32| ((cmdreg1b[ 9: 7] != cmdreg2b[9:7]) &&
33| (cmdreg1b[ 9: 7] != cmdreg3b[9:7])) ) {goto NOFIX}
34|
35| /* Note: for 6d43b or 8d43b, you may want to add the following code
36| * to get better coverage. (If you do not insert this code, the part
37| * won't lock up; it will simply get the wrong answer.)
38| * Do NOT insert this code for 10d43b or later parts.
39| *
40| * if (fpiarcu == integer stack return address) {
41| * cupc = 0000000;
42| * goto NOFIX;
43| * }
44| */
45|
46| if (cmdreg1b[15:13] != 000) {goto FIX_OPCLASS2}
47| FIX_OPCLASS0:
48| if (((cmdreg1b[12:10] == cmdreg2b[9:7]) ||
49| (cmdreg1b[ 9: 7] == cmdreg2b[9:7])) &&
50| (cmdreg1b[12:10] != cmdreg3b[9:7]) &&
51| (cmdreg1b[ 9: 7] != cmdreg3b[9:7])) { /* xu conflict only */
52| /* We execute the following code if there is an
53| xu conflict and NOT an nu conflict */
54|
55| /* first save some values on the fsave frame */
56| stag_temp = STAG[fsave_frame];
57| cmdreg1b_temp = CMDREG1B[fsave_frame];
58| dtag_temp = DTAG[fsave_frame];
59| ete15_temp = ETE15[fsave_frame];
60|
61| CUPC[fsave_frame] = 0000000;
62| FRESTORE
63| FSAVE
64|
65| /* If the xu instruction is exceptional, we punt.
66| * Otherwise, we would have to include OVFL/UNFL handler
67| * code here to get the correct answer.
68| */
69| if (fsave_frame_format == $4060) {goto KILL_PROCESS}
70|
71| fsave_frame = /* build a long frame of all zeros */
72| fsave_frame_format = $4060; /* label it as long frame */
73|
74| /* load it with the temps we saved */
75| STAG[fsave_frame] = stag_temp;
76| CMDREG1B[fsave_frame] = cmdreg1b_temp;
77| DTAG[fsave_frame] = dtag_temp;
78| ETE15[fsave_frame] = ete15_temp;
79|
80| /* Make sure that the cmdreg3b dest reg is not going to
81| * be destroyed by a FMOVEM at the end of all this code.
82| * If it is, you should move the current value of the reg
83| * onto the stack so that the reg will be loaded with that value.
84| */
85|
86| /* All done. Proceed with the code below */
87| }
88|
89| etemp = FP_reg_[cmdreg1b[12:10]];
90| ete15 = ~ete14;
91| cmdreg1b[15:10] = 010010;
92| clear(bug_flag_procIDxxxx);
93| FRESTORE and return;
94|
95|
96| FIX_OPCLASS2:
97| if ((cmdreg1b[9:7] == cmdreg2b[9:7]) &&
98| (cmdreg1b[9:7] != cmdreg3b[9:7])) { /* xu conflict only */
99| /* We execute the following code if there is an
100| xu conflict and NOT an nu conflict */
101|
102| /* first save some values on the fsave frame */
103| stag_temp = STAG[fsave_frame];
104| cmdreg1b_temp = CMDREG1B[fsave_frame];
105| dtag_temp = DTAG[fsave_frame];
106| ete15_temp = ETE15[fsave_frame];
107| etemp_temp = ETEMP[fsave_frame];
108|
109| CUPC[fsave_frame] = 0000000;
110| FRESTORE
111| FSAVE
112|
113|
114| /* If the xu instruction is exceptional, we punt.
115| * Otherwise, we would have to include OVFL/UNFL handler
116| * code here to get the correct answer.
117| */
118| if (fsave_frame_format == $4060) {goto KILL_PROCESS}
119|
120| fsave_frame = /* build a long frame of all zeros */
121| fsave_frame_format = $4060; /* label it as long frame */
122|
123| /* load it with the temps we saved */
124| STAG[fsave_frame] = stag_temp;
125| CMDREG1B[fsave_frame] = cmdreg1b_temp;
126| DTAG[fsave_frame] = dtag_temp;
127| ETE15[fsave_frame] = ete15_temp;
128| ETEMP[fsave_frame] = etemp_temp;
129|
130| /* Make sure that the cmdreg3b dest reg is not going to
131| * be destroyed by a FMOVEM at the end of all this code.
132| * If it is, you should move the current value of the reg
133| * onto the stack so that the reg will be loaded with that value.
134| */
135|
136| /* All done. Proceed with the code below */
137| }
138|
139| if (etemp_exponent == min_sgl) etemp_exponent = min_dbl;
140| if (etemp_exponent == max_sgl) etemp_exponent = max_dbl;
141| cmdreg1b[15:10] = 010101;
142| clear(bug_flag_procIDxxxx);
143| FRESTORE and return;
144|
145|
146| NOFIX:
147| clear(bug_flag_procIDxxxx);
148| FRESTORE and return;
149|
150
151
152| Copyright (C) Motorola, Inc. 1990
153| All Rights Reserved
154|
155| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
156| The copyright notice above does not evidence any
157| actual or intended publication of such source code.
158
159|BUGFIX idnt 2,1 | Motorola 040 Floating Point Software Package
160
161 |section 8
162
163#include "fpsp.h"
164
165 |xref fpsp_fmt_error
166
167 .global b1238_fix
168b1238_fix:
169|
170| This code is entered only on completion of the handling of an
171| nu-generated ovfl, unfl, or inex exception. If the version
172| number of the fsave is not $40, this handler is not necessary.
173| Simply branch to fix_done and exit normally.
174| Uses d0 and d1 as scratch; neither is saved here.
175 cmpib #VER_40,4(%a7) |first byte past the bsr return address
176 bne fix_done
177|
178| Test for cu_savepc equal to zero (ignoring bit 0). If so, this
179| is not a bug #1238 case.
180|
181 moveb CU_SAVEPC(%a6),%d0
182 andib #0xFE,%d0 |drop bit 0 before the zero test
183 beq fix_done |if zero, this is not bug #1238
184
185|
186| Test the register conflict aspect. If opclass 0, check for
187| cu src or cu dest equal to nu dest (go to op0), then for
188| cu dest or cu src equal to xu dest (go to op0_xu). If not
189| opclass 0, go to op2sgl, which tests for opclass 2 with a
190| single size. If no conflict is found, exit; it is not the bug case.
191|
192| Check for opclass 0. If not, go and check for opclass 2 and sgl.
193|
194 movew CMDREG1B(%a6),%d0
195 andiw #0xE000,%d0 |strip all but opclass
196 bne op2sgl |not opclass 0, check op2
197|
198| Check for cu and nu register conflict. If one exists, this takes
199| priority over a cu and xu conflict.
200|
201 bfextu CMDREG1B(%a6){#3:#3},%d0 |get 1st src
202 bfextu CMDREG3B(%a6){#6:#3},%d1 |get 3rd dest
203 cmpb %d0,%d1 |cmp 1st src with 3rd dest
204 beqs op0 |if equal, continue bugfix
205|
206| Check for cu dest equal to nu dest. If so, go and fix the
207| bug condition. Otherwise, go on to the xu conflict check.
208|
209 bfextu CMDREG1B(%a6){#6:#3},%d0 |get 1st dest
210 cmpb %d0,%d1 |cmp 1st dest with 3rd dest
211 beqs op0 |if equal, continue bugfix
212|
213| Check for cu and xu register conflict.
214|
215 bfextu CMDREG2B(%a6){#6:#3},%d1 |get 2nd dest
216 cmpb %d0,%d1 |cmp 1st dest with 2nd dest
217 beqs op0_xu |if equal, continue bugfix
218 bfextu CMDREG1B(%a6){#3:#3},%d0 |get 1st src
219 cmpb %d0,%d1 |cmp 1st src with 2nd dest
220 beq op0_xu |if equal, continue bugfix
221 bne fix_done |if the reg checks fail, exit
222|
223| We have the opclass 0 situation.
224| Store the source fp register to ETEMP and relabel the command as opclass 2, extended.
225op0:
226 bfextu CMDREG1B(%a6){#3:#3},%d0 |get source register no
227 movel #7,%d1
228 subl %d0,%d1 |d1 = 7 - source register no
229 clrl %d0
230 bsetl %d1,%d0 |d0 = register mask with only that bit set
231 fmovemx %d0,ETEMP(%a6) |load source to ETEMP
232
233 moveb #0x12,%d0 |binary 010010
234 bfins %d0,CMDREG1B(%a6){#0:#6} |opclass 2, extended
235|
236| Set ETEMP exponent bit 15 as the opposite of ete14
237|
238 btst #6,ETEMP_EX(%a6) |check etemp exponent bit 14
239 beq setete15 |bit 14 clear: set ete15
240 bclr #etemp15_bit,STAG(%a6) |bit 14 set: clear ete15
241 bra finish
242setete15:
243 bset #etemp15_bit,STAG(%a6)
244 bra finish
245
246|
247| We have the case in which a conflict exists between the cu src or
248| dest and the dest of the xu. We must clear the instruction in
249| the cu and restore the state, allowing the instruction in the
250| xu to complete. Remember, the instruction in the nu
251| was exceptional, and was completed by the appropriate handler.
252| If the result of the xu instruction is not exceptional, we can
253| restore the instruction from the cu to the frame and continue
254| processing the original exception. If the result is also
255| exceptional, we choose to kill the process.
256|
257| Items saved from the stack:
258|
259| $3c stag - L_SCR1
260| $40 cmdreg1b - L_SCR2
261| $44 dtag - L_SCR3
262|
263| The cu savepc is set to zero, and the frame is restored to the
264| fpu, then saved again so the new frame format can be examined.
265|
266op0_xu:
267 movel STAG(%a6),L_SCR1(%a6)
268 movel CMDREG1B(%a6),L_SCR2(%a6)
269 movel DTAG(%a6),L_SCR3(%a6)
270 andil #0xe0000000,L_SCR3(%a6) |keep only the top 3 bits of the saved lword
271 moveb #0,CU_SAVEPC(%a6) |cu savepc = 0, as described above
272 movel (%a7)+,%d1 |save return address from bsr
273 frestore (%a7)+
274 fsave -(%a7)
275|
276| Check if the instruction which just completed was exceptional.
277|
278 cmpw #0x4060,(%a7) |did fsave return a $4060 (busy) frame?
279 beq op0_xb |yes: cannot continue, see op0_xb
280|
281| It is necessary to isolate the result of the instruction in the
282| xu if it is to fp0 - fp3 and write that value to the USER_FPn
283| locations on the stack. The correct destination register is in
284| cmdreg2b.
285|
286 bfextu CMDREG2B(%a6){#6:#3},%d0 |get dest register no
287 cmpil #3,%d0
288 bgts op0_xi |fp4 - fp7: nothing to copy
289 beqs op0_fp3
290 cmpil #1,%d0
291 blts op0_fp0
292 beqs op0_fp1 |otherwise fall through for fp2
293op0_fp2:
294 fmovemx %fp2-%fp2,USER_FP2(%a6)
295 bras op0_xi
296op0_fp1:
297 fmovemx %fp1-%fp1,USER_FP1(%a6)
298 bras op0_xi
299op0_fp0:
300 fmovemx %fp0-%fp0,USER_FP0(%a6)
301 bras op0_xi
302op0_fp3:
303 fmovemx %fp3-%fp3,USER_FP3(%a6)
304|
305| The frame returned is idle. We must build a busy frame to hold
306| the cu state information and setup etemp.
307|
308op0_xi:
309 movel #22,%d0 |dbf count: the loop below runs 23 times
310 clrl (%a7) |zero the lword at the top of the stack
311op0_loop:
312 clrl -(%a7) |push 23 more zero lwords
313 dbf %d0,op0_loop
314 movel #0x40600000,-(%a7) |frame header: label it as a $4060 frame
315 movel L_SCR1(%a6),STAG(%a6) |put back the saved stag lword
316 movel L_SCR2(%a6),CMDREG1B(%a6) |put back the saved cmdreg1b lword
317 movel L_SCR3(%a6),DTAG(%a6) |put back the masked dtag lword
318 moveb #0x6,CU_SAVEPC(%a6) |cu savepc non-zero again (meaning of 6 not shown here)
319 movel %d1,-(%a7) |push the bsr return address back
320 bfextu CMDREG1B(%a6){#3:#3},%d0 |get source register no
321 movel #7,%d1
322 subl %d0,%d1 |d1 = 7 - source register no
323 clrl %d0
324 bsetl %d1,%d0 |d0 = register mask with only that bit set
325 fmovemx %d0,ETEMP(%a6) |load source to ETEMP
326
327 moveb #0x12,%d0 |binary 010010
328 bfins %d0,CMDREG1B(%a6){#0:#6} |opclass 2, extended
329|
330| Set ETEMP exponent bit 15 as the opposite of ete14
331|
332 btst #6,ETEMP_EX(%a6) |check etemp exponent bit 14
333 beq op0_sete15 |bit 14 clear: set ete15
334 bclr #etemp15_bit,STAG(%a6) |bit 14 set: clear ete15
335 bra finish
336op0_sete15:
337 bset #etemp15_bit,STAG(%a6)
338 bra finish
339
340|
341| The frame returned is busy. It is not possible to reconstruct
342| the code sequence to allow completion. We will jump to
343| fpsp_fmt_error and allow the kernel to kill the process.
344| (The bsr return address popped into d1 is not pushed back on this path.)
345op0_xb:
346 jmp fpsp_fmt_error
347
348|
349| Check for opclass 2 and single size. If not both, exit.
350|
351op2sgl:
352 movew CMDREG1B(%a6),%d0
353 andiw #0xFC00,%d0 |strip all but opclass and size
354 cmpiw #0x4400,%d0 |test for opclass 2 and size=sgl
355 bne fix_done |if not, it is not bug 1238
356|
357| Check for cu dest equal to nu dest or equal to xu dest, with
358| a cu and nu conflict taking priority over an xu conflict. If
359| either, go and fix the bug condition. Otherwise, exit.
360|
361 bfextu CMDREG1B(%a6){#6:#3},%d0 |get 1st dest
362 bfextu CMDREG3B(%a6){#6:#3},%d1 |get 3rd dest
363 cmpb %d0,%d1 |cmp 1st dest with 3rd dest
364 beq op2_com |if equal, continue bugfix
365 bfextu CMDREG2B(%a6){#6:#3},%d1 |get 2nd dest
366 cmpb %d0,%d1 |cmp 1st dest with 2nd dest
367 bne fix_done |if the reg checks fail, exit
368|
369| We have the case in which a conflict exists between the cu src or
370| dest and the dest of the xu. We must clear the instruction in
371| the cu and restore the state, allowing the instruction in the
372| xu to complete. Remember, the instruction in the nu
373| was exceptional, and was completed by the appropriate handler.
374| If the result of the xu instruction is not exceptional, we can
375| restore the instruction from the cu to the frame and continue
376| processing the original exception. If the result is also
377| exceptional, we choose to kill the process.
378|
379| Items saved from the stack:
380|
381| $3c stag - L_SCR1
382| $40 cmdreg1b - L_SCR2
383| $44 dtag - L_SCR3
384| etemp - FP_SCR2
385|
386| The cu savepc is set to zero, and the frame is restored to the
387| fpu, then saved again so the new frame format can be examined.
388|
389op2_xu:
390 movel STAG(%a6),L_SCR1(%a6)
391 movel CMDREG1B(%a6),L_SCR2(%a6)
392 movel DTAG(%a6),L_SCR3(%a6)
393 andil #0xe0000000,L_SCR3(%a6) |keep only the top 3 bits of the saved lword
394 moveb #0,CU_SAVEPC(%a6) |cu savepc = 0, as described above
395 movel ETEMP(%a6),FP_SCR2(%a6) |save all three lwords of etemp
396 movel ETEMP_HI(%a6),FP_SCR2+4(%a6)
397 movel ETEMP_LO(%a6),FP_SCR2+8(%a6)
398 movel (%a7)+,%d1 |save return address from bsr
399 frestore (%a7)+
400 fsave -(%a7)
401|
402| Check if the instruction which just completed was exceptional.
403|
404 cmpw #0x4060,(%a7) |did fsave return a $4060 (busy) frame?
405 beq op2_xb |yes: cannot continue, see op2_xb
406|
407| It is necessary to isolate the result of the instruction in the
408| xu if it is to fp0 - fp3 and write that value to the USER_FPn
409| locations on the stack. The correct destination register is in
410| cmdreg2b.
411|
412 bfextu CMDREG2B(%a6){#6:#3},%d0 |get dest register no
413 cmpil #3,%d0
414 bgts op2_xi |fp4 - fp7: nothing to copy
415 beqs op2_fp3
416 cmpil #1,%d0
417 blts op2_fp0
418 beqs op2_fp1 |otherwise fall through for fp2
419op2_fp2:
420 fmovemx %fp2-%fp2,USER_FP2(%a6)
421 bras op2_xi
422op2_fp1:
423 fmovemx %fp1-%fp1,USER_FP1(%a6)
424 bras op2_xi
425op2_fp0:
426 fmovemx %fp0-%fp0,USER_FP0(%a6)
427 bras op2_xi
428op2_fp3:
429 fmovemx %fp3-%fp3,USER_FP3(%a6)
430|
431| The frame returned is idle. We must build a busy frame to hold
432| the cu state information and fix up etemp.
433|
434op2_xi:
435 movel #22,%d0 |dbf count: the loop below runs 23 times
436 clrl (%a7) |zero the lword at the top of the stack
437op2_loop:
438 clrl -(%a7) |push 23 more zero lwords
439 dbf %d0,op2_loop
440 movel #0x40600000,-(%a7) |frame header: label it as a $4060 frame
441 movel L_SCR1(%a6),STAG(%a6) |put back the saved stag lword
442 movel L_SCR2(%a6),CMDREG1B(%a6) |put back the saved cmdreg1b lword
443 movel L_SCR3(%a6),DTAG(%a6) |put back the masked dtag lword
444 moveb #0x6,CU_SAVEPC(%a6) |cu savepc non-zero again (meaning of 6 not shown here)
445 movel FP_SCR2(%a6),ETEMP(%a6) |put back all three lwords of etemp
446 movel FP_SCR2+4(%a6),ETEMP_HI(%a6)
447 movel FP_SCR2+8(%a6),ETEMP_LO(%a6)
448 movel %d1,-(%a7) |push the bsr return address back
449 bra op2_com |op2_com is the next instruction anyway
450
451|
452| We have the opclass 2 single source situation.
453| Relabel the command as opclass 2, double, and move a boundary sgl exponent to the matching dbl one.
454op2_com:
455 moveb #0x15,%d0 |binary 010101
456 bfins %d0,CMDREG1B(%a6){#0:#6} |opclass 2, double
457
458 cmpw #0x407F,ETEMP_EX(%a6) |single +max
459 bnes case2
460 movew #0x43FF,ETEMP_EX(%a6) |to double +max
461 bra finish
462case2:
463 cmpw #0xC07F,ETEMP_EX(%a6) |single -max
464 bnes case3
465 movew #0xC3FF,ETEMP_EX(%a6) |to double -max
466 bra finish
467case3:
468 cmpw #0x3F80,ETEMP_EX(%a6) |single +min
469 bnes case4
470 movew #0x3C00,ETEMP_EX(%a6) |to double +min
471 bra finish
472case4:
473 cmpw #0xBF80,ETEMP_EX(%a6) |single -min
474 bne fix_done |none of the four: leave the exponent alone
475 movew #0xBC00,ETEMP_EX(%a6) |to double -min
476 bra finish
477|
478| The frame returned is busy. It is not possible to reconstruct
479| the code sequence to allow completion. fpsp_fmt_error causes
480| an fline illegal instruction to be executed.
481|
482| You should replace the jump to fpsp_fmt_error with a jump
483| to the entry point used to kill a process.
484| (The bsr return address popped into d1 is not pushed back on this path.)
485op2_xb:
486 jmp fpsp_fmt_error
487
488|
489| Enter here if the case is not one of the situations affected by
490| bug #1238, or if the fix is completed, and exit.
491|
492finish:
493fix_done:
494 rts
495
496 |end
diff --git a/arch/m68k/fpsp040/decbin.S b/arch/m68k/fpsp040/decbin.S
new file mode 100644
index 000000000000..2160609e328d
--- /dev/null
+++ b/arch/m68k/fpsp040/decbin.S
@@ -0,0 +1,506 @@
1|
2| decbin.sa 3.3 12/19/90
3|
4| Description: Converts normalized packed bcd value pointed to by
5| register A6 to extended-precision value in FP0.
6|
7| Input: Normalized packed bcd value in ETEMP(a6).
8|
9| Output: Exact floating-point representation of the packed bcd value.
10|
11| Saves and Modifies: D2-D5
12|
13| Speed: The program decbin takes ??? cycles to execute.
14|
15| Object Size:
16|
17| External Reference(s): None.
18|
19| Algorithm:
20| Expected is a normal bcd (i.e. non-exceptional; all inf, zero,
21| and NaN operands are dispatched without entering this routine)
22| value in 68881/882 format at location ETEMP(A6).
23|
24| A1. Convert the bcd exponent to binary by successive adds and muls.
25| Set the sign according to SE. Subtract 16 to compensate
26| for the mantissa which is to be interpreted as 17 integer
27| digits, rather than 1 integer and 16 fraction digits.
28| Note: this operation can never overflow.
29|
30| A2. Convert the bcd mantissa to binary by successive
31| adds and muls in FP0. Set the sign according to SM.
32| The mantissa digits will be converted with the decimal point
33| assumed following the least-significant digit.
34| Note: this operation can never overflow.
35|
36| A3. Count the number of leading/trailing zeros in the
37| bcd string. If SE is positive, count the leading zeros;
38| if negative, count the trailing zeros. Set the adjusted
39| exponent equal to the exponent from A1 and the zero count
40| added if SE = 1 and subtracted if SE = 0. Scale the
41| mantissa the equivalent of forcing in the bcd value:
42|
43| SE = 0 a non-zero digit in the integer position
44| SE = 1 a non-zero digit in Mant0, lsd of the fraction
45|
46| this will ensure that any value, regardless of its
47| representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted
48| consistently.
49|
50| A4. Calculate the factor 10^exp in FP1 using a table of
51| 10^(2^n) values. To reduce the error in forming factors
52| greater than 10^27, a directed rounding scheme is used with
53| tables rounded to RN, RM, and RP, according to the table
54| in the comments of the pwrten section.
55|
56| A5. Form the final binary number by scaling the mantissa by
57| the exponent factor. This is done by multiplying the
58| mantissa in FP0 by the factor in FP1 if the adjusted
59| exponent sign is positive, and dividing FP0 by FP1 if
60| it is negative.
61|
62| Clean up and return. Check if the final mul or div resulted
63| in an inex2 exception. If so, set inex1 in the fpsr and
64| check if the inex1 exception is enabled. If so, set d7 upper
65| word to $0100. This will signal unimp.sa that an enabled inex1
66| exception occurred. Unimp will fix the stack.
67|
68
69| Copyright (C) Motorola, Inc. 1990
70| All Rights Reserved
71|
72| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
73| The copyright notice above does not evidence any
74| actual or intended publication of such source code.
75
76|DECBIN idnt 2,1 | Motorola 040 Floating Point Software Package
77
78 |section 8
79
80#include "fpsp.h"
81
82|
83| PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
84| to nearest, minus, and plus, respectively. The tables include
85| 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding
86| is required until the power is greater than 27, however, all
87| tables include the first 5 for ease of indexing.
88|
89 |xref PTENRN
90 |xref PTENRM
91 |xref PTENRP
92
93RTABLE: .byte 0,0,0,0
94 .byte 2,3,2,3
95 .byte 2,3,3,2
96 .byte 3,2,2,3
97
98 .global decbin
99 .global calc_e
100 .global pwrten
101 .global calc_m
102 .global norm
103 .global ap_st_z
104 .global ap_st_n
105|
106 .set FNIBS,7
107 .set FSTRT,0
108|
109 .set ESTRT,4
110 .set EDIGITS,2 |
111|
112| Constants in single precision
113FZERO: .long 0x00000000
114FONE: .long 0x3F800000
115FTEN: .long 0x41200000
116
117 .set TEN,10
118
119|
120decbin:
121 | fmovel #0,FPCR ;clr real fpcr
122 moveml %d2-%d5,-(%a7)
123|
124| Calculate exponent:
125| 1. Copy bcd value in memory for use as a working copy.
126| 2. Calculate absolute value of exponent in d1 by mul and add.
127| 3. Correct for exponent sign.
128| 4. Subtract 16 to compensate for interpreting the mant as all integer digits.
129| (i.e., all digits assumed left of the decimal point.)
130|
131| Register usage:
132|
133| calc_e:
134| (*) d0: temp digit storage
135| (*) d1: accumulator for binary exponent
136| (*) d2: digit count
137| (*) d3: offset pointer
138| ( ) d4: first word of bcd
139| ( ) a0: pointer to working bcd value
140| ( ) a6: pointer to original bcd value
141| (*) FP_SCR1: working copy of original bcd value
142| (*) L_SCR1: copy of original exponent word
143|
144calc_e:
145 movel #EDIGITS,%d2 |# of nibbles (digits) in fraction part
146 moveql #ESTRT,%d3 |counter to pick up digits
147 leal FP_SCR1(%a6),%a0 |load tmp bcd storage address
148 movel ETEMP(%a6),(%a0) |save input bcd value
149 movel ETEMP_HI(%a6),4(%a0) |save words 2 and 3
150 movel ETEMP_LO(%a6),8(%a0) |and work with these
151 movel (%a0),%d4 |get first word of bcd
152 clrl %d1 |zero d1 for accumulator
153e_gd:
154 mulul #TEN,%d1 |mul partial product by one digit place
155 bfextu %d4{%d3:#4},%d0 |get the digit and zero extend into d0
156 addl %d0,%d1 |d1 = d1 + d0
157 addqb #4,%d3 |advance d3 to the next digit
158 dbf %d2,e_gd |if we have used all 3 digits, exit loop
159 btst #30,%d4 |get SE
160 beqs e_pos |don't negate if pos
161 negl %d1 |negate before subtracting
162e_pos:
163 subl #16,%d1 |sub to compensate for shift of mant
164 bges e_save |if still pos, do not neg
165 negl %d1 |now negative, make pos and set SE
166 orl #0x40000000,%d4 |set SE in d4,
167 orl #0x40000000,(%a0) |and in working bcd
168e_save:
169 movel %d1,L_SCR1(%a6) |save exp in memory
170|
171|
172| Calculate mantissa:
173| 1. Calculate absolute value of mantissa in fp0 by mul and add.
174| 2. Correct for mantissa sign.
175| (i.e., all digits assumed left of the decimal point.)
176|
177| Register usage:
178|
179| calc_m:
180| (*) d0: temp digit storage
181| (*) d1: lword counter
182| (*) d2: digit count
183| (*) d3: offset pointer
184| ( ) d4: words 2 and 3 of bcd
185| ( ) a0: pointer to working bcd value
186| ( ) a6: pointer to original bcd value
187| (*) fp0: mantissa accumulator
188| ( ) FP_SCR1: working copy of original bcd value
189| ( ) L_SCR1: copy of original exponent word
190|
191calc_m:
192 moveql #1,%d1 |word counter, init to 1
193 fmoves FZERO,%fp0 |accumulator
194|
195|
196| Since the packed number has a long word between the first & second parts,
197| get the integer digit then skip down & get the rest of the
198| mantissa. We will unroll the loop once.
199|
200 bfextu (%a0){#28:#4},%d0 |integer part is ls digit in long word
201 faddb %d0,%fp0 |add digit to sum in fp0
202|
203|
204| Get the rest of the mantissa.
205|
206loadlw:
207 movel (%a0,%d1.L*4),%d4 |load mantissa longword into d4
208 moveql #FSTRT,%d3 |counter to pick up digits
209 moveql #FNIBS,%d2 |reset number of digits per a0 ptr
210md2b:
211 fmuls FTEN,%fp0 |fp0 = fp0 * 10
212 bfextu %d4{%d3:#4},%d0 |get the digit and zero extend
213 faddb %d0,%fp0 |fp0 = fp0 + digit
214|
215|
216| If all the digits (8) in that long word have been converted (d2=0),
217| then inc d1 (=2) to point to the next long word and reset d3 to 0
218| to initialize the digit offset, and set d2 to 7 for the digit count;
219| else continue with this long word.
220|
221 addqb #4,%d3 |advance d3 to the next digit
222 dbf %d2,md2b |check for last digit in this lw
223nextlw:
224 addql #1,%d1 |inc lw pointer in mantissa
225 cmpl #2,%d1 |test for last lw
226 ble loadlw |if not, get last one
227
228|
229| Check the sign of the mant and make the value in fp0 the same sign.
230|
231m_sign:
232 btst #31,(%a0) |test sign of the mantissa
233 beq ap_st_z |if clear, go to append/strip zeros
234 fnegx %fp0 |if set, negate fp0
235
236|
237| Append/strip zeros:
238|
239| For adjusted exponents which have an absolute value greater than 27*,
240| this routine calculates the amount needed to normalize the mantissa
241| for the adjusted exponent. That number is subtracted from the exp
242| if the exp was positive, and added if it was negative. The purpose
243| of this is to reduce the value of the exponent and the possibility
244| of error in calculation of pwrten.
245|
246| 1. Branch on the sign of the adjusted exponent.
247| 2p.(positive exp)
248| 2. Check M16 and the digits in lwords 2 and 3 in descending order.
249| 3. Add one for each zero encountered until a non-zero digit.
250| 4. Subtract the count from the exp.
251| 5. Check if the exp has crossed zero in #3 above; make the exp abs
252| and set SE.
253| 6. Multiply the mantissa by 10**count.
254| 2n.(negative exp)
255| 2. Check the digits in lwords 3 and 2 in descending order.
256| 3. Add one for each zero encountered until a non-zero digit.
257| 4. Add the count to the exp.
258| 5. Check if the exp has crossed zero in #3 above; clear SE.
259| 6. Divide the mantissa by 10**count.
260|
261| *Why 27? If the adjusted exponent is within -28 < expA < 28, then
262| any adjustment due to append/strip zeros will drive the resultant
263| exponent towards zero. Since all pwrten constants with a power
264| of 27 or less are exact, there is no need to use this routine to
265| attempt to lessen the resultant exponent.
266|
267| Register usage:
268|
269| ap_st_z:
270| (*) d0: temp digit storage
271| (*) d1: zero count
272| (*) d2: digit count
273| (*) d3: offset pointer
274| ( ) d4: first word of bcd
275| (*) d5: lword counter
276| ( ) a0: pointer to working bcd value
277| ( ) FP_SCR1: working copy of original bcd value
278| ( ) L_SCR1: copy of original exponent word
279|
280|
281| First check the absolute value of the exponent to see if this
282| routine is necessary. If so, then check the sign of the exponent
283| and do append (+) or strip (-) zeros accordingly.
284| This section handles a positive adjusted exponent.
285|
286ap_st_z:
287 movel L_SCR1(%a6),%d1 |load expA for range test
288 cmpl #27,%d1 |test is with 27
289 ble pwrten |if abs(expA) <28, skip ap/st zeros
290 btst #30,(%a0) |check sign of exp
291 bne ap_st_n |if neg, go to neg side
292 clrl %d1 |zero count reg
293 movel (%a0),%d4 |load lword 1 to d4
294 bfextu %d4{#28:#4},%d0 |get M16 in d0
295 bnes ap_p_fx |if M16 is non-zero, go fix exp
296 addql #1,%d1 |inc zero count
297 moveql #1,%d5 |init lword counter
298 movel (%a0,%d5.L*4),%d4 |get lword 2 to d4
299 bnes ap_p_cl |if lw 2 is zero, skip it
300 addql #8,%d1 |and inc count by 8
301 addql #1,%d5 |inc lword counter
302 movel (%a0,%d5.L*4),%d4 |get lword 3 to d4
303ap_p_cl:
304 clrl %d3 |init offset reg
305 moveql #7,%d2 |init digit counter
306ap_p_gd:
307 bfextu %d4{%d3:#4},%d0 |get digit
308 bnes ap_p_fx |if non-zero, go to fix exp
309 addql #4,%d3 |point to next digit
310 addql #1,%d1 |inc digit counter
311 dbf %d2,ap_p_gd |get next digit
312ap_p_fx:
313 movel %d1,%d0 |copy counter to d2
314 movel L_SCR1(%a6),%d1 |get adjusted exp from memory
315 subl %d0,%d1 |subtract count from exp
316 bges ap_p_fm |if still pos, go to pwrten
317 negl %d1 |now its neg; get abs
318 movel (%a0),%d4 |load lword 1 to d4
319 orl #0x40000000,%d4 | and set SE in d4
320 orl #0x40000000,(%a0) | and in memory
321|
322| Calculate the mantissa multiplier to compensate for the stripping of
323| zeros from the mantissa.
324|
325ap_p_fm:
326 movel #PTENRN,%a1 |get address of power-of-ten table
327 clrl %d3 |init table index
328 fmoves FONE,%fp1 |init fp1 to 1
329 moveql #3,%d2 |init d2 to count bits in counter
330ap_p_el:
331 asrl #1,%d0 |shift lsb into carry
332 bccs ap_p_en |if 1, mul fp1 by pwrten factor
333 fmulx (%a1,%d3),%fp1 |mul by 10**(d3_bit_no)
334ap_p_en:
335 addl #12,%d3 |inc d3 to next rtable entry
336 tstl %d0 |check if d0 is zero
337 bnes ap_p_el |if not, get next bit
338 fmulx %fp1,%fp0 |mul mantissa by 10**(no_bits_shifted)
339 bra pwrten |go calc pwrten
340|
341| This section handles a negative adjusted exponent.
342|
343ap_st_n:
344 clrl %d1 |clr counter
345 moveql #2,%d5 |set up d5 to point to lword 3
346 movel (%a0,%d5.L*4),%d4 |get lword 3
347 bnes ap_n_cl |if not zero, check digits
348 subl #1,%d5 |dec d5 to point to lword 2
349 addql #8,%d1 |inc counter by 8
350 movel (%a0,%d5.L*4),%d4 |get lword 2
351ap_n_cl:
352 movel #28,%d3 |point to last digit
353 moveql #7,%d2 |init digit counter
354ap_n_gd:
355 bfextu %d4{%d3:#4},%d0 |get digit
356 bnes ap_n_fx |if non-zero, go to exp fix
357 subql #4,%d3 |point to previous digit
358 addql #1,%d1 |inc digit counter
359 dbf %d2,ap_n_gd |get next digit
360ap_n_fx:
361 movel %d1,%d0 |copy counter to d0
362 movel L_SCR1(%a6),%d1 |get adjusted exp from memory
363 subl %d0,%d1 |subtract count from exp
364 bgts ap_n_fm |if still pos, go fix mantissa
365 negl %d1 |take abs of exp and clr SE
366 movel (%a0),%d4 |load lword 1 to d4
367 andl #0xbfffffff,%d4 | and clr SE in d4
368 andl #0xbfffffff,(%a0) | and in memory
369|
370| Calculate the mantissa multiplier to compensate for the appending of
371| zeros to the mantissa.
372|
| ap_n_fm: build a factor in fp1 from the power-of-ten table, one table
| entry per set bit of the count in d0, then divide the mantissa by it.
| Execution continues into pwrten below.
373ap_n_fm:
374 movel #PTENRN,%a1 |get address of power-of-ten table
375 clrl %d3 |init table index
376 fmoves FONE,%fp1 |init fp1 to 1
377 moveql #3,%d2 |init d2 (not read again before pwrten reloads it)
378ap_n_el:
379 asrl #1,%d0 |shift lsb into carry
380 bccs ap_n_en |if bit is 0, skip the multiply
381 fmulx (%a1,%d3),%fp1 |bit is 1: mul by 10**(d3_bit_no)
382ap_n_en:
383 addl #12,%d3 |advance d3 by 12 bytes to next table entry
384 tstl %d0 |check if d0 is zero
385 bnes ap_n_el |if not, get next bit
386 fdivx %fp1,%fp0 |div mantissa by the accumulated power of ten
387|
388|
389| Calculate power-of-ten factor from adjusted and shifted exponent.
390|
391| Register usage:
392|
393| pwrten:
394| (*) d0: temp
395| ( ) d1: exponent
396| (*) d2: {FPCR[5:4],SM,SE} as index in RTABLE; temp
397| (*) d3: FPCR work copy
398| ( ) d4: first word of bcd
399| (*) a1: RTABLE pointer
400| calc_p:
401| (*) d0: temp
402| ( ) d1: exponent
403| (*) d3: PWRTxx table index
404| ( ) a0: pointer to working copy of bcd
405| (*) a1: PWRTxx pointer
406| (*) fp1: power-of-ten accumulator
407|
408| Pwrten calculates the exponent factor in the selected rounding mode
409| according to the following table:
410|
411| Sign of Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode
412|
413| ANY ANY RN RN
414|
415| + + RP RP
416| - + RP RM
417| + - RP RM
418| - - RP RP
419|
420| + + RM RM
421| - + RM RP
422| + - RM RP
423| - - RM RM
424|
425| + + RZ RM
426| - + RZ RM
427| + - RZ RP
428| - - RZ RP
429|
430|
| pwrten: look up the rounding mode to use from RTABLE (indexed by the
| user's rounding mode and the SM/SE bits), write it to the FPCR, select
| the matching PTENxx table, then accumulate the table entries chosen by
| the set bits of abs(exp) into fp1.
431pwrten:
432 movel USER_FPCR(%a6),%d3 |get user's FPCR
433 bfextu %d3{#26:#2},%d2 |isolate rounding mode bits (FPCR bits 5:4)
434 movel (%a0),%d4 |reload 1st bcd word to d4
435 asll #2,%d2 |shift mode left 2 so d2 becomes
436 bfextu %d4{#0:#2},%d0 | {FPCR[5],FPCR[4],SM,SE}; d0 = {SM,SE}
437 addl %d0,%d2 |in d2 as index into RTABLE
438 leal RTABLE,%a1 |load rtable base
439 moveb (%a1,%d2),%d0 |load new rounding bits from table
440 clrl %d3 |clear d3 to force no exc and extended
441 bfins %d0,%d3{#26:#2} |stuff new rounding bits in FPCR image
442 fmovel %d3,%FPCR |write new FPCR
443 asrl #1,%d0 |shift bit 0 of new mode into carry
444 bccs not_rp |bit 0 clear: not RP
445 leal PTENRP,%a1 |it is RP
446 bras calc_p |go to init section
447not_rp:
448 asrl #1,%d0 |shift bit 1 of new mode into carry
449 bccs not_rm |bit 1 clear: not RM either
450 leal PTENRM,%a1 |it is RM
451 bras calc_p |go to init section
452not_rm:
453 leal PTENRN,%a1 |it is RN
454calc_p:
455 movel %d1,%d0 |copy exp to d0;use d0
456 bpls no_neg |if exp is negative,
457 negl %d0 |negate it to get abs(exp)
458 orl #0x40000000,(%a0) |and set SE bit
459no_neg:
460 clrl %d3 |table index
461 fmoves FONE,%fp1 |init fp1 to 1
462e_loop:
463 asrl #1,%d0 |shift next bit into carry
464 bccs e_next |if zero, skip the mul
465 fmulx (%a1,%d3),%fp1 |mul by 10**(d3_bit_no)
466e_next:
467 addl #12,%d3 |advance d3 by 12 bytes to next PTENxx entry
468 tstl %d0 |check if d0 is zero
469 bnes e_loop |not zero, continue shifting
470|
471|
472| Check the sign of the adjusted exp and make the value in fp0 the
473| same sign. If the exp was pos then multiply fp1*fp0;
474| else divide fp0/fp1.
475|
476| Register Usage:
477| norm:
478| ( ) a0: pointer to working bcd value
479| (*) fp0: mantissa accumulator
480| ( ) fp1: scaling factor - 10**(abs(exp))
481|
482norm:
483 btst #30,(%a0) |test the sign of the exponent (SE)
484 beqs mul |if clear, go to multiply
485div:
486 fdivx %fp1,%fp0 |exp is negative, so divide mant by 10**abs(exp)
487 bras end_dec |skip the multiply
488mul:
489 fmulx %fp1,%fp0 |exp is positive, so multiply mant by 10**abs(exp)
490|
491|
492| Clean up and return with result in fp0.
493|
494| If the final mul/div in decbin incurred an inex exception,
495| it will be inex2, but will be reported as inex1 by get_op.
496|
| end_dec: strip inex2 from the live FPSR; if it was set, record it as
| inex1/ainex in the saved user FPSR. Then pop d2-d5 and return.
497end_dec:
498 fmovel %FPSR,%d0 |get status register
499 bclrl #inex2_bit+8,%d0 |test inex2 (FPSR bit 9) and clear it
500 fmovel %d0,%FPSR |return status reg w/o inex2
501 beqs no_exc |inex2 was clear in the bclrl test: skip
502 orl #inx1a_mask,USER_FPSR(%a6) |set inex1/ainex
503no_exc:
504 moveml (%a7)+,%d2-%d5 |pop d2-d5 from the stack
505 rts
506 |end
diff --git a/arch/m68k/fpsp040/do_func.S b/arch/m68k/fpsp040/do_func.S
new file mode 100644
index 000000000000..81f6a9856dce
--- /dev/null
+++ b/arch/m68k/fpsp040/do_func.S
@@ -0,0 +1,559 @@
1|
2| do_func.sa 3.4 2/18/91
3|
4| Do_func performs the unimplemented operation. The operation
5| to be performed is determined from the lower 7 bits of the
6| extension word (except in the case of fmovecr and fsincos).
7| The opcode and tag bits form an index into a jump table in
8| tbldo.sa. Cases of zero, infinity and NaN are handled in
9| do_func by forcing the default result. Normalized and
10| denormalized operands (there are no unnormalized numbers at
11| this point) are passed on to the emulation code.
12|
13| CMDREG1B and STAG are extracted from the fsave frame
14| and combined to form the table index. The function called
15| will start with a0 pointing to the ETEMP operand. Dyadic
16| functions can find FPTEMP at -12(a0).
17|
18| Called functions return their result in fp0. Sincos returns
19| sin(x) in fp0 and cos(x) in fp1.
20|
21
22| Copyright (C) Motorola, Inc. 1990
23| All Rights Reserved
24|
25| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
26| The copyright notice above does not evidence any
27| actual or intended publication of such source code.
28
29DO_FUNC: |idnt 2,1 | Motorola 040 Floating Point Software Package
30
31 |section 8
32
33#include "fpsp.h"
34
35 |xref t_dz2
36 |xref t_operr
37 |xref t_inx2
38 |xref t_resdnrm
39 |xref dst_nan
40 |xref src_nan
41 |xref nrm_set
42 |xref sto_cos
43
44 |xref tblpre
45 |xref slognp1,slogn,slog10,slog2
46 |xref slognd,slog10d,slog2d
47 |xref smod,srem
48 |xref sscale
49 |xref smovcr
50
51PONE: .long 0x3fff0000,0x80000000,0x00000000 |+1
52MONE: .long 0xbfff0000,0x80000000,0x00000000 |-1
53PZERO: .long 0x00000000,0x00000000,0x00000000 |+0
54MZERO: .long 0x80000000,0x00000000,0x00000000 |-0
55PINF: .long 0x7fff0000,0x00000000,0x00000000 |+inf
56MINF: .long 0xffff0000,0x00000000,0x00000000 |-inf
57QNAN: .long 0x7fff0000,0xffffffff,0xffffffff |non-signaling nan
58PPIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235 |+PI/2
59MPIBY2: .long 0xbFFF0000,0xC90FDAA2,0x2168C235 |-PI/2
60
61 .global do_func
| do_func: dispatch an unimplemented FP operation. fmovecr is sent straight
| to smovcr; anything else is dispatched through tblpre using an index
| built from the low 7 bits of the command word and the 3-bit source tag.
| On dispatch a0 points to ETEMP and d1 holds the low byte of the user FPCR.
62do_func:
63 clrb CU_ONLY(%a6) |clear the cu-only flag
64|
65| Check for fmovecr. It does not follow the format of fp gen
66| unimplemented instructions. The test is on the upper 6 bits;
67| if they are $17, the inst is fmovecr. Jump to entry smovcr
68| directly.
69|
70 bfextu CMDREG1B(%a6){#0:#6},%d0 |get opclass and src fields
71 cmpil #0x17,%d0 |if op class and size fields are $17,
72| ;it is FMOVECR; if not, continue
73 bnes not_fmovecr
74 jmp smovcr |fmovecr; jmp directly to emulation
75
76not_fmovecr:
77 movew CMDREG1B(%a6),%d0 |get the command word
78 andl #0x7F,%d0 |keep only the lower 7 bits
79 cmpil #0x38,%d0 |if the extension is >= $38,
80 bge serror |it is illegal
81 bfextu STAG(%a6){#0:#3},%d1 |d1 = 3-bit source tag
82 lsll #3,%d0 |make room for STAG
83 addl %d1,%d0 |combine for final index into table
84 leal tblpre,%a1 |start of monster jump table
85 movel (%a1,%d0.w*4),%a1 |real target address
86 leal ETEMP(%a6),%a0 |a0 is pointer to src op
87 movel USER_FPCR(%a6),%d1 |d1 = user's FPCR
88 andl #0xFF,%d1 | discard all but rounding mode/prec
89 fmovel #0,%fpcr |zero the FPCR before dispatching
90 jmp (%a1) |go to the selected routine
91|
92| ERROR
93|
94 .global serror
95serror:
96 st STORE_FLG(%a6)
97 rts
98|
99| These routines load forced values into fp0. They are called
100| by index into tbldo.
101|
102| Load a signed zero to fp0 and set inex2/ainex
103|
104 .global snzrinx
105snzrinx:
106 btstb #sign_bit,LOCAL_EX(%a0) |get sign of source operand
107 bnes ld_mzinx |if negative, branch
108 bsr ld_pzero |bsr so we can return and set inx
109 bra t_inx2 |now, set the inx for the next inst
110ld_mzinx:
111 bsr ld_mzero |if neg, load neg zero, return here
112 bra t_inx2 |now, set the inx for the next inst
113|
114| Load a signed zero to fp0; do not set inex2/ainex
115|
116 .global szero
117szero:
118 btstb #sign_bit,LOCAL_EX(%a0) |get sign of source operand
119 bne ld_mzero |if neg, load neg zero
120 bra ld_pzero |load positive zero
121|
122| Load a signed infinity to fp0; do not set inex2/ainex
123|
124 .global sinf
125sinf:
126 btstb #sign_bit,LOCAL_EX(%a0) |get sign of source operand
127 bne ld_minf |if negative branch
128 bra ld_pinf
129|
130| Load a signed one to fp0; do not set inex2/ainex
131|
132 .global sone
133sone:
134 btstb #sign_bit,LOCAL_EX(%a0) |check sign of source
135 bne ld_mone
136 bra ld_pone
137|
138| Load a signed pi/2 to fp0; do not set inex2/ainex
139|
140 .global spi_2
141spi_2:
142 btstb #sign_bit,LOCAL_EX(%a0) |check sign of source
143 bne ld_mpi2
144 bra ld_ppi2
145|
146| Load +0 for a negative operand or +inf for a positive operand
147|
148 .global szr_inf
149szr_inf:
150 btstb #sign_bit,LOCAL_EX(%a0) |check sign of source
151 bne ld_pzero |if negative, load +0
152 bra ld_pinf |else load +inf
153|
154| Result is an operr for a negative operand or +inf for a positive one
155| [Used by slogn, slognp1, slog10, and slog2]
156|
157 .global sopr_inf
158sopr_inf:
159 btstb #sign_bit,LOCAL_EX(%a0) |check sign of source
160 bne t_operr |if negative, signal operr
161 bra ld_pinf |else load +inf
162|
163| FLOGNP1
164|
165 .global sslognp1
166sslognp1:
167 fmovemx (%a0),%fp0-%fp0 |load source operand into fp0
168 fcmpb #-1,%fp0 |compare source with -1
169 fbgt slognp1 |if > -1, go to slognp1
170 fbeq t_dz2 |if = -1, divide by zero exception
171 fmovel #0,%FPSR |zero the FPSR (clr N flag)
172 bra t_operr |take care of operands < -1
173|
174| FETOXM1
175|
176 .global setoxm1i
177setoxm1i:
178 btstb #sign_bit,LOCAL_EX(%a0) |check sign of source
179 bne ld_mone
180 bra ld_pinf
181|
182| FLOGN
183|
184| Test for 1.0 as an input argument, returning +zero. Also check
185| the sign and return operr if negative.
186|
187 .global sslogn
188sslogn:
189 btstb #sign_bit,LOCAL_EX(%a0)
190 bne t_operr |take care of operands < 0
191 cmpiw #0x3fff,LOCAL_EX(%a0) |test for 1.0 input
192 bne slogn
193 cmpil #0x80000000,LOCAL_HI(%a0)
194 bne slogn
195 tstl LOCAL_LO(%a0)
196 bne slogn
197 fmovex PZERO,%fp0
198 rts
199
200 .global sslognd
201sslognd:
202 btstb #sign_bit,LOCAL_EX(%a0)
203 beq slognd
204 bra t_operr |take care of operands < 0
205
206|
207| FLOG10
208|
209 .global sslog10
210sslog10:
211 btstb #sign_bit,LOCAL_EX(%a0)
212 bne t_operr |take care of operands < 0
213 cmpiw #0x3fff,LOCAL_EX(%a0) |test for 1.0 input
214 bne slog10
215 cmpil #0x80000000,LOCAL_HI(%a0)
216 bne slog10
217 tstl LOCAL_LO(%a0)
218 bne slog10
219 fmovex PZERO,%fp0
220 rts
221
222 .global sslog10d
223sslog10d:
224 btstb #sign_bit,LOCAL_EX(%a0)
225 beq slog10d
226 bra t_operr |take care of operands < 0
227
228|
229| FLOG2
230|
231 .global sslog2
232sslog2:
233 btstb #sign_bit,LOCAL_EX(%a0)
234 bne t_operr |take care of operands < 0
235 cmpiw #0x3fff,LOCAL_EX(%a0) |test for 1.0 input
236 bne slog2
237 cmpil #0x80000000,LOCAL_HI(%a0)
238 bne slog2
239 tstl LOCAL_LO(%a0)
240 bne slog2
241 fmovex PZERO,%fp0
242 rts
243
244 .global sslog2d
245sslog2d:
246 btstb #sign_bit,LOCAL_EX(%a0)
247 beq slog2d
248 bra t_operr |take care of operands < 0
249
250|
251| FMOD
252|
253pmodt:
254| ;$21 fmod
255| ;dtag,stag
256 .long smod | 00,00 norm,norm = normal
257 .long smod_oper | 00,01 norm,zero = nan with operr
258 .long smod_fpn | 00,10 norm,inf = fpn
259 .long smod_snan | 00,11 norm,nan = nan
260 .long smod_zro | 01,00 zero,norm = +-zero
261 .long smod_oper | 01,01 zero,zero = nan with operr
262 .long smod_zro | 01,10 zero,inf = +-zero
263 .long smod_snan | 01,11 zero,nan = nan
264 .long smod_oper | 10,00 inf,norm = nan with operr
265 .long smod_oper | 10,01 inf,zero = nan with operr
266 .long smod_oper | 10,10 inf,inf = nan with operr
267 .long smod_snan | 10,11 inf,nan = nan
268 .long smod_dnan | 11,00 nan,norm = nan
269 .long smod_dnan | 11,01 nan,zero = nan
270 .long smod_dnan | 11,10 nan,inf = nan
271 .long smod_dnan | 11,11 nan,nan = nan
272
273 .global pmod
| pmod: clear the quotient byte, build a 4-bit {dtag,stag} index and
| dispatch through the pmodt table.
274pmod:
275 clrb FPSR_QBYTE(%a6) | clear quotient field
276 bfextu STAG(%a6){#0:#3},%d0 |stag = d0
277 bfextu DTAG(%a6){#0:#3},%d1 |dtag = d1
278
279|
280| Alias extended denorms to norms for the jump table.
281|
282 bclrl #2,%d0 |clear denorm bit of stag
283 bclrl #2,%d1 |clear denorm bit of dtag
284
285 lslb #2,%d1 |move dtag above stag
286 orb %d0,%d1 |d1{3:2} = dtag, d1{1:0} = stag
287| ;Tag values:
288| ;00 = norm or denorm
289| ;01 = zero
290| ;10 = inf
291| ;11 = nan
292 lea pmodt,%a1 |load start of jump table
293 movel (%a1,%d1.w*4),%a1 |fetch the routine address for this tag pair
294 jmp (%a1) |go to the routine
295
296smod_snan:
297 bra src_nan
298smod_dnan:
299 bra dst_nan
300smod_oper:
301 bra t_operr
302smod_zro:
303 moveb ETEMP(%a6),%d1 |get sign of src op
304 moveb FPTEMP(%a6),%d0 |get sign of dst op
305 eorb %d0,%d1 |get exor of sign bits
306 btstl #7,%d1 |test for sign
307 beqs smod_zsn |if clr, do not set sign bit
308 bsetb #q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit
309smod_zsn:
310 btstl #7,%d0 |test if dst is + or -
311 beq ld_pzero |if pos then load +0
312 bra ld_mzero |else neg load -0
313
314smod_fpn:
315 moveb ETEMP(%a6),%d1 |get sign of src op
316 moveb FPTEMP(%a6),%d0 |get sign of dst op
317 eorb %d0,%d1 |get exor of sign bits
318 btstl #7,%d1 |test for sign
319 beqs smod_fsn |if clr, do not set sign bit
320 bsetb #q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit
321smod_fsn:
322 tstb DTAG(%a6) |filter out denormal destination case
323 bpls smod_nrm |denorm bit (bit 7) clear: dest is not a denorm
324 leal FPTEMP(%a6),%a0 |a0<- addr(FPTEMP)
325 bra t_resdnrm |force UNFL(but exact) result
326smod_nrm:
327 fmovel USER_FPCR(%a6),%fpcr |use user's rmode and precision
328 fmovex FPTEMP(%a6),%fp0 |return dest to fp0
329 rts
330
331|
332| FREM
333|
334premt:
335| ;$25 frem
336| ;dtag,stag
337 .long srem | 00,00 norm,norm = normal
338 .long srem_oper | 00,01 norm,zero = nan with operr
339 .long srem_fpn | 00,10 norm,inf = fpn
340 .long srem_snan | 00,11 norm,nan = nan
341 .long srem_zro | 01,00 zero,norm = +-zero
342 .long srem_oper | 01,01 zero,zero = nan with operr
343 .long srem_zro | 01,10 zero,inf = +-zero
344 .long srem_snan | 01,11 zero,nan = nan
345 .long srem_oper | 10,00 inf,norm = nan with operr
346 .long srem_oper | 10,01 inf,zero = nan with operr
347 .long srem_oper | 10,10 inf,inf = nan with operr
348 .long srem_snan | 10,11 inf,nan = nan
349 .long srem_dnan | 11,00 nan,norm = nan
350 .long srem_dnan | 11,01 nan,zero = nan
351 .long srem_dnan | 11,10 nan,inf = nan
352 .long srem_dnan | 11,11 nan,nan = nan
353
354 .global prem
| prem: clear the quotient byte, build a 4-bit {dtag,stag} index and
| dispatch through the premt table.
355prem:
356 clrb FPSR_QBYTE(%a6) |clear quotient field
357 bfextu STAG(%a6){#0:#3},%d0 |stag = d0
358 bfextu DTAG(%a6){#0:#3},%d1 |dtag = d1
359|
360| Alias extended denorms to norms for the jump table.
361|
362 bclr #2,%d0 |clear denorm bit of stag
363 bclr #2,%d1 |clear denorm bit of dtag
364
365 lslb #2,%d1 |move dtag above stag
366 orb %d0,%d1 |d1{3:2} = dtag, d1{1:0} = stag
367| ;Tag values:
368| ;00 = norm or denorm
369| ;01 = zero
370| ;10 = inf
371| ;11 = nan
372 lea premt,%a1 |load start of jump table
373 movel (%a1,%d1.w*4),%a1 |fetch the routine address for this tag pair
374 jmp (%a1) |go to the routine
375
376srem_snan:
377 bra src_nan
378srem_dnan:
379 bra dst_nan
380srem_oper:
381 bra t_operr
382srem_zro:
383 moveb ETEMP(%a6),%d1 |get sign of src op
384 moveb FPTEMP(%a6),%d0 |get sign of dst op
385 eorb %d0,%d1 |get exor of sign bits
386 btstl #7,%d1 |test for sign
387 beqs srem_zsn |if clr, do not set sign bit
388 bsetb #q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit
389srem_zsn:
390 btstl #7,%d0 |test if dst is + or -
391 beq ld_pzero |if pos then load +0
392 bra ld_mzero |else neg load -0
393
394srem_fpn:
395 moveb ETEMP(%a6),%d1 |get sign of src op
396 moveb FPTEMP(%a6),%d0 |get sign of dst op
397 eorb %d0,%d1 |get exor of sign bits
398 btstl #7,%d1 |test for sign
399 beqs srem_fsn |if clr, do not set sign bit
400 bsetb #q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit
401srem_fsn:
402 tstb DTAG(%a6) |filter out denormal destination case
403 bpls srem_nrm |denorm bit (bit 7) clear: dest is not a denorm
404 leal FPTEMP(%a6),%a0 |a0<- addr(FPTEMP)
405 bra t_resdnrm |force UNFL(but exact) result
406srem_nrm:
407 fmovel USER_FPCR(%a6),%fpcr |use user's rmode and precision
408 fmovex FPTEMP(%a6),%fp0 |return dest to fp0
409 rts
410|
411| FSCALE
412|
413pscalet:
414| ;$26 fscale
415| ;dtag,stag
416 .long sscale | 00,00 norm,norm = result
417 .long sscale | 00,01 norm,zero = fpn
418 .long scl_opr | 00,10 norm,inf = nan with operr
419 .long scl_snan | 00,11 norm,nan = nan
420 .long scl_zro | 01,00 zero,norm = +-zero
421 .long scl_zro | 01,01 zero,zero = +-zero
422 .long scl_opr | 01,10 zero,inf = nan with operr
423 .long scl_snan | 01,11 zero,nan = nan
424 .long scl_inf | 10,00 inf,norm = +-inf
425 .long scl_inf | 10,01 inf,zero = +-inf
426 .long scl_opr | 10,10 inf,inf = nan with operr
427 .long scl_snan | 10,11 inf,nan = nan
428 .long scl_dnan | 11,00 nan,norm = nan
429 .long scl_dnan | 11,01 nan,zero = nan
430 .long scl_dnan | 11,10 nan,inf = nan
431 .long scl_dnan | 11,11 nan,nan = nan
432
433 .global pscale
| pscale: build a 4-bit {dtag,stag} index and dispatch through pscalet.
434pscale:
435 bfextu STAG(%a6){#0:#3},%d0 |stag in d0
436 bfextu DTAG(%a6){#0:#3},%d1 |dtag in d1
437 bclrl #2,%d0 |alias denorm into norm
438 bclrl #2,%d1 |alias denorm into norm
439 lslb #2,%d1 |move dtag above stag
440 orb %d0,%d1 |d1{3:2} = dtag, d1{1:0} = stag
441| ;Tag values (denorm already aliased to norm):
442| ;00 = norm or denorm
443| ;01 = zero
444| ;10 = inf
445| ;11 = nan
446|
447|
448|
449 leal pscalet,%a1 |load start of jump table
450 movel (%a1,%d1.w*4),%a1 |load a1 with label depending on tag
451 jmp (%a1) |go to the routine
452
453scl_opr:
454 bra t_operr
455
456scl_dnan:
457 bra dst_nan
458
459scl_zro:
460 btstb #sign_bit,FPTEMP_EX(%a6) |test if + or -
461 beq ld_pzero |if pos then load +0
462 bra ld_mzero |if neg then load -0
463scl_inf:
464 btstb #sign_bit,FPTEMP_EX(%a6) |test if + or -
465 beq ld_pinf |if pos then load +inf
466 bra ld_minf |else neg load -inf
467scl_snan:
468 bra src_nan
469|
470| FSINCOS
471|
472 .global ssincosz
473ssincosz:
474 btstb #sign_bit,ETEMP(%a6) |get sign
475 beqs sincosp
476 fmovex MZERO,%fp0
477 bras sincoscom
478sincosp:
479 fmovex PZERO,%fp0
480sincoscom:
481 fmovemx PONE,%fp1-%fp1 |do not allow FPSR to be affected
482 bra sto_cos |store cosine result
483
484 .global ssincosi
485ssincosi:
486 fmovex QNAN,%fp1 |load NAN
487 bsr sto_cos |store cosine result
488 fmovex QNAN,%fp0 |load NAN
489 bra t_operr
490
491 .global ssincosnan
492ssincosnan:
493 movel ETEMP_EX(%a6),FP_SCR1(%a6)
494 movel ETEMP_HI(%a6),FP_SCR1+4(%a6)
495 movel ETEMP_LO(%a6),FP_SCR1+8(%a6)
496 bsetb #signan_bit,FP_SCR1+4(%a6)
497 fmovemx FP_SCR1(%a6),%fp1-%fp1
498 bsr sto_cos
499 bra src_nan
500|
501| This code forces default values for the zero, inf, and nan cases
502| in the transcendentals code. The CC bits must be set in the
503| stacked FPSR to be correctly reported.
504|
505|**Returns +PI/2
506 .global ld_ppi2
507ld_ppi2:
508 fmovex PPIBY2,%fp0 |load +pi/2
509 bra t_inx2 |set inex2 exc
510
511|**Returns -PI/2
512 .global ld_mpi2
513ld_mpi2:
514 fmovex MPIBY2,%fp0 |load -pi/2
515 orl #neg_mask,USER_FPSR(%a6) |set N bit
516 bra t_inx2 |set inex2 exc
517
518|**Returns +inf
519 .global ld_pinf
520ld_pinf:
521 fmovex PINF,%fp0 |load +inf
522 orl #inf_mask,USER_FPSR(%a6) |set I bit
523 rts
524
525|**Returns -inf
526 .global ld_minf
527ld_minf:
528 fmovex MINF,%fp0 |load -inf
529 orl #neg_mask+inf_mask,USER_FPSR(%a6) |set N and I bits
530 rts
531
532|**Returns +1
533 .global ld_pone
534ld_pone:
535 fmovex PONE,%fp0 |load +1
536 rts
537
538|**Returns -1
539 .global ld_mone
540ld_mone:
541 fmovex MONE,%fp0 |load -1
542 orl #neg_mask,USER_FPSR(%a6) |set N bit
543 rts
544
545|**Returns +0
546 .global ld_pzero
547ld_pzero:
548 fmovex PZERO,%fp0 |load +0
549 orl #z_mask,USER_FPSR(%a6) |set Z bit
550 rts
551
552|**Returns -0
553 .global ld_mzero
554ld_mzero:
555 fmovex MZERO,%fp0 |load -0
556 orl #neg_mask+z_mask,USER_FPSR(%a6) |set N and Z bits
557 rts
558
559 |end
diff --git a/arch/m68k/fpsp040/fpsp.h b/arch/m68k/fpsp040/fpsp.h
new file mode 100644
index 000000000000..984a4eb8010a
--- /dev/null
+++ b/arch/m68k/fpsp040/fpsp.h
@@ -0,0 +1,348 @@
1|
2| fpsp.h 3.3 3.3
3|
4
5| Copyright (C) Motorola, Inc. 1990
6| All Rights Reserved
7|
8| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
9| The copyright notice above does not evidence any
10| actual or intended publication of such source code.
11
12| fpsp.h --- stack frame offsets during FPSP exception handling
13|
14| These equates are used to access the exception frame, the fsave
15| frame and any local variables needed by the FPSP package.
16|
17| All FPSP handlers begin by executing:
18|
19| link a6,#-LOCAL_SIZE
20| fsave -(a7)
21| movem.l d0-d1/a0-a1,USER_DA(a6)
22| fmovem.x fp0-fp3,USER_FP0(a6)
23| fmove.l fpsr/fpcr/fpiar,USER_FPSR(a6)
24|
25| After initialization, the stack looks like this:
26|
27| A7 ---> +-------------------------------+
28| | |
29| | FPU fsave area |
30| | |
31| +-------------------------------+
32| | |
33| | FPSP Local Variables |
34| | including |
35| | saved registers |
36| | |
37| +-------------------------------+
38| A6 ---> | Saved A6 |
39| +-------------------------------+
40| | |
41| | Exception Frame |
42| | |
43| | |
44|
45| Positive offsets from A6 refer to the exception frame. Negative
46| offsets refer to the Local Variable area and the fsave area.
47| The fsave frame is also accessible from the top via A7.
48|
49| On exit, the handlers execute:
50|
51| movem.l USER_DA(a6),d0-d1/a0-a1
52| fmovem.x USER_FP0(a6),fp0-fp3
53| fmove.l USER_FPSR(a6),fpsr/fpcr/fpiar
54| frestore (a7)+
55| unlk a6
56|
57| and then either "bra fpsp_done" if the exception was completely
58| handled by the package, or "bra real_xxxx" which is an external
59| label to a routine that will process a real exception of the
60| type that was generated. Some handlers may omit the "frestore"
61| if the FPU state after the exception is idle.
62|
63| Sometimes the exception handler will transform the fsave area
64| because it needs to report an exception back to the user. This
65| can happen if the package is entered for an unimplemented float
66| instruction that generates (say) an underflow. Alternatively,
67| a second fsave frame can be pushed onto the stack and the
68| handler exit code will reload the new frame and discard the old.
69|
70| The registers d0, d1, a0, a1 and fp0-fp3 are always saved and
71| restored from the "local variable" area and can be used as
72| temporaries. If a routine needs to change the user's value of
73| any of these registers, it should modify the saved copy and let
74| the handler exit code restore the value.
75|
76|----------------------------------------------------------------------
77|
78| Local Variables on the stack
79|
80 .set LOCAL_SIZE,192 | bytes needed for local variables
81 .set LV,-LOCAL_SIZE | convenient base value
82|
83 .set USER_DA,LV+0 | save space for D0-D1,A0-A1
84 .set USER_D0,LV+0 | saved user D0
85 .set USER_D1,LV+4 | saved user D1
86 .set USER_A0,LV+8 | saved user A0
87 .set USER_A1,LV+12 | saved user A1
88 .set USER_FP0,LV+16 | saved user FP0
89 .set USER_FP1,LV+28 | saved user FP1
90 .set USER_FP2,LV+40 | saved user FP2
91 .set USER_FP3,LV+52 | saved user FP3
92 .set USER_FPCR,LV+64 | saved user FPCR
93 .set FPCR_ENABLE,USER_FPCR+2 | FPCR exception enable
94 .set FPCR_MODE,USER_FPCR+3 | FPCR rounding mode control
95 .set USER_FPSR,LV+68 | saved user FPSR
96 .set FPSR_CC,USER_FPSR+0 | FPSR condition code
97 .set FPSR_QBYTE,USER_FPSR+1 | FPSR quotient
98 .set FPSR_EXCEPT,USER_FPSR+2 | FPSR exception
99 .set FPSR_AEXCEPT,USER_FPSR+3 | FPSR accrued exception
100 .set USER_FPIAR,LV+72 | saved user FPIAR
101 .set FP_SCR1,LV+76 | room for a temporary float value
102 .set FP_SCR2,LV+92 | room for a temporary float value
103 .set L_SCR1,LV+108 | room for a temporary long value
104 .set L_SCR2,LV+112 | room for a temporary long value
105 .set STORE_FLG,LV+116
106 .set BINDEC_FLG,LV+117 | used in bindec
107 .set DNRM_FLG,LV+118 | used in res_func
108 .set RES_FLG,LV+119 | used in res_func
109 .set DY_MO_FLG,LV+120 | dyadic/monadic flag
110 .set UFLG_TMP,LV+121 | temporary for uflag errata
111 .set CU_ONLY,LV+122 | cu-only flag
112 .set VER_TMP,LV+123 | temp holding for version number
113 .set L_SCR3,LV+124 | room for a temporary long value
114 .set FP_SCR3,LV+128 | room for a temporary float value
115 .set FP_SCR4,LV+144 | room for a temporary float value
116 .set FP_SCR5,LV+160 | room for a temporary float value
117 .set FP_SCR6,LV+176
118|
119|NEXT equ LV+192 ;need to increase LOCAL_SIZE
120|
121|--------------------------------------------------------------------------
122|
123| fsave offsets and bit definitions
124|
125| Offsets are defined from the end of an fsave because the last 10
126| words of a busy frame are the same as the unimplemented frame.
127|
128 .set CU_SAVEPC,LV-92 | micro-pc for CU (1 byte)
129 .set FPR_DIRTY_BITS,LV-91 | fpr dirty bits
130|
131 .set WBTEMP,LV-76 | write back temp (12 bytes)
132 .set WBTEMP_EX,WBTEMP | wbtemp sign and exponent (2 bytes)
133 .set WBTEMP_HI,WBTEMP+4 | wbtemp mantissa [63:32] (4 bytes)
134 .set WBTEMP_LO,WBTEMP+8 | wbtemp mantissa [31:00] (4 bytes)
135|
136 .set WBTEMP_SGN,WBTEMP+2 | used to store sign
137|
138 .set FPSR_SHADOW,LV-64 | fpsr shadow reg
139|
140 .set FPIARCU,LV-60 | Instr. addr. reg. for CU (4 bytes)
141|
142 .set CMDREG2B,LV-52 | cmd reg for machine 2
143 .set CMDREG3B,LV-48 | cmd reg for E3 exceptions (2 bytes)
144|
145 .set NMNEXC,LV-44 | NMNEXC (unsup,snan bits only)
146 .set nmn_unsup_bit,1 |
147 .set nmn_snan_bit,0 |
148|
149 .set NMCEXC,LV-43 | NMNEXC & NMCEXC
150 .set nmn_operr_bit,7
151 .set nmn_ovfl_bit,6
152 .set nmn_unfl_bit,5
153 .set nmc_unsup_bit,4
154 .set nmc_snan_bit,3
155 .set nmc_operr_bit,2
156 .set nmc_ovfl_bit,1
157 .set nmc_unfl_bit,0
158|
159 .set STAG,LV-40 | source tag (1 byte)
160 .set WBTEMP_GRS,LV-40 | alias wbtemp guard, round, sticky
161 .set guard_bit,1 | guard bit is bit number 1
162 .set round_bit,0 | round bit is bit number 0
163 .set stag_mask,0xE0 | upper 3 bits are source tag type
164 .set denorm_bit,7 | bit determines if denorm or unnorm
165 .set etemp15_bit,4 | etemp exponent bit #15
166 .set wbtemp66_bit,2 | wbtemp mantissa bit #66
167 .set wbtemp1_bit,1 | wbtemp mantissa bit #1
168 .set wbtemp0_bit,0 | wbtemp mantissa bit #0
169|
170 .set STICKY,LV-39 | holds sticky bit
171 .set sticky_bit,7
172|
173 .set CMDREG1B,LV-36 | cmd reg for E1 exceptions (2 bytes)
174 .set kfact_bit,12 | distinguishes static/dynamic k-factor
175| ;on packed move outs. NOTE: this
176| ;equate only works when CMDREG1B is in
177| ;a register.
178|
179 .set CMDWORD,LV-35 | command word in cmd1b
180 .set direction_bit,5 | bit 0 in opclass
181 .set size_bit2,12 | bit 2 in size field
182|
183 .set DTAG,LV-32 | dest tag (1 byte)
184 .set dtag_mask,0xE0 | upper 3 bits are dest type tag
185 .set fptemp15_bit,4 | fptemp exponent bit #15
186|
187 .set WB_BYTE,LV-31 | holds WBTE15 bit (1 byte)
188 .set wbtemp15_bit,4 | wbtemp exponent bit #15
189|
190 .set E_BYTE,LV-28 | holds E1 and E3 bits (1 byte)
191 .set E1,2 | which bit is E1 flag
192 .set E3,1 | which bit is E3 flag
193 .set SFLAG,0 | which bit is S flag
194|
195 .set T_BYTE,LV-27 | holds T and U bits (1 byte)
196 .set XFLAG,7 | which bit is X flag
197 .set UFLAG,5 | which bit is U flag
198 .set TFLAG,4 | which bit is T flag
199|
200 .set FPTEMP,LV-24 | fptemp (12 bytes)
201 .set FPTEMP_EX,FPTEMP | fptemp sign and exponent (2 bytes)
202 .set FPTEMP_HI,FPTEMP+4 | fptemp mantissa [63:32] (4 bytes)
203 .set FPTEMP_LO,FPTEMP+8 | fptemp mantissa [31:00] (4 bytes)
204|
205 .set FPTEMP_SGN,FPTEMP+2 | used to store sign
206|
207 .set ETEMP,LV-12 | etemp (12 bytes)
208 .set ETEMP_EX,ETEMP | etemp sign and exponent (2 bytes)
209 .set ETEMP_HI,ETEMP+4 | etemp mantissa [63:32] (4 bytes)
210 .set ETEMP_LO,ETEMP+8 | etemp mantissa [31:00] (4 bytes)
211|
212 .set ETEMP_SGN,ETEMP+2 | used to store sign
213|
214 .set EXC_SR,4 | exception frame status register
215 .set EXC_PC,6 | exception frame program counter
216 .set EXC_VEC,10 | exception frame vector (format+vector#)
217 .set EXC_EA,12 | exception frame effective address
218|
219|--------------------------------------------------------------------------
220|
221| FPSR/FPCR bits
222|
223 .set neg_bit,3 | negative result
224 .set z_bit,2 | zero result
225 .set inf_bit,1 | infinity result
226 .set nan_bit,0 | not-a-number result
227|
228 .set q_sn_bit,7 | sign bit of quotient byte
229|
230 .set bsun_bit,7 | branch on unordered
231 .set snan_bit,6 | signalling nan
232 .set operr_bit,5 | operand error
233 .set ovfl_bit,4 | overflow
234 .set unfl_bit,3 | underflow
235 .set dz_bit,2 | divide by zero
236 .set inex2_bit,1 | inexact result 2
237 .set inex1_bit,0 | inexact result 1
238|
239 .set aiop_bit,7 | accrued illegal operation
240 .set aovfl_bit,6 | accrued overflow
241 .set aunfl_bit,5 | accrued underflow
242 .set adz_bit,4 | accrued divide by zero
243 .set ainex_bit,3 | accrued inexact
244|
245| FPSR individual bit masks
246|
247 .set neg_mask,0x08000000
248 .set z_mask,0x04000000
249 .set inf_mask,0x02000000
250 .set nan_mask,0x01000000
251|
252 .set bsun_mask,0x00008000 |
253 .set snan_mask,0x00004000
254 .set operr_mask,0x00002000
255 .set ovfl_mask,0x00001000
256 .set unfl_mask,0x00000800
257 .set dz_mask,0x00000400
258 .set inex2_mask,0x00000200
259 .set inex1_mask,0x00000100
260|
261 .set aiop_mask,0x00000080 | accrued illegal operation
262 .set aovfl_mask,0x00000040 | accrued overflow
263 .set aunfl_mask,0x00000020 | accrued underflow
264 .set adz_mask,0x00000010 | accrued divide by zero
265 .set ainex_mask,0x00000008 | accrued inexact
266|
267| FPSR combinations used in the FPSP
268|
269 .set dzinf_mask,inf_mask+dz_mask+adz_mask
270 .set opnan_mask,nan_mask+operr_mask+aiop_mask
271 .set nzi_mask,0x01ffffff | clears N, Z, and I
272 .set unfinx_mask,unfl_mask+inex2_mask+aunfl_mask+ainex_mask
273 .set unf2inx_mask,unfl_mask+inex2_mask+ainex_mask
274 .set ovfinx_mask,ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
275 .set inx1a_mask,inex1_mask+ainex_mask
276 .set inx2a_mask,inex2_mask+ainex_mask
277 .set snaniop_mask,nan_mask+snan_mask+aiop_mask
278 .set naniop_mask,nan_mask+aiop_mask
279 .set neginf_mask,neg_mask+inf_mask
280 .set infaiop_mask,inf_mask+aiop_mask
281 .set negz_mask,neg_mask+z_mask
282 .set opaop_mask,operr_mask+aiop_mask
283 .set unfl_inx_mask,unfl_mask+aunfl_mask+ainex_mask
284 .set ovfl_inx_mask,ovfl_mask+aovfl_mask+ainex_mask
285|
286|--------------------------------------------------------------------------
287|
288| FPCR rounding modes
289|
290 .set x_mode,0x00 | round to extended
291 .set s_mode,0x40 | round to single
292 .set d_mode,0x80 | round to double
293|
294 .set rn_mode,0x00 | round nearest
295 .set rz_mode,0x10 | round to zero
296 .set rm_mode,0x20 | round to minus infinity
297 .set rp_mode,0x30 | round to plus infinity
298|
299|--------------------------------------------------------------------------
300|
301| Miscellaneous equates
302|
303 .set signan_bit,6 | signalling nan bit in mantissa
304 .set sign_bit,7
305|
306 .set rnd_stky_bit,29 | round/sticky bit of mantissa
307| this can only be used if in a data register
308 .set sx_mask,0x01800000 | set s and x bits in word $48
309|
310 .set LOCAL_EX,0
311 .set LOCAL_SGN,2
312 .set LOCAL_HI,4
313 .set LOCAL_LO,8
314 .set LOCAL_GRS,12 | valid ONLY for FP_SCR1, FP_SCR2
315|
316|
317 .set norm_tag,0x00 | tag bits in {7:5} position
318 .set zero_tag,0x20
319 .set inf_tag,0x40
320 .set nan_tag,0x60
321 .set dnrm_tag,0x80
322|
323| fsave sizes and formats
324|
325 .set VER_4,0x40 | fpsp compatible version numbers
326| are in the $40s {$40-$4f}
327 .set VER_40,0x40 | original version number
328 .set VER_41,0x41 | revision version number
329|
330 .set BUSY_SIZE,100 | size of busy frame
331 .set BUSY_FRAME,LV-BUSY_SIZE | start of busy frame
332|
333 .set UNIMP_40_SIZE,44 | size of orig unimp frame
334 .set UNIMP_41_SIZE,52 | size of rev unimp frame
335|
336 .set IDLE_SIZE,4 | size of idle frame
337 .set IDLE_FRAME,LV-IDLE_SIZE | start of idle frame
338|
339| exception vectors
340|
341 .set TRACE_VEC,0x2024 | trace trap
342 .set FLINE_VEC,0x002C | real F-line
343 .set UNIMP_VEC,0x202C | unimplemented
344 .set INEX_VEC,0x00C4
345|
346 .set dbl_thresh,0x3C01
347 .set sgl_thresh,0x3F81
348|
diff --git a/arch/m68k/fpsp040/gen_except.S b/arch/m68k/fpsp040/gen_except.S
new file mode 100644
index 000000000000..401d06f39f73
--- /dev/null
+++ b/arch/m68k/fpsp040/gen_except.S
@@ -0,0 +1,468 @@
1|
2| gen_except.sa 3.7 1/16/92
3|
4| gen_except --- FPSP routine to detect reportable exceptions
5|
6| This routine compares the exception enable byte of the
7| user_fpcr on the stack with the exception status byte
8| of the user_fpsr.
9|
10| Any routine which may report an exception must load
11| the stack frame in memory with the exceptional operand(s).
12|
13| Priority for exceptions is:
14|
15| Highest: bsun
16| snan
17| operr
18| ovfl
19| unfl
20| dz
21| inex2
22| Lowest: inex1
23|
24| Note: The IEEE standard specifies that inex2 is to be
25| reported if ovfl occurs and the ovfl enable bit is not
26| set but the inex2 enable bit is.
27|
28|
29| Copyright (C) Motorola, Inc. 1990
30| All Rights Reserved
31|
32| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
33| The copyright notice above does not evidence any
34| actual or intended publication of such source code.
35
36GEN_EXCEPT: |idnt 2,1 | Motorola 040 Floating Point Software Package
37
38 |section 8
39
40#include "fpsp.h"
41
42 |xref real_trace
43 |xref fpsp_done
44 |xref fpsp_fmt_error
45
46exc_tbl:
| Jump table used by frame_com.  Indexed by the offset (0-8) of the first
| set bit in (FPCR enable byte & FPSR exception byte); exception names
| below follow the priority list in the file header.
47 .long bsun_exc |0: bsun
48 .long commonE1 |1: snan
49 .long commonE1 |2: operr
50 .long ovfl_unfl |3: ovfl
51 .long ovfl_unfl |4: unfl
52 .long commonE1 |5: dz
53 .long commonE3 |6: inex2
54 .long commonE3 |7: inex1
55 .long no_match |8: no enabled exception is pending
56
57 .global gen_except
58gen_except:
59 cmpib #IDLE_SIZE-4,1(%a7) |test for idle frame
60 beq do_check |go handle idle frame
61 cmpib #UNIMP_40_SIZE-4,1(%a7) |test for orig unimp frame
62 beqs unimp_x |go handle unimp frame
63 cmpib #UNIMP_41_SIZE-4,1(%a7) |test for rev unimp frame
64 beqs unimp_x |go handle unimp frame
65 cmpib #BUSY_SIZE-4,1(%a7) |if size <> $60, fmt error
66 bnel fpsp_fmt_error
67 leal BUSY_SIZE+LOCAL_SIZE(%a7),%a1 |init a1 so fpsp.h
68| ;equates will work
69| Fix up the new busy frame with entries from the unimp frame
70|
71 movel ETEMP_EX(%a6),ETEMP_EX(%a1) |copy etemp from unimp
72 movel ETEMP_HI(%a6),ETEMP_HI(%a1) |frame to busy frame
73 movel ETEMP_LO(%a6),ETEMP_LO(%a1)
74 movel CMDREG1B(%a6),CMDREG1B(%a1) |set inst in frame to unimp
75 movel CMDREG1B(%a6),%d0 |fix cmd1b to make it
76 andl #0x03c30000,%d0 |work for cmd3b
77 bfextu CMDREG1B(%a6){#13:#1},%d1 |extract bit 2
78 lsll #5,%d1
79 swap %d1
80 orl %d1,%d0 |put it in the right place
81 bfextu CMDREG1B(%a6){#10:#3},%d1 |extract bit 3,4,5
82 lsll #2,%d1
83 swap %d1
84 orl %d1,%d0 |put them in the right place
85 movel %d0,CMDREG3B(%a1) |in the busy frame
86|
87| Or in the FPSR from the emulation with the USER_FPSR on the stack.
88|
89 fmovel %FPSR,%d0
90 orl %d0,USER_FPSR(%a6)
91 movel USER_FPSR(%a6),FPSR_SHADOW(%a1) |set exc bits
92 orl #sx_mask,E_BYTE(%a1)
93 bra do_clean
94
95|
96| Frame is an unimp frame possibly resulting from an fmove <ea>,fp0
97| that caused an exception
98|
99| a1 is modified to point into the new frame allowing fpsp equates
100| to be valid.
101|
102unimp_x:
103 cmpib #UNIMP_40_SIZE-4,1(%a7) |test for orig unimp frame
104 bnes test_rev
105 leal UNIMP_40_SIZE+LOCAL_SIZE(%a7),%a1
106 bras unimp_con
107test_rev:
108 cmpib #UNIMP_41_SIZE-4,1(%a7) |test for rev unimp frame
109 bnel fpsp_fmt_error |if not $28 or $30
110 leal UNIMP_41_SIZE+LOCAL_SIZE(%a7),%a1
111
112unimp_con:
113|
114| Fix up the new unimp frame with entries from the old unimp frame
115|
116 movel CMDREG1B(%a6),CMDREG1B(%a1) |set inst in frame to unimp
117|
118| Or in the FPSR from the emulation with the USER_FPSR on the stack.
119|
120 fmovel %FPSR,%d0
121 orl %d0,USER_FPSR(%a6)
122 bra do_clean
123
124|
125| Frame is idle, so check for exceptions reported through
126| USER_FPSR and set the unimp frame accordingly.
127| A7 must be incremented past the 4-byte idle fsave
128| frame so that it points back to the unimp frame.
129|
130
131do_check:
132 addl #4,%a7 |point A7 back to unimp frame
133|
134| Or in the FPSR from the emulation with the USER_FPSR on the stack.
135|
136 fmovel %FPSR,%d0
137 orl %d0,USER_FPSR(%a6)
138|
139| On a busy frame, we must clear the nmnexc bits.
140|
141 cmpib #BUSY_SIZE-4,1(%a7) |check frame type
142 bnes check_fr |if not busy, go test for orig unimp
143 clrw NMNEXC(%a6) |clr nmnexc & nmcexc
144 btstb #5,CMDREG1B(%a6) |test for fmove out
145 bnes frame_com |if fmove out, skip the shadow update
146 movel USER_FPSR(%a6),FPSR_SHADOW(%a6) |set exc bits
147 orl #sx_mask,E_BYTE(%a6)
148 bras frame_com
149check_fr:
150 cmpb #UNIMP_40_SIZE-4,1(%a7) |orig unimp frame?
151 beqs frame_com |if so, leave nmnexc untouched
152 clrw NMNEXC(%a6) |else clr nmnexc & nmcexc
153frame_com:
154 moveb FPCR_ENABLE(%a6),%d0 |get fpcr enable byte
155 andb FPSR_EXCEPT(%a6),%d0 |and in the fpsr exc byte
156 bfffo %d0{#24:#8},%d1 |test for first set bit
157 leal exc_tbl,%a0 |load jmp table address
158 subib #24,%d1 |normalize bit offset to 0-8
159 movel (%a0,%d1.w*4),%a0 |load routine address based
160| ;on the first enabled exc
| ;offset 8 (no bit set) selects no_match
161 jmp (%a0) |jump to routine
162|
163| Bsun is not possible in unimp or unsupp
164|
165bsun_exc:
166 bra do_clean
167|
168| The typical work to be done to the unimp frame to report an
169| exception is to set the E1/E3 byte and clr the U flag.
170| commonE1 does this for E1 exceptions, which are snan,
171| operr, and dz. commonE3 does this for E3 exceptions, which
172| are inex2 and inex1, and also clears the E1 exception bit
173| left over from the unimp exception.
174|
175commonE1:
176 bsetb #E1,E_BYTE(%a6) |set E1 flag
177 bra commonE |go clean and exit
178
179commonE3:
180 tstb UFLG_TMP(%a6) |test flag for unsup/unimp state
181 bnes unsE3
182uniE3:
183 bsetb #E3,E_BYTE(%a6) |set E3 flag
184 bclrb #E1,E_BYTE(%a6) |clr E1 from unimp
185 bra commonE
186
187unsE3:
188 tstb RES_FLG(%a6)
189 bnes unsE3_0
190unsE3_1:
191 bsetb #E3,E_BYTE(%a6) |set E3 flag
192unsE3_0:
193 bclrb #E1,E_BYTE(%a6) |clr E1 flag
194 movel CMDREG1B(%a6),%d0
195 andl #0x03c30000,%d0 |work for cmd3b
196 bfextu CMDREG1B(%a6){#13:#1},%d1 |extract bit 2
197 lsll #5,%d1
198 swap %d1
199 orl %d1,%d0 |put it in the right place
200 bfextu CMDREG1B(%a6){#10:#3},%d1 |extract bit 3,4,5
201 lsll #2,%d1
202 swap %d1
203 orl %d1,%d0 |put them in the right place
204 movel %d0,CMDREG3B(%a6) |in the busy frame
205
206commonE:
207 bclrb #UFLAG,T_BYTE(%a6) |clr U flag from unimp
208 bra do_clean |go clean and exit
209|
210| No bits in the enable byte match existing exceptions. Check for
211| the case of the ovfl exc without the ovfl enabled, but with
212| inex2 enabled.
213|
214no_match:
215 btstb #inex2_bit,FPCR_ENABLE(%a6) |check for ovfl/inex2 case
216 beqs no_exc |if clear, exit
217 btstb #ovfl_bit,FPSR_EXCEPT(%a6) |now check ovfl
218 beqs no_exc |if clear, exit
219 bras ovfl_unfl |go to ovfl_unfl to determine if
220| ;it is an unsupp or unimp exc
221
222| No exceptions are to be reported. If the instruction was
223| unimplemented, no FPU restore is necessary. If it was
224| unsupported, we must perform the restore.
225no_exc:
226 tstb UFLG_TMP(%a6) |test flag for unsupp/unimp state
227 beqs uni_no_exc |unimp: no frestore needed
228uns_no_exc:
229 tstb RES_FLG(%a6) |check if frestore is needed
230 bne do_clean |if set, frestore via do_clean
| ;if clear, no frestore needed
231uni_no_exc:
232 moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |reload d0-d1/a0-a1 from USER_DA
233 fmovemx USER_FP0(%a6),%fp0-%fp3 |reload fp0-fp3 from USER_FP0
234 fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar |reload fp control regs
235 unlk %a6
236 bra finish_up
237|
238| Unsupported Data Type Handler:
239| Ovfl:
240| An fmoveout that results in an overflow is reported this way.
241| Unfl:
242| An fmoveout that results in an underflow is reported this way.
243|
244| Unimplemented Instruction Handler:
245| Ovfl:
246| Only scosh, setox, ssinh, stwotox, and scale can set overflow in
247| this manner.
248| Unfl:
249| Stwotox, setox, and scale can set underflow in this manner.
250| Any of the other Library Routines such that f(x)=x in which
251| x is an extended denorm can report an underflow exception.
252| It is the responsibility of the exception-causing routine
253| to make sure that WBTEMP is correct.
254|
255| The exceptional operand is in FP_SCR1.
256|
257ovfl_unfl:
258 tstb UFLG_TMP(%a6) |test flag for unsupp/unimp state
259 beqs ofuf_con
260|
261| The caller was from an unsupported data type trap. Test if the
262| caller set CU_ONLY. If so, the exceptional operand is expected in
263| FPTEMP, rather than WBTEMP.
264|
265 tstb CU_ONLY(%a6) |test if inst is cu-only
266 beq unsE3
267| move.w #$fe,CU_SAVEPC(%a6)
268 clrb CU_SAVEPC(%a6)
269 bsetb #E1,E_BYTE(%a6) |set E1 exception flag
270 movew ETEMP_EX(%a6),FPTEMP_EX(%a6)
271 movel ETEMP_HI(%a6),FPTEMP_HI(%a6)
272 movel ETEMP_LO(%a6),FPTEMP_LO(%a6)
273 bsetb #fptemp15_bit,DTAG(%a6) |set fpte15
274 bclrb #UFLAG,T_BYTE(%a6) |clr U flag from unimp
275 bra do_clean |go clean and exit
276
277ofuf_con:
278 moveb (%a7),VER_TMP(%a6) |save version number
279 cmpib #BUSY_SIZE-4,1(%a7) |check for busy frame
280 beqs busy_fr |if busy, use as is; else grow unimp to busy
281 cmpib #VER_40,(%a7) |test for orig unimp frame
282 bnes try_41 |if not, test for rev frame
283 moveql #13,%d0 |need to zero 14 lwords
284 bras ofuf_fin
285try_41:
286 cmpib #VER_41,(%a7) |test for rev unimp frame
287 bnel fpsp_fmt_error |if neither, exit with error
288 moveql #11,%d0 |need to zero 12 lwords
289
290ofuf_fin:
291 clrl (%a7) |clear the old version/size lword
292loop1:
293 clrl -(%a7) |clear and dec a7
294 dbra %d0,loop1
295 moveb VER_TMP(%a6),(%a7) |put the saved version number back
296 moveb #BUSY_SIZE-4,1(%a7) |write busy fmt word.
297busy_fr:
298 movel FP_SCR1(%a6),WBTEMP_EX(%a6) |write
299 movel FP_SCR1+4(%a6),WBTEMP_HI(%a6) |exceptional op to
300 movel FP_SCR1+8(%a6),WBTEMP_LO(%a6) |wbtemp
301 bsetb #E3,E_BYTE(%a6) |set E3 flag
302 bclrb #E1,E_BYTE(%a6) |make sure E1 is clear
303 bclrb #UFLAG,T_BYTE(%a6) |clr U flag
304 movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
305 orl #sx_mask,E_BYTE(%a6)
306 movel CMDREG1B(%a6),%d0 |fix cmd1b to make it
307 andl #0x03c30000,%d0 |work for cmd3b
308 bfextu CMDREG1B(%a6){#13:#1},%d1 |extract bit 2
309 lsll #5,%d1
310 swap %d1
311 orl %d1,%d0 |put it in the right place
312 bfextu CMDREG1B(%a6){#10:#3},%d1 |extract bit 3,4,5
313 lsll #2,%d1
314 swap %d1
315 orl %d1,%d0 |put them in the right place
316 movel %d0,CMDREG3B(%a6) |in the busy frame
317
318|
319| Check if the frame to be restored is busy or unimp.
320|** NOTE *** Bug fix for errata (0d43b #3)
321| If the frame is unimp, we must create a busy frame to
322| fix the bug with the nmnexc bits in cases in which they
323| are set by a previous instruction and not cleared by
324| the save. The frame will be unimp only if the final
325| instruction in an emulation routine caused the exception
326| by doing an fmove <ea>,fp0. The exception operand, in
327| internal format, is in fptemp.
328|
329do_clean:
330 cmpib #UNIMP_40_SIZE-4,1(%a7)
331 bnes do_con
332 moveql #13,%d0 |in orig, need to zero 14 lwords
333 bras do_build
334do_con:
335 cmpib #UNIMP_41_SIZE-4,1(%a7)
336 bnes do_restore |frame must be busy
337 moveql #11,%d0 |in rev, need to zero 12 lwords
338
339do_build:
340 moveb (%a7),VER_TMP(%a6)
341 clrl (%a7)
342loop2:
343 clrl -(%a7) |clear and dec a7
344 dbra %d0,loop2
345|
346| Use a1 as pointer into new frame. a6 is not correct if an unimp or
347| busy frame was created as the result of an exception on the final
348| instruction of an emulation routine.
349|
350| We need to set the nmcexc bits if the exception is E1. Otherwise,
351| the exc taken will be inex2.
352|
353 leal BUSY_SIZE+LOCAL_SIZE(%a7),%a1 |init a1 for new frame
354 moveb VER_TMP(%a6),(%a7) |write busy fmt word
355 moveb #BUSY_SIZE-4,1(%a7)
356 movel FP_SCR1(%a6),WBTEMP_EX(%a1) |write
357 movel FP_SCR1+4(%a6),WBTEMP_HI(%a1) |exceptional op to
358 movel FP_SCR1+8(%a6),WBTEMP_LO(%a1) |wbtemp
359| btst.b #E1,E_BYTE(%a1)
360| beq.b do_restore
361 bfextu USER_FPSR(%a6){#17:#4},%d0 |get snan/operr/ovfl/unfl bits
362 bfins %d0,NMCEXC(%a1){#4:#4} |and insert them in nmcexc
363 movel USER_FPSR(%a6),FPSR_SHADOW(%a1) |set exc bits
364 orl #sx_mask,E_BYTE(%a1)
365
366do_restore:
367 moveml USER_DA(%a6),%d0-%d1/%a0-%a1
368 fmovemx USER_FP0(%a6),%fp0-%fp3
369 fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
370 frestore (%a7)+
371 tstb RES_FLG(%a6) |RES_FLG indicates a "continuation" frame
372 beq cont
373 bsr bug1384
374cont:
375 unlk %a6
376|
377| If trace mode enabled, then go to trace handler. This handler
378| cannot have any fp instructions. If there are fp inst's and an
379| exception has been restored into the machine then the exception
380| will occur upon execution of the fp inst. This is not desirable
381| in the kernel (supervisor mode). See MC68040 manual Section 9.3.8.
382|
383finish_up:
384 btstb #7,(%a7) |test T1 in SR
385 bnes g_trace
386 btstb #6,(%a7) |test T0 in SR
387 bnes g_trace
388 bral fpsp_done
389|
390| Change integer stack to look like trace stack
391| The address of the instruction that caused the
392| exception is already in the integer stack (is
393| the same as the saved fpiar)
394|
395| If the current frame is already a 6-word stack then all
396| that needs to be done is to change the vector# to TRACE.
397| If the frame is only a 4-word stack (meaning we got here
398| on an Unsupported data type exception), then we need to grow
399| the stack an extra 2 words and get the FPIAR from the FPU.
400|
401g_trace:
402 bftst EXC_VEC-4(%sp){#0:#4} |test upper 4 bits of the vector word
403 bne g_easy |non-zero: only the vector# needs changing
404
405 subw #4,%sp | make room
406 movel 4(%sp),(%sp) | slide the existing two lwords
407 movel 8(%sp),4(%sp) | down into the new space
408 subw #BUSY_SIZE,%sp | scratch area for an fsave frame
409 fsave (%sp)
410 fmovel %fpiar,BUSY_SIZE+EXC_EA-4(%sp) | fill the new slot from FPIAR
411 frestore (%sp)
412 addw #BUSY_SIZE,%sp | release the scratch area
413
414g_easy:
415 movew #TRACE_VEC,EXC_VEC-4(%a7) |change the vector word to TRACE
416 bral real_trace
417|
418| This is a work-around for hardware bug 1384.
419|
420bug1384:
| Called from do_restore, after its frestore, when RES_FLG is set.
| Saves the FPU state again and inspects the frame: an idle frame with
| version $40 or $41 is replaced by an all-zero unimp frame of the same
| version; a non-idle frame has one bit cleared; a frame with a higher
| version is restored unchanged.  Uses a5 as a private frame pointer and
| preserves d1 through USER_D1(a6).
421 link %a5,#0
422 fsave -(%sp) | save the current FPU state frame
423 cmpib #0x41,(%sp) | check for correct frame
424 beq frame_41
425 bgt nofix | if more advanced mask, do nada
426
427frame_40:
428 tstb 1(%sp) | check to see if idle
429 bne notidle
430idle40:
431 clrl (%sp) | get rid of old fsave frame
432 movel %d1,USER_D1(%a6) | save d1
433 movew #8,%d1 | place unimp frame instead
434loop40: clrl -(%sp) | 9 more zero lwords (10 in all)
435 dbra %d1,loop40
436 movel USER_D1(%a6),%d1 | restore d1
437 movel #0x40280000,-(%sp) | header: version $40, size $28
438 frestore (%sp)+
439 unlk %a5
440 rts
441
442frame_41:
443 tstb 1(%sp) | check to see if idle
444 bne notidle
445idle41:
446 clrl (%sp) | get rid of old fsave frame
447 movel %d1,USER_D1(%a6) | save d1
448 movew #10,%d1 | place unimp frame instead
449loop41: clrl -(%sp) | 11 more zero lwords (12 in all)
450 dbra %d1,loop41
451 movel USER_D1(%a6),%d1 | restore d1
452 movel #0x41300000,-(%sp) | header: version $41, size $30
453 frestore (%sp)+
454 unlk %a5
455 rts
456
457notidle:
| NOTE(review): -40(%a5) is a fixed offset into the frame fsave'd just
| below a5; the field it addresses is not named here - confirm against
| the fsave frame layout before touching it.
458 bclrb #etemp15_bit,-40(%a5)
459 frestore (%sp)+
460 unlk %a5
461 rts
462
463nofix:
464 frestore (%sp)+
465 unlk %a5
466 rts
467
468 |end
diff --git a/arch/m68k/fpsp040/get_op.S b/arch/m68k/fpsp040/get_op.S
new file mode 100644
index 000000000000..c7c2f3727425
--- /dev/null
+++ b/arch/m68k/fpsp040/get_op.S
@@ -0,0 +1,676 @@
1|
2| get_op.sa 3.6 5/19/92
3|
4| get_op.sa 3.5 4/26/91
5|
6| Description: This routine is called by the unsupported format/data
7| type exception handler ('unsupp' - vector 55) and the unimplemented
8| instruction exception handler ('unimp' - vector 11). 'get_op'
9| determines the opclass (0, 2, or 3) and branches to the
10| opclass handler routine. See 68881/2 User's Manual table 4-11
11| for a description of the opclasses.
12|
13| For UNSUPPORTED data/format (exception vector 55) and for
14| UNIMPLEMENTED instructions (exception vector 11) the following
15| applies:
16|
17| - For unnormalized numbers (opclass 0, 2, or 3) the
18| number(s) is normalized and the operand type tag is updated.
19|
20| - For a packed number (opclass 2) the number is unpacked and the
21| operand type tag is updated.
22|
23| - For denormalized numbers (opclass 0 or 2) the number(s) is not
24| changed but passed to the next module. The next module for
25| unimp is do_func, the next module for unsupp is res_func.
26|
27| For UNSUPPORTED data/format (exception vector 55) only the
28| following applies:
29|
30| - If there is a move out with a packed number (opclass 3) the
31| number is packed and written to user memory. For the other
32| opclasses the number(s) are written back to the fsave stack
33| and the instruction is then restored back into the '040. The
34| '040 is then able to complete the instruction.
35|
36| For example:
37| fadd.x fpm,fpn where the fpm contains an unnormalized number.
38| The '040 takes an unsupported data trap and gets to this
39| routine. The number is normalized, put back on the stack and
40| then an frestore is done to restore the instruction back into
41| the '040. The '040 then re-executes the fadd.x fpm,fpn with
42| a normalized number in the source and the instruction is
43| successful.
44|
45| Next consider if in the process of normalizing the un-
46| normalized number it becomes a denormalized number. The
47| routine which converts the unnorm to a norm (called mk_norm)
48| detects this and tags the number as a denorm. The routine
49| res_func sees the denorm tag and converts the denorm to a
50| norm. The instruction is then restored back into the '040
51| which re-executes the instruction.
52|
53|
54| Copyright (C) Motorola, Inc. 1990
55| All Rights Reserved
56|
57| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
58| The copyright notice above does not evidence any
59| actual or intended publication of such source code.
60
61GET_OP: |idnt 2,1 | Motorola 040 Floating Point Software Package
62
63 |section 8
64
65#include "fpsp.h"
66
67 .global PIRN,PIRZRM,PIRP
68 .global SMALRN,SMALRZRM,SMALRP
69 .global BIGRN,BIGRZRM,BIGRP
70
71PIRN:
72 .long 0x40000000,0xc90fdaa2,0x2168c235 |pi
73PIRZRM:
74 .long 0x40000000,0xc90fdaa2,0x2168c234 |pi
75PIRP:
76 .long 0x40000000,0xc90fdaa2,0x2168c235 |pi
77
78|round to nearest
| NOTE(review): the log10(e) entry is identical in SMALRN, SMALRZRM and
| SMALRP, whereas for log10(2), e and log2(e) the SMALRP entry is one
| ulp above the SMALRZRM entry.  Presumably deliberate (e.g. to match
| the coprocessor's own constant) - confirm before "correcting" it.
79SMALRN:
80 .long 0x3ffd0000,0x9a209a84,0xfbcff798 |log10(2)
81 .long 0x40000000,0xadf85458,0xa2bb4a9a |e
82 .long 0x3fff0000,0xb8aa3b29,0x5c17f0bc |log2(e)
83 .long 0x3ffd0000,0xde5bd8a9,0x37287195 |log10(e)
84 .long 0x00000000,0x00000000,0x00000000 |0.0
85| round to zero;round to negative infinity
86SMALRZRM:
87 .long 0x3ffd0000,0x9a209a84,0xfbcff798 |log10(2)
88 .long 0x40000000,0xadf85458,0xa2bb4a9a |e
89 .long 0x3fff0000,0xb8aa3b29,0x5c17f0bb |log2(e)
90 .long 0x3ffd0000,0xde5bd8a9,0x37287195 |log10(e)
91 .long 0x00000000,0x00000000,0x00000000 |0.0
92| round to positive infinity
93SMALRP:
94 .long 0x3ffd0000,0x9a209a84,0xfbcff799 |log10(2)
95 .long 0x40000000,0xadf85458,0xa2bb4a9b |e
96 .long 0x3fff0000,0xb8aa3b29,0x5c17f0bc |log2(e)
97 .long 0x3ffd0000,0xde5bd8a9,0x37287195 |log10(e); same as the RN/RZRM entry
98 .long 0x00000000,0x00000000,0x00000000 |0.0
99
100|round to nearest
101BIGRN:
102 .long 0x3ffe0000,0xb17217f7,0xd1cf79ac |ln(2)
103 .long 0x40000000,0x935d8ddd,0xaaa8ac17 |ln(10)
104 .long 0x3fff0000,0x80000000,0x00000000 |10 ^ 0
105
106 .global PTENRN
107PTENRN:
108 .long 0x40020000,0xA0000000,0x00000000 |10 ^ 1
109 .long 0x40050000,0xC8000000,0x00000000 |10 ^ 2
110 .long 0x400C0000,0x9C400000,0x00000000 |10 ^ 4
111 .long 0x40190000,0xBEBC2000,0x00000000 |10 ^ 8
112 .long 0x40340000,0x8E1BC9BF,0x04000000 |10 ^ 16
113 .long 0x40690000,0x9DC5ADA8,0x2B70B59E |10 ^ 32
114 .long 0x40D30000,0xC2781F49,0xFFCFA6D5 |10 ^ 64
115 .long 0x41A80000,0x93BA47C9,0x80E98CE0 |10 ^ 128
116 .long 0x43510000,0xAA7EEBFB,0x9DF9DE8E |10 ^ 256
117 .long 0x46A30000,0xE319A0AE,0xA60E91C7 |10 ^ 512
118 .long 0x4D480000,0xC9767586,0x81750C17 |10 ^ 1024
119 .long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 |10 ^ 2048
120 .long 0x75250000,0xC4605202,0x8A20979B |10 ^ 4096
121|round to minus infinity
122BIGRZRM:
123 .long 0x3ffe0000,0xb17217f7,0xd1cf79ab |ln(2)
124 .long 0x40000000,0x935d8ddd,0xaaa8ac16 |ln(10)
125 .long 0x3fff0000,0x80000000,0x00000000 |10 ^ 0
126
127 .global PTENRM
128PTENRM:
129 .long 0x40020000,0xA0000000,0x00000000 |10 ^ 1
130 .long 0x40050000,0xC8000000,0x00000000 |10 ^ 2
131 .long 0x400C0000,0x9C400000,0x00000000 |10 ^ 4
132 .long 0x40190000,0xBEBC2000,0x00000000 |10 ^ 8
133 .long 0x40340000,0x8E1BC9BF,0x04000000 |10 ^ 16
134 .long 0x40690000,0x9DC5ADA8,0x2B70B59D |10 ^ 32
135 .long 0x40D30000,0xC2781F49,0xFFCFA6D5 |10 ^ 64
136 .long 0x41A80000,0x93BA47C9,0x80E98CDF |10 ^ 128
137 .long 0x43510000,0xAA7EEBFB,0x9DF9DE8D |10 ^ 256
138 .long 0x46A30000,0xE319A0AE,0xA60E91C6 |10 ^ 512
139 .long 0x4D480000,0xC9767586,0x81750C17 |10 ^ 1024
140 .long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 |10 ^ 2048
141 .long 0x75250000,0xC4605202,0x8A20979A |10 ^ 4096
142|round to positive infinity
143BIGRP:
144 .long 0x3ffe0000,0xb17217f7,0xd1cf79ac |ln(2)
145 .long 0x40000000,0x935d8ddd,0xaaa8ac17 |ln(10)
146 .long 0x3fff0000,0x80000000,0x00000000 |10 ^ 0
147
148 .global PTENRP
149PTENRP:
150 .long 0x40020000,0xA0000000,0x00000000 |10 ^ 1
151 .long 0x40050000,0xC8000000,0x00000000 |10 ^ 2
152 .long 0x400C0000,0x9C400000,0x00000000 |10 ^ 4
153 .long 0x40190000,0xBEBC2000,0x00000000 |10 ^ 8
154 .long 0x40340000,0x8E1BC9BF,0x04000000 |10 ^ 16
155 .long 0x40690000,0x9DC5ADA8,0x2B70B59E |10 ^ 32
156 .long 0x40D30000,0xC2781F49,0xFFCFA6D6 |10 ^ 64
157 .long 0x41A80000,0x93BA47C9,0x80E98CE0 |10 ^ 128
158 .long 0x43510000,0xAA7EEBFB,0x9DF9DE8E |10 ^ 256
159 .long 0x46A30000,0xE319A0AE,0xA60E91C7 |10 ^ 512
160 .long 0x4D480000,0xC9767586,0x81750C18 |10 ^ 1024
161 .long 0x5A920000,0x9E8B3B5D,0xC53D5DE6 |10 ^ 2048
162 .long 0x75250000,0xC4605202,0x8A20979B |10 ^ 4096
163
164 |xref nrm_zero
165 |xref decbin
166 |xref round
167
168 .global get_op
169 .global uns_getop
170 .global uni_getop
171get_op:
172 clrb DY_MO_FLG(%a6)
173 tstb UFLG_TMP(%a6) |test flag for unsupp/unimp state
174 beq uni_getop
175
176uns_getop:
177 btstb #direction_bit,CMDREG1B(%a6)
178 bne opclass3 |branch if a fmove out (any kind)
179 btstb #6,CMDREG1B(%a6)
180 beqs uns_notpacked
181
182 bfextu CMDREG1B(%a6){#3:#3},%d0
183 cmpb #3,%d0
184 beq pack_source |check for a packed src op, branch if so
185uns_notpacked:
186 bsr chk_dy_mo |set the dyadic/monadic flag
187 tstb DY_MO_FLG(%a6)
188 beqs src_op_ck |if monadic, go check src op
189| ;else, check dst op (fall through)
190
191 btstb #7,DTAG(%a6)
192 beqs src_op_ck |if dst op is norm, check src op
193 bras dst_ex_dnrm |else, handle destination unnorm/dnrm
194
195uni_getop:
196 bfextu CMDREG1B(%a6){#0:#6},%d0 |get opclass and src fields
197 cmpil #0x17,%d0 |if op class and size fields are $17,
198| ;it is FMOVECR; if not, continue
199|
200| If the instruction is fmovecr, exit get_op. It is handled
201| in do_func and smovecr.sa.
202|
203 bne not_fmovecr |handle fmovecr as an unimplemented inst
204 rts
205
206not_fmovecr:
207 btstb #E1,E_BYTE(%a6) |if set, there is a packed operand
208 bne pack_source |check for packed src op, branch if so
209
210| The following lines are coded to optimize for normalized operands
211 moveb STAG(%a6),%d0
212 orb DTAG(%a6),%d0 |check if either of STAG/DTAG msb set
213 bmis dest_op_ck |if so, some op needs to be fixed
214 rts
215
216dest_op_ck:
217 btstb #7,DTAG(%a6) |check for unsupported data types in
218 beqs src_op_ck |the destination, if not, check src op
219 bsr chk_dy_mo |set dyadic/monadic flag
220 tstb DY_MO_FLG(%a6) |
221 beqs src_op_ck |if monadic, check src op
222|
223| At this point, destination has an extended denorm or unnorm.
224|
225dst_ex_dnrm:
226 movew FPTEMP_EX(%a6),%d0 |get destination exponent
227 andiw #0x7fff,%d0 |mask sign, check if exp = 0000
228 beqs src_op_ck |if denorm then check source op.
229| ;denorms are taken care of in res_func
230| ;(unsupp) or do_func (unimp)
231| ;else unnorm fall through
232 leal FPTEMP(%a6),%a0 |point a0 to dop - used in mk_norm
233 bsr mk_norm |go normalize - mk_norm returns:
234| ;L_SCR1{7:5} = operand tag
235| ; (000 = norm, 100 = denorm)
236| ;L_SCR1{4} = fpte15 or ete15
237| ; 0 = exp > $3fff
238| ; 1 = exp <= $3fff
239| ;and puts the normalized num back
240| ;on the fsave stack
241|
242 moveb L_SCR1(%a6),DTAG(%a6) |write the new tag & fpte15
243| ;to the fsave stack and fall
244| ;through to check source operand
245|
246src_op_ck:
247 btstb #7,STAG(%a6)
248 beq end_getop |check for unsupported data types on the
249| ;source operand
250 btstb #5,STAG(%a6)
251 bnes src_sd_dnrm |if bit 5 set, handle sgl/dbl denorms
252|
253| At this point only unnorms or extended denorms are possible.
254|
255src_ex_dnrm:
256 movew ETEMP_EX(%a6),%d0 |get source exponent
257 andiw #0x7fff,%d0 |mask sign, check if exp = 0000
258 beq end_getop |if denorm then exit, denorms are
259| ;handled in do_func
260 leal ETEMP(%a6),%a0 |point a0 to sop - used in mk_norm
261 bsr mk_norm |go normalize - mk_norm returns:
262| ;L_SCR1{7:5} = operand tag
263| ; (000 = norm, 100 = denorm)
264| ;L_SCR1{4} = fpte15 or ete15
265| ; 0 = exp > $3fff
266| ; 1 = exp <= $3fff
267| ;and puts the normalized num back
268| ;on the fsave stack
269|
270 moveb L_SCR1(%a6),STAG(%a6) |write the new tag & ete15
271 rts |end_getop
272
273|
274| At this point, only single or double denorms are possible.
275| If the inst is not fmove, normalize the source. If it is,
276| do nothing to the input.
277|
278src_sd_dnrm:
279 btstb #4,CMDREG1B(%a6) |differentiate between sgl/dbl denorm
280 bnes is_double
281is_single:
282 movew #0x3f81,%d1 |write bias for sgl denorm
283 bras common |goto the common code
284is_double:
285 movew #0x3c01,%d1 |write the bias for a dbl denorm
286common:
287 btstb #sign_bit,ETEMP_EX(%a6) |grab sign bit of mantissa
288 beqs pos
289 bset #15,%d1 |set sign bit because it is negative
290pos:
291 movew %d1,ETEMP_EX(%a6)
292| ;put exponent on stack
293
294 movew CMDREG1B(%a6),%d1
295 andw #0xe3ff,%d1 |clear out source specifier
296 orw #0x0800,%d1 |set source specifier to extended prec
297 movew %d1,CMDREG1B(%a6) |write back to the command word in stack
298| ;this is needed to fix unsupp data stack
299 leal ETEMP(%a6),%a0 |point a0 to sop
300
301 bsr mk_norm |convert sgl/dbl denorm to norm
302 moveb L_SCR1(%a6),STAG(%a6) |put tag into source tag reg - d0
303 rts |end_getop
304|
305| At this point, the source is definitely packed, whether
306| instruction is dyadic or monadic is still unknown
307|
308pack_source:
309 movel FPTEMP_LO(%a6),ETEMP(%a6) |write ms part of packed
310| ;number to etemp slot
311 bsr chk_dy_mo |set dyadic/monadic flag
312 bsr unpack
313
314 tstb DY_MO_FLG(%a6)
315 beqs end_getop |if monadic, exit
316| ;else, fix FPTEMP
317pack_dya:
318 bfextu CMDREG1B(%a6){#6:#3},%d0 |extract dest fp reg
319 movel #7,%d1
320 subl %d0,%d1
321 clrl %d0
322 bsetl %d1,%d0 |set up d0 as a dynamic register mask
323 fmovemx %d0,FPTEMP(%a6) |write to FPTEMP
324
325 btstb #7,DTAG(%a6) |check dest tag for unnorm or denorm
326 bne dst_ex_dnrm |else, handle the unnorm or ext denorm
327|
328| Dest is not denormalized. Check for norm, and set fpte15
329| accordingly.
330|
331 moveb DTAG(%a6),%d0
332 andib #0xf0,%d0 |strip to only dtag:fpte15
333 tstb %d0 |check for normalized value
334 bnes end_getop |if inf/nan/zero leave get_op
335 movew FPTEMP_EX(%a6),%d0
336 andiw #0x7fff,%d0
337 cmpiw #0x3fff,%d0 |check if fpte15 needs setting
338 bges end_getop |if >= $3fff, leave fpte15=0
339 orb #0x10,DTAG(%a6)
340 bras end_getop
341
342|
343| At this point, it is either an fmoveout packed, unnorm or denorm
344|
345opclass3:
346 clrb DY_MO_FLG(%a6) |set dyadic/monadic flag to monadic
347 bfextu CMDREG1B(%a6){#4:#2},%d0
348 cmpib #3,%d0
349 bne src_ex_dnrm |if not equal, must be unnorm or denorm
350| ;else it is a packed move out
351| ;exit
352end_getop:
353 rts
354
355|
356| Sets the DY_MO_FLG correctly. This is used only on if it is an
357| unsupported data type exception. Set if dyadic.
358|
359chk_dy_mo:
360 movew CMDREG1B(%a6),%d0
361 btstl #5,%d0 |testing extension command word
362 beqs set_mon |if bit 5 = 0 then monadic
363 btstl #4,%d0 |know that bit 5 = 1
364 beqs set_dya |if bit 4 = 0 then dyadic
365 andiw #0x007f,%d0 |get rid of all but extension bits {6:0}
366 cmpiw #0x0038,%d0 |if extension = $38 then fcmp (dyadic)
367 bnes set_mon
368set_dya:
369 st DY_MO_FLG(%a6) |set the inst flag type to dyadic
370 rts
371set_mon:
372 clrb DY_MO_FLG(%a6) |set the inst flag type to monadic
373 rts
374|
375| MK_NORM
376|
377| Normalizes unnormalized numbers, sets tag to norm or denorm, sets unfl
378| exception if denorm.
379|
380| CASE opclass 0x0 unsupp
381| mk_norm till msb set
382| set tag = norm
383|
384| CASE opclass 0x0 unimp
385| mk_norm till msb set or exp = 0
386| if integer bit = 0
387| tag = denorm
388| else
389| tag = norm
390|
391| CASE opclass 011 unsupp
392| mk_norm till msb set or exp = 0
393| if integer bit = 0
394| tag = denorm
395| set unfl_nmcexe = 1
396| else
397| tag = norm
398|
399| if exp <= $3fff
400| set ete15 or fpte15 = 1
401| else set ete15 or fpte15 = 0
402
403| input:
404| a0 = points to operand to be normalized
405| output:
406| L_SCR1{7:5} = operand tag (000 = norm, 100 = denorm)
407| L_SCR1{4} = fpte15 or ete15 (0 = exp > $3fff, 1 = exp <=$3fff)
408| the normalized operand is placed back on the fsave stack
409mk_norm:
410 clrl L_SCR1(%a6)
411 bclrb #sign_bit,LOCAL_EX(%a0)
412 sne LOCAL_SGN(%a0) |transform into internal extended format
413
414 cmpib #0x2c,1+EXC_VEC(%a6) |check if unimp
415 bnes uns_data |branch if unsupp
416 bsr uni_inst |call if unimp (opclass 0x0)
417 bras reload
418uns_data:
419 btstb #direction_bit,CMDREG1B(%a6) |check transfer direction
420 bnes bit_set |branch if set (opclass 011)
421 bsr uns_opx |call if opclass 0x0
422 bras reload
423bit_set:
424 bsr uns_op3 |opclass 011
425reload:
426 cmpw #0x3fff,LOCAL_EX(%a0) |if exp > $3fff
427 bgts end_mk | fpte15/ete15 already set to 0
428 bsetb #4,L_SCR1(%a6) |else set fpte15/ete15 to 1
429| ;calling routine actually sets the
430| ;value on the stack (along with the
431| ;tag), since this routine doesn't
432| ;know if it should set ete15 or fpte15
433| ;ie, it doesn't know if this is the
434| ;src op or dest op.
435end_mk:
436 bfclr LOCAL_SGN(%a0){#0:#8}
437 beqs end_mk_pos
438 bsetb #sign_bit,LOCAL_EX(%a0) |convert back to IEEE format
439end_mk_pos:
440 rts
441|
442| CASE opclass 011 unsupp
443|
444uns_op3:
445 bsr nrm_zero |normalize till msb = 1 or exp = zero
446 btstb #7,LOCAL_HI(%a0) |if msb = 1
447 bnes no_unfl |then branch (tag stays norm, no unfl)
448set_unfl:
| NOTE(review): this is a word-sized or, whereas uns_opx and uni_inst
| use orb for the same tag and the callers read L_SCR1 back with moveb;
| confirm the tag lands in the byte the callers actually read.
449 orw #dnrm_tag,L_SCR1(%a6) |set denorm tag
450 bsetb #unfl_bit,FPSR_EXCEPT(%a6) |set unfl exception bit
451no_unfl:
452 rts
453|
454| CASE opclass 0x0 unsupp
455|
456uns_opx:
457 bsr nrm_zero |normalize the number
458 btstb #7,LOCAL_HI(%a0) |check if integer bit (j-bit) is set
459 beqs uns_den |if clear then now have a denorm
460uns_nrm:
461 orb #norm_tag,L_SCR1(%a6) |set tag to norm
462 rts
463uns_den:
464 orb #dnrm_tag,L_SCR1(%a6) |set tag to denorm
465 rts
466|
467| CASE opclass 0x0 unimp
468|
469uni_inst:
470 bsr nrm_zero
471 btstb #7,LOCAL_HI(%a0) |check if integer bit (j-bit) is set
472 beqs uni_den |if clear then now have a denorm
473uni_nrm:
474 orb #norm_tag,L_SCR1(%a6) |set tag to norm
475 rts
476uni_den:
477 orb #dnrm_tag,L_SCR1(%a6) |set tag to denorm
478 rts
479
480|
481| Decimal to binary conversion
482|
483| Special cases of inf and NaNs are completed outside of decbin.
484| For a NaN with signan_bit clear, fix_nan ORs snaniop_mask into USER_FPSR.
485|
486| input:
487| ETEMP(a6) - holds the packed decimal operand (read via ETEMP/_HI/_LO)
488| output:
489| ETEMP(a6) - inf/NaN left in place; else a signed zero or decbin's result
490| fp0/fp1 - saved before and restored after the decbin call
491unpack:
492 movew CMDREG1B(%a6),%d0 |examine command word, looking for fmove's
493 andw #0x3b,%d0 |zero after masking selects the fmove path
494 beq move_unpack |special handling for fmove: must set FPSR_CC
495
496 movew ETEMP(%a6),%d0 |get word with inf information
497 bfextu %d0{#20:#12},%d1 |get exponent into d1
498 cmpiw #0x0fff,%d1 |test for inf or NaN
499 bnes try_zero |if not equal, it is not special
500 bfextu %d0{#17:#3},%d1 |get SE and y bits into d1
501 cmpiw #7,%d1 |SE and y bits must be on for special
502 bnes try_zero |if not on, it is not special
503|input is of the special cases of inf and NaN
504 tstl ETEMP_HI(%a6) |check ms mantissa
505 bnes fix_nan |if non-zero, it is a NaN
506 tstl ETEMP_LO(%a6) |check ls mantissa
507 bnes fix_nan |if non-zero, it is a NaN
508 bra finish |special already on stack
509fix_nan:
510 btstb #signan_bit,ETEMP_HI(%a6) |test for snan
511 bne finish |signan_bit set: leave USER_FPSR alone
512 orl #snaniop_mask,USER_FPSR(%a6) |always set snan if it is so
513 bra finish
514try_zero:
515 movew ETEMP_EX+2(%a6),%d0 |get word 4
516 andiw #0x000f,%d0 |clear all but last ni(y)bble
517 tstw %d0 |check for zero.
518 bne not_spec |non-zero: convert with decbin
519 tstl ETEMP_HI(%a6) |check words 3 and 2
520 bne not_spec |non-zero: convert with decbin
521 tstl ETEMP_LO(%a6) |check words 1 and 0
522 bne not_spec |non-zero: convert with decbin
523 tstl ETEMP(%a6) |test sign of the zero
524 bges pos_zero |sign clear: write pos zero instead
525 movel #0x80000000,ETEMP(%a6) |write neg zero to etemp
526 clrl ETEMP_HI(%a6)
527 clrl ETEMP_LO(%a6)
528 bra finish
529pos_zero:
530 clrl ETEMP(%a6) |write pos zero to etemp
531 clrl ETEMP_HI(%a6)
532 clrl ETEMP_LO(%a6)
533 bra finish
534
535not_spec:
536 fmovemx %fp0-%fp1,-(%a7) |save fp0 - decbin returns in it
537 bsr decbin |converted value comes back in fp0
538 fmovex %fp0,ETEMP(%a6) |put the unpacked sop in the fsave stack
539 fmovemx (%a7)+,%fp0-%fp1 |restore the saved fp0/fp1
540 fmovel #0,%FPSR |clr fpsr from decbin
541 bra finish
542
543|
544| Special handling for packed move in: Same results as all other
545| packed cases, but we must set the FPSR condition codes properly.
546| Zero, inf and NaN inputs get their cc bits ORed into USER_FPSR here.
547move_unpack:
548 movew ETEMP(%a6),%d0 |get word with inf information
549 bfextu %d0{#20:#12},%d1 |get exponent into d1
550 cmpiw #0x0fff,%d1 |test for inf or NaN
551 bnes mtry_zero |if not equal, it is not special
552 bfextu %d0{#17:#3},%d1 |get SE and y bits into d1
553 cmpiw #7,%d1 |SE and y bits must be on for special
554 bnes mtry_zero |if not on, it is not special
555|input is of the special cases of inf and NaN
556 tstl ETEMP_HI(%a6) |check ms mantissa
557 bnes mfix_nan |if non-zero, it is a NaN
558 tstl ETEMP_LO(%a6) |check ls mantissa
559 bnes mfix_nan |if non-zero, it is a NaN
560|input is inf
561 orl #inf_mask,USER_FPSR(%a6) |set I bit
562 tstl ETEMP(%a6) |check sign
563 bge finish |sign clear: N bit not needed
564 orl #neg_mask,USER_FPSR(%a6) |set N bit
565 bra finish |special already on stack
566mfix_nan:
567 orl #nan_mask,USER_FPSR(%a6) |set NaN bit
568 moveb #nan_tag,STAG(%a6) |set stag to NaN
569 btstb #signan_bit,ETEMP_HI(%a6) |test for snan
570 bnes mn_snan |signan_bit set: nothing to signal
571 orl #snaniop_mask,USER_FPSR(%a6) |set snan bit
572 btstb #snan_bit,FPCR_ENABLE(%a6) |test for snan enabled
573 bnes mn_snan |enabled: leave the operand unchanged
574 bsetb #signan_bit,ETEMP_HI(%a6) |force snans to qnans
575mn_snan:
576 tstl ETEMP(%a6) |check for sign
577 bge finish |if clr, go on
578 orl #neg_mask,USER_FPSR(%a6) |set N bit
579 bra finish
580
581mtry_zero:
582 movew ETEMP_EX+2(%a6),%d0 |get word 4
583 andiw #0x000f,%d0 |clear all but last ni(y)bble
584 tstw %d0 |check for zero.
585 bnes mnot_spec |non-zero: convert with decbin
586 tstl ETEMP_HI(%a6) |check words 3 and 2
587 bnes mnot_spec |non-zero: convert with decbin
588 tstl ETEMP_LO(%a6) |check words 1 and 0
589 bnes mnot_spec |non-zero: convert with decbin
590 tstl ETEMP(%a6) |test sign of the zero
591 bges mpos_zero |sign clear: only Z is set
592 orl #neg_mask+z_mask,USER_FPSR(%a6) |set N and Z
593 movel #0x80000000,ETEMP(%a6) |write neg zero to etemp
594 clrl ETEMP_HI(%a6)
595 clrl ETEMP_LO(%a6)
596 bras finish
597mpos_zero:
598 orl #z_mask,USER_FPSR(%a6) |set Z
599 clrl ETEMP(%a6) |write pos zero to etemp
600 clrl ETEMP_HI(%a6)
601 clrl ETEMP_LO(%a6)
602 bras finish
603
604mnot_spec:
605 fmovemx %fp0-%fp1,-(%a7) |save fp0 ,fp1 - decbin returns in fp0
606 bsr decbin |converted value comes back in fp0
607 fmovex %fp0,ETEMP(%a6)
608| ;put the unpacked sop in the fsave stack
609 fmovemx (%a7)+,%fp0-%fp1 |restore fp0/fp1; execution falls into finish
610
611finish:
612 movew CMDREG1B(%a6),%d0 |get the command word
613 andw #0xfbff,%d0 |change the source specifier field to
614| ;extended (was packed).
615 movew %d0,CMDREG1B(%a6) |write command word back to fsave stack
616| ;we need to do this so the 040 will
617| ;re-execute the inst. without taking
618| ;another packed trap.
619
620fix_stag:
621|Converted result is now in etemp on fsave stack, now set the source
622|tag (stag)
623| if (ete =$7fff) then INF or NAN
624| if (etemp = $x.0----0) then
625| stag = INF
626| else
627| stag = NAN
628| else
629| if (ete = $0000) then
630| stag = ZERO
631| else
632| stag = NORM
633|
634| Note also that the etemp_15 bit (just right of the stag) must
635| be set accordingly.
636| On exit d0 holds the tag alone: 0x40 inf, 0x60 nan, 0x20 zero, 0 norm.
637 movew ETEMP_EX(%a6),%d1
638 andiw #0x7fff,%d1 |strip sign
639 cmpw #0x7fff,%d1
640 bnes z_or_nrm |exponent is not $7fff: zero or norm
641 movel ETEMP_HI(%a6),%d1
642 bnes is_nan |non-zero ms mantissa: NaN
643 movel ETEMP_LO(%a6),%d1
644 bnes is_nan |non-zero ls mantissa: NaN
645is_inf:
646 moveb #0x40,STAG(%a6) |stag = INF
647 movel #0x40,%d0 |d0 = INF tag
648 rts
649is_nan:
650 moveb #0x60,STAG(%a6) |stag = NAN
651 movel #0x60,%d0 |d0 = NAN tag
652 rts
653z_or_nrm:
654 tstw %d1 |d1 still holds the sign-stripped exponent
655 bnes is_nrm
656is_zro:
657| For a zero, set etemp_15
658 moveb #0x30,STAG(%a6) |0x20 zero tag plus 0x10 for etemp_15
659 movel #0x20,%d0 |d0 = ZERO tag, without the etemp_15 bit
660 rts
661is_nrm:
662| For a norm, check if the exp <= $3fff; if so, set etemp_15
663 cmpiw #0x3fff,%d1
664 bles set_bit15
665 moveb #0,STAG(%a6) |stag = NORM, etemp_15 clear
666 bras end_is_nrm
667set_bit15:
668 moveb #0x10,STAG(%a6) |stag = NORM, etemp_15 set
669end_is_nrm:
670 movel #0,%d0 |d0 = NORM tag
671end_fix:
672 rts
673
674end_get:
675 rts
676 |end
diff --git a/arch/m68k/fpsp040/kernel_ex.S b/arch/m68k/fpsp040/kernel_ex.S
new file mode 100644
index 000000000000..476b711967ce
--- /dev/null
+++ b/arch/m68k/fpsp040/kernel_ex.S
@@ -0,0 +1,494 @@
1|
2| kernel_ex.sa 3.3 12/19/90
3|
4| This file contains routines to force exception status in the
5| fpu for exceptional cases detected or reported within the
6| transcendental functions. Typically, the t_xx routine will
7| set the appropriate bits in the USER_FPSR word on the stack.
8| The bits are tested in gen_except.sa to determine if an exceptional
9| situation needs to be created on return from the FPSP.
10|
11
12| Copyright (C) Motorola, Inc. 1990
13| All Rights Reserved
14|
15| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
16| The copyright notice above does not evidence any
17| actual or intended publication of such source code.
18
19KERNEL_EX: |idnt 2,1 | Motorola 040 Floating Point Software Package
20
21 |section 8
22
23#include "fpsp.h"
24
25mns_inf: .long 0xffff0000,0x00000000,0x00000000 |-inf, loaded at m_inf
26pls_inf: .long 0x7fff0000,0x00000000,0x00000000 |+inf, loaded at p_inf
27nan: .long 0x7fff0000,0xffffffff,0xffffffff |default nan, loaded by t_operr
28huge: .long 0x7ffe0000,0xffffffff,0xffffffff |exp $7ffe, mantissa all ones
29
30 |xref ovf_r_k
31 |xref unf_sub
32 |xref nrm_set
33
34 .global t_dz
35 .global t_dz2
36 .global t_operr
37 .global t_unfl
38 .global t_ovfl
39 .global t_ovfl2
40 .global t_inx2
41 .global t_frcinx
42 .global t_extdnrm
43 .global t_resdnrm
44 .global dst_nan
45 .global src_nan
46|
47| DZ exception
48|
49| Both entry points finish by ORing dzinf_mask into USER_FPSR(a6).
50| if dz trap disabled
51| store properly signed inf (use sign of etemp) into fp0
52| set FPSR exception status dz bit, condition code
53| inf bit, and accrued dz bit
54| return
55| frestore the frame into the machine (done by unimp_hd)
56|
57| else dz trap enabled
58| set exception status bit & accrued bits in FPSR
59| set flag to disable sto_res from corrupting fp register
60| return
61| frestore the frame into the machine (done by unimp_hd)
62|
63| t_dz2 is used by monadic functions such as flogn (from do_func).
64| t_dz is used by monadic functions such as satanh (from the
65| transcendental function).
66|
67t_dz2:
68 bsetb #neg_bit,FPSR_CC(%a6) |set neg bit in FPSR
69 fmovel #0,%FPSR |clr status bits (Z set)
70 btstb #dz_bit,FPCR_ENABLE(%a6) |test FPCR for dz exc enabled
71 bnes dz_ena_end |enabled: N is already set, skip the sign test
72 bras m_inf |flogx always returns -inf
73t_dz:
74 fmovel #0,%FPSR |clr status bits (Z set)
75 btstb #dz_bit,FPCR_ENABLE(%a6) |test FPCR for dz exc enabled
76 bnes dz_ena |enabled: no result is loaded into fp0
77|
78| dz disabled
79|
80 btstb #sign_bit,ETEMP_EX(%a6) |check sign for neg or pos
81 beqs p_inf |branch if pos sign
82
83m_inf:
84 fmovemx mns_inf,%fp0-%fp0 |load -inf
85 bsetb #neg_bit,FPSR_CC(%a6) |set neg bit in FPSR
86 bras set_fpsr
87p_inf:
88 fmovemx pls_inf,%fp0-%fp0 |load +inf
89set_fpsr:
90 orl #dzinf_mask,USER_FPSR(%a6) |set I,DZ,ADZ
91 rts
92|
93| dz enabled
94|
95dz_ena:
96 btstb #sign_bit,ETEMP_EX(%a6) |check sign for neg or pos
97 beqs dz_ena_end |pos sign: leave N clear
98 bsetb #neg_bit,FPSR_CC(%a6) |set neg bit in FPSR
99dz_ena_end:
100 orl #dzinf_mask,USER_FPSR(%a6) |set I,DZ,ADZ
101 st STORE_FLG(%a6) |flag sto_res not to write the fp register
102 rts
103|
104| OPERR exception
105| (opnan_mask is ORed into USER_FPSR before the enable test, so in both cases)
106| if (operr trap disabled)
107| set FPSR exception status operr bit, condition code
108| nan bit; Store default NAN into fp0
109| frestore the frame into the machine (done by unimp_hd)
110|
111| else (operr trap enabled)
112| set FPSR exception status operr bit, accrued operr bit
113| set flag to disable sto_res from corrupting fp register
114| frestore the frame into the machine (done by unimp_hd)
115|
116t_operr:
117 orl #opnan_mask,USER_FPSR(%a6) |set NaN, OPERR, AIOP
118
119 btstb #operr_bit,FPCR_ENABLE(%a6) |test FPCR for operr enabled
120 bnes op_ena |enabled: do not load the default nan
121
122 fmovemx nan,%fp0-%fp0 |load default nan
123 rts
124op_ena:
125 st STORE_FLG(%a6) |do not corrupt destination
126 rts
127
128|
129| t_unfl --- UNFL exception
130|
131| This entry point is used by all routines requiring unfl, inex2,
132| aunfl, and ainex to be set on exit.
133|
134| On entry, a0 points to the exceptional operand. The final exceptional
135| operand is built in FP_SCR1 and only the sign from the original operand
136| is used.
137| The result that unf_sub leaves at (a0) is loaded into fp0 before return.
138t_unfl:
139 clrl FP_SCR1(%a6) |set exceptional operand to zero
140 clrl FP_SCR1+4(%a6)
141 clrl FP_SCR1+8(%a6)
142 tstb (%a0) |extract sign from caller's exop
143 bpls unfl_signok |sign clear: FP_SCR1 stays positive
144 bset #sign_bit,FP_SCR1(%a6) |sign set: copy it into FP_SCR1
145unfl_signok:
146 leal FP_SCR1(%a6),%a0 |a0 now points at FP_SCR1
147 orl #unfinx_mask,USER_FPSR(%a6)
148| ;set UNFL, INEX2, AUNFL, AINEX
149unfl_con:
150 btstb #unfl_bit,FPCR_ENABLE(%a6) |test FPCR for unfl enabled
151 beqs unfl_dis |disabled: skip the unfl_ena updates
152
153unfl_ena:
154 bfclr STAG(%a6){#5:#3} |clear wbtm66,wbtm1,wbtm0
155 bsetb #wbtemp15_bit,WB_BYTE(%a6) |set wbtemp15
156 bsetb #sticky_bit,STICKY(%a6) |set sticky bit
157
158 bclrb #E1,E_BYTE(%a6) |clear the E1 bit
159
160unfl_dis:
161 bfextu FPCR_MODE(%a6){#0:#2},%d0 |get round precision
162
163 bclrb #sign_bit,LOCAL_EX(%a0)
164 sne LOCAL_SGN(%a0) |convert to internal ext format
165
166 bsr unf_sub |returns IEEE result at a0
167| ;and sets FPSR_CC accordingly
168
169 bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format
170 beqs unfl_fin |sign byte was zero: result is positive
171
172 bsetb #sign_bit,LOCAL_EX(%a0) |put the sign back on the result
173 bsetb #sign_bit,FP_SCR1(%a6) |set sign bit of exc operand
174
175unfl_fin:
176 fmovemx (%a0),%fp0-%fp0 |store result in fp0
177 rts
178
179
180|
181| t_ovfl2 --- OVFL exception (without inex2 returned)
182|
183| This entry is used by scale to force catastrophic overflow. The
184| ovfl, aovfl, and ainex bits are set, but not the inex2 bit.
185| inex2 is added below only if the round precision would drop set bits.
186t_ovfl2:
187 orl #ovfl_inx_mask,USER_FPSR(%a6)
188 movel ETEMP(%a6),FP_SCR1(%a6) |copy ETEMP into FP_SCR1
189 movel ETEMP_HI(%a6),FP_SCR1+4(%a6)
190 movel ETEMP_LO(%a6),FP_SCR1+8(%a6)
191|
192| Check for single or double round precision. If single, check if
193| the lower 40 bits of ETEMP are zero; if not, set inex2. If double,
194| check if the lower 11 bits are zero; if not, set inex2.
195|
196 moveb FPCR_MODE(%a6),%d0
197 andib #0xc0,%d0 |keep only the two round precision bits
198 beq t_work |if extended, finish ovfl processing
199 cmpib #0x40,%d0 |test for single
200 bnes t_dbl
201t_sgl:
202 tstl ETEMP_LO(%a6) |all 32 bits of the ls mantissa must be zero
203 bnes t_setinx2
204 movel ETEMP_HI(%a6),%d0
205 andil #0xff,%d0 |look at only lower 8 bits
206 bnes t_setinx2
207 bra t_work
208t_dbl:
209 movel ETEMP_LO(%a6),%d0
210 andil #0x7ff,%d0 |look at only lower 11 bits
211 beq t_work
212t_setinx2:
213 orl #inex2_mask,USER_FPSR(%a6) |low mantissa bits are set: flag inex2
214 bras t_work
215|
216| t_ovfl --- OVFL exception
217|
218|** Note: the exc operand is returned in ETEMP.
219| t_work is also reached from t_ovfl2, which sets its own FPSR bits first.
220t_ovfl:
221 orl #ovfinx_mask,USER_FPSR(%a6)
222t_work:
223 btstb #ovfl_bit,FPCR_ENABLE(%a6) |test FPCR for ovfl enabled
224 beqs ovf_dis |disabled: skip the ovf_ena updates
225
226ovf_ena:
227 clrl FP_SCR1(%a6) |set exceptional operand
228 clrl FP_SCR1+4(%a6)
229 clrl FP_SCR1+8(%a6)
230
231 bfclr STAG(%a6){#5:#3} |clear wbtm66,wbtm1,wbtm0
232 bclrb #wbtemp15_bit,WB_BYTE(%a6) |clear wbtemp15
233 bsetb #sticky_bit,STICKY(%a6) |set sticky bit
234
235 bclrb #E1,E_BYTE(%a6) |clear the E1 bit
236| ;fall through to disabled case
237
238| For disabled overflow call 'ovf_r_k'. This routine loads the
239| correct result based on the rounding precision, destination
240| format, rounding mode and sign.
241|
242ovf_dis:
243 bsr ovf_r_k |returns unsigned ETEMP_EX
244| ;and sets FPSR_CC accordingly.
245 bfclr ETEMP_SGN(%a6){#0:#8} |fix sign
246 beqs ovf_pos |sign byte was zero: result is positive
247 bsetb #sign_bit,ETEMP_EX(%a6) |put the sign back on the result
248 bsetb #sign_bit,FP_SCR1(%a6) |set exceptional operand sign
249ovf_pos:
250 fmovemx ETEMP(%a6),%fp0-%fp0 |move the result to fp0
251 rts
252
253
254|
255| INEX2 exception
256|
257| The inex2 and ainex bits are set.
258| Only USER_FPSR(a6) is modified; no result is loaded here.
259t_inx2:
260 orl #inx2a_mask,USER_FPSR(%a6) |set INEX2, AINEX
261 rts
262
263|
264| Force Inex2
265|
266| This routine is called by the transcendental routines to force
267| the inex2 exception bits set in the FPSR. If the underflow bit
268| is set, but the underflow trap was not taken, the aunfl bit in
269| the FPSR must be set.
270| Only the stacked FPSR fields at (a6) are modified.
271t_frcinx:
272 orl #inx2a_mask,USER_FPSR(%a6) |set INEX2, AINEX
273 btstb #unfl_bit,FPSR_EXCEPT(%a6) |test for unfl bit set
274 beqs no_uacc1 |if clear, do not set aunfl
275 bsetb #aunfl_bit,FPSR_AEXCEPT(%a6) |unfl was set: accrue aunfl too
276no_uacc1:
277 rts
278
279|
280| DST_NAN
281|
282| Determine if the destination nan is signalling or non-signalling,
283| and set the FPSR bits accordingly. See the MC68040 User's Manual
284| section 3.2.2.5 NOT-A-NUMBERS.
285| NOTE(review): d1 appears to carry the user's FPCR on entry - confirm.
286dst_nan:
287 btstb #sign_bit,FPTEMP_EX(%a6) |test sign of nan
288 beqs dst_pos |if clr, it was positive
289 bsetb #neg_bit,FPSR_CC(%a6) |set N bit
290dst_pos:
291 btstb #signan_bit,FPTEMP_HI(%a6) |check if signalling
292 beqs dst_snan |branch if signalling
293
294 fmovel %d1,%fpcr |restore user's rmode/prec
295 fmovex FPTEMP(%a6),%fp0 |return the non-signalling nan
296|
297| Check the source nan. If it is signalling, snan will be reported.
298|
299 moveb STAG(%a6),%d0
300 andib #0xe0,%d0 |keep the top three bits of the stag byte
301 cmpib #0x60,%d0 |compare against the nan tag value
302 bnes no_snan |source is not tagged nan
303 btstb #signan_bit,ETEMP_HI(%a6) |check if signalling
304 bnes no_snan |bit set: source nan is non-signalling
305 orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP
306no_snan:
307 rts
308
309dst_snan:
310 btstb #snan_bit,FPCR_ENABLE(%a6) |check if trap enabled
311 beqs dst_dis |branch if disabled
312
313 orb #nan_tag,DTAG(%a6) |set up dtag for nan
314 st STORE_FLG(%a6) |do not store a result
315 orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP
316 rts
317
318dst_dis:
319 bsetb #signan_bit,FPTEMP_HI(%a6) |set SNAN bit in dop (FPTEMP)
320 fmovel %d1,%fpcr |restore user's rmode/prec
321 fmovex FPTEMP(%a6),%fp0 |load non-sign. nan
322 orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP
323 rts
324
325|
326| SRC_NAN
327|
328| Determine if the source nan is signalling or non-signalling,
329| and set the FPSR bits accordingly. See the MC68040 User's Manual
330| section 3.2.2.5 NOT-A-NUMBERS.
331| NOTE(review): d1 appears to carry the user's FPCR on entry - confirm.
332src_nan:
333 btstb #sign_bit,ETEMP_EX(%a6) |test sign of nan
334 beqs src_pos |if clr, it was positive
335 bsetb #neg_bit,FPSR_CC(%a6) |set N bit
336src_pos:
337 btstb #signan_bit,ETEMP_HI(%a6) |check if signalling
338 beqs src_snan |branch if signalling
339 fmovel %d1,%fpcr |restore user's rmode/prec
340 fmovex ETEMP(%a6),%fp0 |return the non-signalling nan
341 rts
342
343src_snan:
344 btstb #snan_bit,FPCR_ENABLE(%a6) |check if trap enabled
345 beqs src_dis |branch if disabled
346 bsetb #signan_bit,ETEMP_HI(%a6) |set SNAN bit in sop
347 orb #norm_tag,DTAG(%a6) |set up dtag for norm
348 orb #nan_tag,STAG(%a6) |set up stag for nan
349 st STORE_FLG(%a6) |do not store a result
350 orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP
351 rts
352
353src_dis:
354 bsetb #signan_bit,ETEMP_HI(%a6) |set SNAN bit in sop
355 fmovel %d1,%fpcr |restore user's rmode/prec
356 fmovex ETEMP(%a6),%fp0 |load non-sign. nan
357 orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP
358 rts
359
360|
361| For all functions that have a denormalized input and that f(x)=x,
362| this is the entry point
363| The operand is read through a0; the value at (a0) ends up in fp0.
364t_extdnrm:
365 orl #unfinx_mask,USER_FPSR(%a6)
366| ;set UNFL, INEX2, AUNFL, AINEX
367 bras xdnrm_con
368|
369| Entry point for scale with extended denorm. The function does
370| not set inex2, aunfl, or ainex.
371|
372t_resdnrm:
373 orl #unfl_mask,USER_FPSR(%a6)
374
375xdnrm_con:
376 btstb #unfl_bit,FPCR_ENABLE(%a6) |test FPCR for unfl enabled
377 beqs xdnrm_dis |disabled: skip the WBTEMP setup
378
379|
380| If exceptions are enabled, the additional task of setting up WBTEMP
381| is needed so that when the underflow exception handler is entered,
382| the user perceives no difference between what the 040 provides vs.
383| what the FPSP provides.
384|
385xdnrm_ena:
386 movel %a0,-(%a7) |save the caller's operand pointer
387
388 movel LOCAL_EX(%a0),FP_SCR1(%a6) |copy the operand into FP_SCR1
389 movel LOCAL_HI(%a0),FP_SCR1+4(%a6)
390 movel LOCAL_LO(%a0),FP_SCR1+8(%a6)
391
392 lea FP_SCR1(%a6),%a0 |work on the FP_SCR1 copy from here on
393
394 bclrb #sign_bit,LOCAL_EX(%a0)
395 sne LOCAL_SGN(%a0) |convert to internal ext format
396 tstw LOCAL_EX(%a0) |check if input is denorm
397 beqs xdnrm_dn |if so, skip nrm_set
398 bsr nrm_set |normalize the result (exponent
399| ;will be negative)
400xdnrm_dn:
401 bclrb #sign_bit,LOCAL_EX(%a0) |take off false sign
402 bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format
403 beqs xdep |sign byte was zero: leave sign clear
404 bsetb #sign_bit,LOCAL_EX(%a0) |put the sign back
405xdep:
406 bfclr STAG(%a6){#5:#3} |clear wbtm66,wbtm1,wbtm0
407 bsetb #wbtemp15_bit,WB_BYTE(%a6) |set wbtemp15
408 bclrb #sticky_bit,STICKY(%a6) |clear sticky bit
409 bclrb #E1,E_BYTE(%a6) |clear the E1 bit
410 movel (%a7)+,%a0 |restore the caller's operand pointer
411xdnrm_dis:
412 bfextu FPCR_MODE(%a6){#0:#2},%d0 |get round precision
413 bnes not_ext |if not round extended, store
414| ;IEEE defaults
415is_ext:
416 btstb #sign_bit,LOCAL_EX(%a0)
417 beqs xdnrm_store |positive: N stays clear
418
419 bsetb #neg_bit,FPSR_CC(%a6) |set N bit in FPSR_CC
420
421 bras xdnrm_store
422
423not_ext:
424 bclrb #sign_bit,LOCAL_EX(%a0)
425 sne LOCAL_SGN(%a0) |convert to internal ext format
426 bsr unf_sub |returns IEEE result pointed by
427| ;a0; sets FPSR_CC accordingly
428 bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format
429 beqs xdnrm_store |sign byte was zero: leave sign clear
430 bsetb #sign_bit,LOCAL_EX(%a0) |put the sign back
431xdnrm_store:
432 fmovemx (%a0),%fp0-%fp0 |store result in fp0
433 rts
434
435|
436| This subroutine is used for dyadic operations that use an extended
437| denorm within the kernel. The approach used is to capture the frame,
438| fix/restore.
439| Frame fields are addressed off a2 here, not a6.
440 .global t_avoid_unsupp
441t_avoid_unsupp:
442 link %a2,#-LOCAL_SIZE |so that a2 fpsp.h negative
443| ;offsets may be used
444 fsave -(%a7)
445 tstb 1(%a7) |check if idle, exit if so
446 beq idle_end |idle: nothing to fix
447 btstb #E1,E_BYTE(%a2) |check for an E1 exception if
448| ;enabled, there is an unsupp
449 beq end_avun |else, exit
450 btstb #7,DTAG(%a2) |check for denorm destination
451 beqs src_den |else, must be a source denorm
452|
453| handle destination denorm
454|
455 lea FPTEMP(%a2),%a0
456 btstb #sign_bit,LOCAL_EX(%a0)
457 sne LOCAL_SGN(%a0) |convert to internal ext format
458 bclrb #7,DTAG(%a2) |set DTAG to norm
459 bsr nrm_set |normalize result, exponent
460| ;will become negative
461 bclrb #sign_bit,LOCAL_EX(%a0) |get rid of fake sign
462 bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format
463 beqs ck_src_den |check if source is also denorm
464 bsetb #sign_bit,LOCAL_EX(%a0) |put the sign back
465ck_src_den:
466 btstb #7,STAG(%a2) |check for denorm source
467 beqs end_avun |source is not a denorm: restore and exit
468src_den:
469 lea ETEMP(%a2),%a0
470 btstb #sign_bit,LOCAL_EX(%a0)
471 sne LOCAL_SGN(%a0) |convert to internal ext format
472 bclrb #7,STAG(%a2) |set STAG to norm
473 bsr nrm_set |normalize result, exponent
474| ;will become negative
475 bclrb #sign_bit,LOCAL_EX(%a0) |get rid of fake sign
476 bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format
477 beqs den_com |sign byte was zero: leave sign clear
478 bsetb #sign_bit,LOCAL_EX(%a0) |put the sign back
479den_com:
480 moveb #0xfe,CU_SAVEPC(%a2) |set continue frame
481 clrw NMNEXC(%a2) |clear NMNEXC
482 bclrb #E1,E_BYTE(%a2) |clear the E1 bit
483| fmove.l %FPSR,FPSR_SHADOW(%a2)
484| bset.b #SFLAG,E_BYTE(%a2)
485| bset.b #XFLAG,T_BYTE(%a2)
486end_avun:
487 frestore (%a7)+ |reload the frame saved by fsave above
488 unlk %a2
489 rts
490idle_end:
491 addl #4,%a7 |pop 4 bytes; no frestore on this path
492 unlk %a2
493 rts
494 |end
diff --git a/arch/m68k/fpsp040/res_func.S b/arch/m68k/fpsp040/res_func.S
new file mode 100644
index 000000000000..8f6b95217865
--- /dev/null
+++ b/arch/m68k/fpsp040/res_func.S
@@ -0,0 +1,2040 @@
1|
2| res_func.sa 3.9 7/29/91
3|
4| Normalizes denormalized numbers if necessary and updates the
5| stack frame. The function is then restored back into the
6| machine and the 040 completes the operation. This routine
7| is only used by the unsupported data type/format handler.
8| (Exception vector 55).
9|
10| For packed move out (fmove.p fpm,<ea>) the operation is
11| completed here; data is packed and moved to user memory.
12| The stack is restored to the 040 only in the case of a
13| reportable exception in the conversion.
14|
15|
16| Copyright (C) Motorola, Inc. 1990
17| All Rights Reserved
18|
19| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
20| The copyright notice above does not evidence any
21| actual or intended publication of such source code.
22
23RES_FUNC: |idnt 2,1 | Motorola 040 Floating Point Software Package
24
25 |section 8
26
27#include "fpsp.h"
28
29sp_bnds: .short 0x3f81,0x407e
30 .short 0x3f6a,0x0000
31dp_bnds: .short 0x3c01,0x43fe
32 .short 0x3bcd,0x0000
33
34 |xref mem_write
35 |xref bindec
36 |xref get_fline
37 |xref round
38 |xref denorm
39 |xref dest_ext
40 |xref dest_dbl
41 |xref dest_sgl
42 |xref unf_sub
43 |xref nrm_set
44 |xref dnrm_lp
45 |xref ovf_res
46 |xref reg_dest
47 |xref t_ovfl
48 |xref t_unfl
49
50 .global res_func
51 .global p_move
52
53res_func:
54 clrb DNRM_FLG(%a6)
55 clrb RES_FLG(%a6)
56 clrb CU_ONLY(%a6)
57 tstb DY_MO_FLG(%a6)
58 beqs monadic
59dyadic:
60 btstb #7,DTAG(%a6) |if dop = norm=000, zero=001,
61| ;inf=010 or nan=011
62 beqs monadic |then branch
63| ;else denorm
64| HANDLE DESTINATION DENORM HERE
65| ;set dtag to norm
66| ;write the tag & fpte15 to the fstack
67 leal FPTEMP(%a6),%a0
68
69 bclrb #sign_bit,LOCAL_EX(%a0)
70 sne LOCAL_SGN(%a0)
71
72 bsr nrm_set |normalize number (exp will go negative)
73 bclrb #sign_bit,LOCAL_EX(%a0) |get rid of false sign
74 bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format
75 beqs dpos
76 bsetb #sign_bit,LOCAL_EX(%a0)
77dpos:
78 bfclr DTAG(%a6){#0:#4} |set tag to normalized, FPTE15 = 0
79 bsetb #4,DTAG(%a6) |set FPTE15
80 orb #0x0f,DNRM_FLG(%a6)
81monadic:
82 leal ETEMP(%a6),%a0
83 btstb #direction_bit,CMDREG1B(%a6) |check direction
84 bne opclass3 |it is a mv out
85|
86| At this point, only opclass 0 and 2 possible
87|
88 btstb #7,STAG(%a6) |if sop = norm=000, zero=001,
89| ;inf=010 or nan=011
90 bne mon_dnrm |else denorm
91 tstb DY_MO_FLG(%a6) |all cases of dyadic instructions would
92 bne normal |require normalization of denorm
93
94| At this point:
95| monadic instructions: fabs = $18 fneg = $1a ftst = $3a
96| fmove = $00 fsmove = $40 fdmove = $44
97| fsqrt = $05* fssqrt = $41 fdsqrt = $45
98| (*fsqrt reencoded to $05)
99|
100 movew CMDREG1B(%a6),%d0 |get command register
101 andil #0x7f,%d0 |strip to only command word
102|
103| At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and
104| fdsqrt are possible.
105| For cases fabs, fneg, fsmove, and fdmove goto spos (do not normalize)
106| For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize)
107|
108 btstl #0,%d0
109 bne normal |weed out fsqrt instructions
110|
111| cu_norm handles fmove in instructions with normalized inputs.
112| The routine round is used to correctly round the input for the
113| destination precision and mode.
114|
115cu_norm:
116 st CU_ONLY(%a6) |set cu-only inst flag
117 movew CMDREG1B(%a6),%d0
118 andib #0x3b,%d0 |isolate bits to select inst
119 tstb %d0
120 beql cu_nmove |if zero, it is an fmove
121 cmpib #0x18,%d0
122 beql cu_nabs |if $18, it is fabs
123 cmpib #0x1a,%d0
124 beql cu_nneg |if $1a, it is fneg
125|
126| Inst is ftst. Check the source operand and set the cc's accordingly.
127| No write is done, so simply rts.
128|
129cu_ntst:
130 movew LOCAL_EX(%a0),%d0
131 bclrl #15,%d0
132 sne LOCAL_SGN(%a0)
133 beqs cu_ntpo
134 orl #neg_mask,USER_FPSR(%a6) |set N
135cu_ntpo:
136 cmpiw #0x7fff,%d0 |test for inf/nan
137 bnes cu_ntcz
138 tstl LOCAL_HI(%a0)
139 bnes cu_ntn
140 tstl LOCAL_LO(%a0)
141 bnes cu_ntn
142 orl #inf_mask,USER_FPSR(%a6)
143 rts
144cu_ntn:
145 orl #nan_mask,USER_FPSR(%a6)
146 movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for
147| ;snan handler
148
149 rts
150cu_ntcz:
151 tstl LOCAL_HI(%a0)
152 bnel cu_ntsx
153 tstl LOCAL_LO(%a0)
154 bnel cu_ntsx
155 orl #z_mask,USER_FPSR(%a6)
156cu_ntsx:
157 rts
158|
159| Inst is fabs. Execute the absolute value function on the input.
160| Branch to the fmove code. If the operand is NaN, do nothing.
161|
162cu_nabs:
163 moveb STAG(%a6),%d0
164 btstl #5,%d0 |test for NaN or zero
165 bne wr_etemp |if either, simply write it
166 bclrb #7,LOCAL_EX(%a0) |do abs
167 bras cu_nmove |fmove code will finish
168|
169| Inst is fneg. Execute the negate value function on the input.
170| Fall through to the fmove code. If the operand is NaN, do nothing.
171|
172cu_nneg:
173 moveb STAG(%a6),%d0
174 btstl #5,%d0 |test for NaN or zero
175 bne wr_etemp |if either, simply write it
176 bchgb #7,LOCAL_EX(%a0) |do neg
177|
178| Inst is fmove. This code also handles all result writes.
179| If bit 2 is set, round is forced to double. If it is clear,
180| and bit 6 is set, round is forced to single. If both are clear,
181| the round precision is found in the fpcr. If the rounding precision
182| is double or single, round the result before the write.
183|
184cu_nmove:
185 moveb STAG(%a6),%d0
186 andib #0xe0,%d0 |isolate stag bits
187 bne wr_etemp |if not norm, simply write it
188 btstb #2,CMDREG1B+1(%a6) |check for rd
189 bne cu_nmrd
190 btstb #6,CMDREG1B+1(%a6) |check for rs
191 bne cu_nmrs
192|
193| The move or operation is not with forced precision. Test for
194| nan or inf as the input; if so, simply write it to FPn. Use the
195| FPCR_MODE byte to get rounding on norms and zeros.
196|
197cu_nmnr:
198 bfextu FPCR_MODE(%a6){#0:#2},%d0
199 tstb %d0 |check for extended
200 beq cu_wrexn |if so, just write result
201 cmpib #1,%d0 |check for single
202 beq cu_nmrs |fall through to double
203|
204| The move is fdmove or round precision is double.
205|
206cu_nmrd:
207 movel #2,%d0 |set up the size for denorm
208 movew LOCAL_EX(%a0),%d1 |compare exponent to double threshold
209 andw #0x7fff,%d1
210 cmpw #0x3c01,%d1
211 bls cu_nunfl
212 bfextu FPCR_MODE(%a6){#2:#2},%d1 |get rmode
213 orl #0x00020000,%d1 |or in rprec (double)
214 clrl %d0 |clear g,r,s for round
215 bclrb #sign_bit,LOCAL_EX(%a0) |convert to internal format
216 sne LOCAL_SGN(%a0)
217 bsrl round
218 bfclr LOCAL_SGN(%a0){#0:#8}
219 beqs cu_nmrdc
220 bsetb #sign_bit,LOCAL_EX(%a0)
221cu_nmrdc:
222 movew LOCAL_EX(%a0),%d1 |check for overflow
223 andw #0x7fff,%d1
224 cmpw #0x43ff,%d1
225 bge cu_novfl |take care of overflow case
226 bra cu_wrexn
227|
228| The move is fsmove or round precision is single.
229|
230cu_nmrs:
231 movel #1,%d0
232 movew LOCAL_EX(%a0),%d1
233 andw #0x7fff,%d1
234 cmpw #0x3f81,%d1
235 bls cu_nunfl
236 bfextu FPCR_MODE(%a6){#2:#2},%d1
237 orl #0x00010000,%d1
238 clrl %d0
239 bclrb #sign_bit,LOCAL_EX(%a0)
240 sne LOCAL_SGN(%a0)
241 bsrl round
242 bfclr LOCAL_SGN(%a0){#0:#8}
243 beqs cu_nmrsc
244 bsetb #sign_bit,LOCAL_EX(%a0)
245cu_nmrsc:
246 movew LOCAL_EX(%a0),%d1
247 andw #0x7FFF,%d1
248 cmpw #0x407f,%d1
249 blt cu_wrexn
250|
251| The operand is above precision boundaries. Use t_ovfl to
252| generate the correct value.
253|
254cu_novfl:
255 bsr t_ovfl
256 bra cu_wrexn
257|
258| The operand is below precision boundaries. Use denorm to
259| generate the correct value.
260|
261cu_nunfl:
262 bclrb #sign_bit,LOCAL_EX(%a0)
263 sne LOCAL_SGN(%a0)
264 bsr denorm
265 bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format
266 beqs cu_nucont
267 bsetb #sign_bit,LOCAL_EX(%a0)
268cu_nucont:
269 bfextu FPCR_MODE(%a6){#2:#2},%d1
270 btstb #2,CMDREG1B+1(%a6) |check for rd
271 bne inst_d
272 btstb #6,CMDREG1B+1(%a6) |check for rs
273 bne inst_s
274 swap %d1
275 moveb FPCR_MODE(%a6),%d1
276 lsrb #6,%d1
277 swap %d1
278 bra inst_sd
279inst_d:
280 orl #0x00020000,%d1
281 bra inst_sd
282inst_s:
283 orl #0x00010000,%d1
284inst_sd:
285 bclrb #sign_bit,LOCAL_EX(%a0)
286 sne LOCAL_SGN(%a0)
287 bsrl round
288 bfclr LOCAL_SGN(%a0){#0:#8}
289 beqs cu_nuflp
290 bsetb #sign_bit,LOCAL_EX(%a0)
291cu_nuflp:
292 btstb #inex2_bit,FPSR_EXCEPT(%a6)
293 beqs cu_nuninx
294 orl #aunfl_mask,USER_FPSR(%a6) |if the round was inex, set AUNFL
295cu_nuninx:
296 tstl LOCAL_HI(%a0) |test for zero
297 bnes cu_nunzro
298 tstl LOCAL_LO(%a0)
299 bnes cu_nunzro
300|
301| The mantissa is zero from the denorm loop. Check sign and rmode
302| to see if rounding should have occurred which would leave the lsb.
303|
304 movel USER_FPCR(%a6),%d0
305 andil #0x30,%d0 |isolate rmode
306 cmpil #0x20,%d0
307 blts cu_nzro
308 bnes cu_nrp
309cu_nrm:
310 tstw LOCAL_EX(%a0) |if positive, set lsb
311 bges cu_nzro
312 btstb #7,FPCR_MODE(%a6) |check for double
313 beqs cu_nincs
314 bras cu_nincd
315cu_nrp:
316 tstw LOCAL_EX(%a0) |if positive, set lsb
317 blts cu_nzro
318 btstb #7,FPCR_MODE(%a6) |check for double
319 beqs cu_nincs
320cu_nincd:
321 orl #0x800,LOCAL_LO(%a0) |inc for double
322 bra cu_nunzro
323cu_nincs:
324 orl #0x100,LOCAL_HI(%a0) |inc for single
325 bra cu_nunzro
326cu_nzro:
327 orl #z_mask,USER_FPSR(%a6)
328 moveb STAG(%a6),%d0
329 andib #0xe0,%d0
330 cmpib #0x40,%d0 |check if input was tagged zero
331 beqs cu_numv
332cu_nunzro:
333 orl #unfl_mask,USER_FPSR(%a6) |set unfl
334cu_numv:
335 movel (%a0),ETEMP(%a6)
336 movel 4(%a0),ETEMP_HI(%a6)
337 movel 8(%a0),ETEMP_LO(%a6)
338|
339| Write the result to memory, setting the fpsr cc bits. NaN and Inf
340| bypass cu_wrexn.
341|
342cu_wrexn:
343 tstw LOCAL_EX(%a0) |test for zero
344 beqs cu_wrzero
345 cmpw #0x8000,LOCAL_EX(%a0) |test for zero
346 bnes cu_wreon
347cu_wrzero:
348 orl #z_mask,USER_FPSR(%a6) |set Z bit
349cu_wreon:
350 tstw LOCAL_EX(%a0)
351 bpl wr_etemp
352 orl #neg_mask,USER_FPSR(%a6)
353 bra wr_etemp
354
355|
356| HANDLE SOURCE DENORM HERE
357|
358| ;clear denorm stag to norm
359| ;write the new tag & ete15 to the fstack
360mon_dnrm:
361|
362| At this point, check for the cases in which normalizing the
363| denorm produces incorrect results.
364|
365 tstb DY_MO_FLG(%a6) |all cases of dyadic instructions would
366 bnes nrm_src |require normalization of denorm
367
368| At this point:
369| monadic instructions: fabs = $18 fneg = $1a ftst = $3a
370| fmove = $00 fsmove = $40 fdmove = $44
371| fsqrt = $05* fssqrt = $41 fdsqrt = $45
372| (*fsqrt reencoded to $05)
373|
374 movew CMDREG1B(%a6),%d0 |get command register
375 andil #0x7f,%d0 |strip to only command word
376|
377| At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and
378| fdsqrt are possible.
379| For cases fabs, fneg, fsmove, and fdmove goto spos (do not normalize)
380| For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize)
381|
382 btstl #0,%d0
383 bnes nrm_src |weed out fsqrt instructions
384 st CU_ONLY(%a6) |set cu-only inst flag
385 bra cu_dnrm |fmove, fabs, fneg, ftst
386| ;cases go to cu_dnrm
387nrm_src:
388 bclrb #sign_bit,LOCAL_EX(%a0)
389 sne LOCAL_SGN(%a0)
390 bsr nrm_set |normalize number (exponent will go
391| ; negative)
392 bclrb #sign_bit,LOCAL_EX(%a0) |get rid of false sign
393
394 bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format
395 beqs spos
396 bsetb #sign_bit,LOCAL_EX(%a0)
397spos:
398 bfclr STAG(%a6){#0:#4} |set tag to normalized, FPTE15 = 0
399 bsetb #4,STAG(%a6) |set ETE15
400 orb #0xf0,DNRM_FLG(%a6)
401normal:
402 tstb DNRM_FLG(%a6) |check if any of the ops were denorms
403 bne ck_wrap |if so, check if it is a potential
404| ;wrap-around case
405fix_stk:
406 moveb #0xfe,CU_SAVEPC(%a6)
407 bclrb #E1,E_BYTE(%a6)
408
409 clrw NMNEXC(%a6)
410
411 st RES_FLG(%a6) |indicate that a restore is needed
412 rts
413
414|
415| cu_dnrm handles all cu-only instructions (fmove, fabs, fneg, and
416| ftst) completely in software without an frestore to the 040.
417|
418cu_dnrm:
419 st CU_ONLY(%a6)
420 movew CMDREG1B(%a6),%d0
421 andib #0x3b,%d0 |isolate bits to select inst
422 tstb %d0
423 beql cu_dmove |if zero, it is an fmove
424 cmpib #0x18,%d0
425 beql cu_dabs |if $18, it is fabs
426 cmpib #0x1a,%d0
427 beql cu_dneg |if $1a, it is fneg
428|
429| Inst is ftst. Check the source operand and set the cc's accordingly.
430| No write is done, so simply rts.
431|
432cu_dtst:
433 movew LOCAL_EX(%a0),%d0
434 bclrl #15,%d0
435 sne LOCAL_SGN(%a0)
436 beqs cu_dtpo
437 orl #neg_mask,USER_FPSR(%a6) |set N
438cu_dtpo:
439 cmpiw #0x7fff,%d0 |test for inf/nan
440 bnes cu_dtcz
441 tstl LOCAL_HI(%a0)
442 bnes cu_dtn
443 tstl LOCAL_LO(%a0)
444 bnes cu_dtn
445 orl #inf_mask,USER_FPSR(%a6)
446 rts
447cu_dtn:
448 orl #nan_mask,USER_FPSR(%a6)
449 movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for
450| ;snan handler
451 rts
452cu_dtcz:
453 tstl LOCAL_HI(%a0)
454 bnel cu_dtsx
455 tstl LOCAL_LO(%a0)
456 bnel cu_dtsx
457 orl #z_mask,USER_FPSR(%a6)
458cu_dtsx:
459 rts
460|
461| Inst is fabs. Execute the absolute value function on the input.
462| Branch to the fmove code.
463|
464cu_dabs:
465 bclrb #7,LOCAL_EX(%a0) |do abs
466 bras cu_dmove |fmove code will finish
467|
468| Inst is fneg. Execute the negate value function on the input.
469| Fall through to the fmove code.
470|
471cu_dneg:
472 bchgb #7,LOCAL_EX(%a0) |do neg
473|
474| Inst is fmove. This code also handles all result writes.
475| If bit 2 is set, round is forced to double. If it is clear,
476| and bit 6 is set, round is forced to single. If both are clear,
477| the round precision is found in the fpcr. If the rounding precision
478| is double or single, the result is zero, and the mode is checked
479| to determine if the lsb of the result should be set.
480|
481cu_dmove:
482 btstb #2,CMDREG1B+1(%a6) |check for rd
483 bne cu_dmrd
484 btstb #6,CMDREG1B+1(%a6) |check for rs
485 bne cu_dmrs
486|
487| The move or operation is not with forced precision. Use the
488| FPCR_MODE byte to get rounding.
489|
490cu_dmnr:
491 bfextu FPCR_MODE(%a6){#0:#2},%d0
492 tstb %d0 |check for extended
493 beq cu_wrexd |if so, just write result
494 cmpib #1,%d0 |check for single
495 beq cu_dmrs |fall through to double
496|
497| The move is fdmove or round precision is double. Result is zero.
498| Check rmode for rp or rm and set lsb accordingly.
499|
500cu_dmrd:
501 bfextu FPCR_MODE(%a6){#2:#2},%d1 |get rmode
502 tstw LOCAL_EX(%a0) |check sign
503 blts cu_dmdn
504 cmpib #3,%d1 |check for rp
505 bne cu_dpd |load double pos zero
506 bra cu_dpdr |load double pos zero w/lsb
507cu_dmdn:
508 cmpib #2,%d1 |check for rm
509 bne cu_dnd |load double neg zero
510 bra cu_dndr |load double neg zero w/lsb
511|
512| The move is fsmove or round precision is single. Result is zero.
513| Check for rp or rm and set lsb accordingly.
514|
515cu_dmrs:
516 bfextu FPCR_MODE(%a6){#2:#2},%d1 |get rmode
517 tstw LOCAL_EX(%a0) |check sign
518 blts cu_dmsn
519 cmpib #3,%d1 |check for rp
520 bne cu_spd |load single pos zero
521 bra cu_spdr |load single pos zero w/lsb
522cu_dmsn:
523 cmpib #2,%d1 |check for rm
524 bne cu_snd |load single neg zero
525 bra cu_sndr |load single neg zero w/lsb
526|
527| The precision is extended, so the result in etemp is correct.
528| Simply set unfl (not inex2 or aunfl) and write the result to
529| the correct fp register.
530cu_wrexd:
531 orl #unfl_mask,USER_FPSR(%a6)
532 tstw LOCAL_EX(%a0)
533 beq wr_etemp
534 orl #neg_mask,USER_FPSR(%a6)
535 bra wr_etemp
536|
537| These routines write +/- zero in double format. The routines
538| cu_dpdr and cu_dndr set the double lsb.
539|
540cu_dpd: |write +0 at the double threshold: sign clear, exponent $3c01
541 movel #0x3c010000,LOCAL_EX(%a0) |force pos double zero
542 clrl LOCAL_HI(%a0) |mantissa = 0
543 clrl LOCAL_LO(%a0)
544 orl #z_mask,USER_FPSR(%a6) |result is zero; set Z
545 orl #unfinx_mask,USER_FPSR(%a6)
546 bra wr_etemp |wr_etemp writes the operand to fpn
547cu_dpdr: |as cu_dpd but with the double lsb set; Z is not set
548 movel #0x3c010000,LOCAL_EX(%a0) |force pos double zero
549 clrl LOCAL_HI(%a0)
550 movel #0x800,LOCAL_LO(%a0) |with lsb set
551 orl #unfinx_mask,USER_FPSR(%a6)
552 bra wr_etemp
553cu_dnd: |write -0 at the double threshold: sign set, exponent $3c01
554 movel #0xbc010000,LOCAL_EX(%a0) |force neg double zero
555 clrl LOCAL_HI(%a0) |mantissa = 0
556 clrl LOCAL_LO(%a0)
557 orl #z_mask,USER_FPSR(%a6) |result is zero; set Z
558 orl #neg_mask,USER_FPSR(%a6) |result is negative; set N
559 orl #unfinx_mask,USER_FPSR(%a6)
560 bra wr_etemp
561cu_dndr: |as cu_dnd but with the double lsb set; Z is not set
562 movel #0xbc010000,LOCAL_EX(%a0) |force neg double zero
563 clrl LOCAL_HI(%a0)
564 movel #0x800,LOCAL_LO(%a0) |with lsb set
565 orl #neg_mask,USER_FPSR(%a6) |result is negative; set N
566 orl #unfinx_mask,USER_FPSR(%a6)
567 bra wr_etemp
568|
569| These routines write +/- zero in single format. The routines
570| cu_spdr and cu_sndr set the single lsb.
571|
572cu_spd: |write +0 at the single threshold: sign clear, exponent $3f81
573 movel #0x3f810000,LOCAL_EX(%a0) |force pos single zero
574 clrl LOCAL_HI(%a0) |mantissa = 0
575 clrl LOCAL_LO(%a0)
576 orl #z_mask,USER_FPSR(%a6) |result is zero; set Z
577 orl #unfinx_mask,USER_FPSR(%a6)
578 bra wr_etemp |wr_etemp writes the operand to fpn
579cu_spdr: |as cu_spd but with the single lsb set; Z is not set
580 movel #0x3f810000,LOCAL_EX(%a0) |force pos single zero
581 movel #0x100,LOCAL_HI(%a0) |with lsb set
582 clrl LOCAL_LO(%a0)
583 orl #unfinx_mask,USER_FPSR(%a6)
584 bra wr_etemp
585cu_snd: |write -0 at the single threshold: sign set, exponent $3f81
586 movel #0xbf810000,LOCAL_EX(%a0) |force neg single zero
587 clrl LOCAL_HI(%a0) |mantissa = 0
588 clrl LOCAL_LO(%a0)
589 orl #z_mask,USER_FPSR(%a6) |result is zero; set Z
590 orl #neg_mask,USER_FPSR(%a6) |result is negative; set N
591 orl #unfinx_mask,USER_FPSR(%a6)
592 bra wr_etemp
593cu_sndr: |as cu_snd but with the single lsb set; Z is not set
594 movel #0xbf810000,LOCAL_EX(%a0) |force neg single zero
595 movel #0x100,LOCAL_HI(%a0) |with lsb set
596 clrl LOCAL_LO(%a0)
597 orl #neg_mask,USER_FPSR(%a6) |result is negative; set N
598 orl #unfinx_mask,USER_FPSR(%a6)
599 bra wr_etemp
600
601|
602| This code checks for 16-bit overflow conditions on dyadic
603| operations which are not restorable into the floating-point
604| unit and must be completed in software. Basically, this
605| condition exists with a very large norm and a denorm. One
606| of the operands must be denormalized to enter this code.
607|
608| Flags used:
609| DY_MO_FLG contains 0 for monadic op, $ff for dyadic
610| DNRM_FLG contains $00 for neither op denormalized
611| $0f for the destination op denormalized
612| $f0 for the source op denormalized
613| $ff for both ops denormalized
614|
615| The wrap-around condition occurs for add, sub, div, and cmp
616| when
617|
618| abs(dest_exp - src_exp) >= $8000
619|
620| and for mul when
621|
622| (dest_exp + src_exp) < $0
623|
624| we must process the operation here if this case is true.
625|
626| The rts following the frcfpn routine is the exit from res_func
627| for this condition. The restore flag (RES_FLG) is left clear.
628| No frestore is done unless an exception is to be reported.
629|
630| For fadd:
631| if(sign_of(dest) != sign_of(src))
632| replace exponent of the denorm with $3fff (keep sign)
633| use fpu to perform dest+new_src (user's rmode and X)
634| clr sticky
635| else
636| set sticky
637| call round with user's precision and mode
638| move result to fpn and wbtemp
639|
640| For fsub:
641| if(sign_of(dest) == sign_of(src))
642| replace exponent of the denorm with $3fff (keep sign)
643| use fpu to perform dest-new_src (user's rmode and X)
644| clr sticky
645| else
646| set sticky
647| call round with user's precision and mode
648| move result to fpn and wbtemp
649|
650| For fdiv/fsgldiv:
651| if(both operands are denorm)
652| restore_to_fpu;
653| if(dest is norm)
654| force_ovf;
655| else(dest is denorm)
656| force_unf:
657|
658| For fcmp:
659| if(dest is norm)
660| N = sign_of(dest);
661| else(dest is denorm)
662| N = ~sign_of(src);
663|
664| For fmul:
665| if(both operands are denorm)
666| force_unf;
667| if((dest_exp + src_exp) < 0)
668| force_unf:
669| else
670| restore_to_fpu;
671|
672| local equates:
673 .set addcode,0x22
674 .set subcode,0x28
675 .set mulcode,0x23
676 .set divcode,0x20
677 .set cmpcode,0x38
678ck_wrap: |dispatch on the command bits of CMDREG1B to the per-op wrap check
679 | tstb DY_MO_FLG(%a6) ;check for fsqrt (instruction is disabled)
680 beq fix_stk |if zero, it is fsqrt. NOTE(review): with the tstb disabled this tests the flags live on entry; arriving via "bne ck_wrap" at normal: Z is clear, so this branch is not taken from there - confirm intent
681 movew CMDREG1B(%a6),%d0
682 andiw #0x3b,%d0 |strip to command bits
683 cmpiw #addcode,%d0 |fadd?
684 beq wrap_add
685 cmpiw #subcode,%d0 |fsub?
686 beq wrap_sub
687 cmpiw #mulcode,%d0 |fmul?
688 beq wrap_mul
689 cmpiw #cmpcode,%d0 |fcmp?
690 beq wrap_cmp |no match falls through to wrap_div
691|
692| Inst is fdiv.
693|
694wrap_div:
695 cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm,
696 beq fix_stk |restore to fpu
697|
698| One of the ops is denormalized. Test for wrap condition
699| and force the result.
700|
701 cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm
702 bnes div_srcd
703div_destd:
704 bsrl ckinf_ns
705 bne fix_stk
706 bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos)
707 bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg)
708 subl %d1,%d0 |subtract dest from src
709 cmpl #0x7fff,%d0
710 blt fix_stk |if less, not wrap case
711 clrb WBTEMP_SGN(%a6)
712 movew ETEMP_EX(%a6),%d0 |find the sign of the result
713 movew FPTEMP_EX(%a6),%d1
714 eorw %d1,%d0
715 andiw #0x8000,%d0
716 beq force_unf
717 st WBTEMP_SGN(%a6)
718 bra force_unf
719
720ckinf_ns: |entry: classify the source tag (STAG)
721 moveb STAG(%a6),%d0 |check source tag for zero, inf or nan
722 bra ck_in_com
723ckinf_nd: |entry: classify the destination tag (DTAG)
724 moveb DTAG(%a6),%d0 |check destination tag for zero, inf or nan
725ck_in_com:
726 andib #0x60,%d0 |isolate tag bits
727 cmpb #0x40,%d0 |is it inf?
728 beq nan_or_inf |yes, not wrap case
729 cmpb #0x60,%d0 |is it nan?
730 beq nan_or_inf |yes, not wrap case
731 cmpb #0x20,%d0 |is it a zero?
732 beq nan_or_inf |yes, not wrap case
733 clrl %d0 |d0 = 0 and Z set: caller's bne is not taken
734 rts |tag is not zero, inf or nan, so the caller goes on
735| ;to check the wrap case
736nan_or_inf: |d0 = -1 and Z clear: callers "bne fix_stk" on return
737 moveql #-1,%d0
738 rts
739
740
741
742div_srcd:
743 bsrl ckinf_nd
744 bne fix_stk
745 bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos)
746 bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg)
747 subl %d1,%d0 |subtract src from dest
748 cmpl #0x8000,%d0
749 blt fix_stk |if less, not wrap case
750 clrb WBTEMP_SGN(%a6)
751 movew ETEMP_EX(%a6),%d0 |find the sign of the result
752 movew FPTEMP_EX(%a6),%d1
753 eorw %d1,%d0
754 andiw #0x8000,%d0
755 beqs force_ovf
756 st WBTEMP_SGN(%a6)
757|
758| This code handles the case of the instruction resulting in
759| an overflow condition.
760|
761force_ovf:
762 bclrb #E1,E_BYTE(%a6)
763 orl #ovfl_inx_mask,USER_FPSR(%a6)
764 clrw NMNEXC(%a6)
765 leal WBTEMP(%a6),%a0 |point a0 to memory location
766 movew CMDREG1B(%a6),%d0
767 btstl #6,%d0 |test for forced precision
768 beqs frcovf_fpcr
769 btstl #2,%d0 |check for double
770 bnes frcovf_dbl
771 movel #0x1,%d0 |inst is forced single
772 bras frcovf_rnd
773frcovf_dbl:
774 movel #0x2,%d0 |inst is forced double
775 bras frcovf_rnd
776frcovf_fpcr:
777 bfextu FPCR_MODE(%a6){#0:#2},%d0 |inst not forced - use fpcr prec
778frcovf_rnd:
779
780| The 881/882 does not set inex2 for the following case, so the
781| line is commented out to be compatible with 881/882
782| tst.b %d0
783| beq.b frcovf_x
784| or.l #inex2_mask,USER_FPSR(%a6) ;if prec is s or d, set inex2
785
786|frcovf_x:
787 bsrl ovf_res |get correct result based on
788| ;round precision/mode. This
789| ;sets FPSR_CC correctly
790| ;returns in external format
791 bfclr WBTEMP_SGN(%a6){#0:#8}
792 beq frcfpn
793 bsetb #sign_bit,WBTEMP_EX(%a6)
794 bra frcfpn
795|
796| Inst is fadd.
797|
798wrap_add:
799 cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm,
800 beq fix_stk |restore to fpu
801|
802| One of the ops is denormalized. Test for wrap condition
803| and complete the instruction.
804|
805 cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm
806 bnes add_srcd
807add_destd:
808 bsrl ckinf_ns
809 bne fix_stk
810 bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos)
811 bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg)
812 subl %d1,%d0 |subtract dest from src
813 cmpl #0x8000,%d0
814 blt fix_stk |if less, not wrap case
815 bra add_wrap
816add_srcd:
817 bsrl ckinf_nd
818 bne fix_stk
819 bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos)
820 bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg)
821 subl %d1,%d0 |subtract src from dest
822 cmpl #0x8000,%d0
823 blt fix_stk |if less, not wrap case
824|
825| Check the signs of the operands. If they are unlike, the fpu
826| can be used to add the norm and 1.0 with the sign of the
827| denorm and it will correctly generate the result in extended
828| precision. We can then call round with no sticky and the result
829| will be correct for the user's rounding mode and precision. If
830| the signs are the same, we call round with the sticky bit set
831| and the result will be correct for the user's rounding mode and
832| precision.
833|
834add_wrap:
835 movew ETEMP_EX(%a6),%d0
836 movew FPTEMP_EX(%a6),%d1
837 eorw %d1,%d0
838 andiw #0x8000,%d0
839 beq add_same
840|
841| The signs are unlike.
842|
843 cmpb #0x0f,DNRM_FLG(%a6) |is dest the denorm?
844 bnes add_u_srcd
845 movew FPTEMP_EX(%a6),%d0
846 andiw #0x8000,%d0
847 orw #0x3fff,%d0 |force the exponent to +/- 1
848 movew %d0,FPTEMP_EX(%a6) |in the denorm
849 movel USER_FPCR(%a6),%d0
850 andil #0x30,%d0
851 fmovel %d0,%fpcr |set up users rmode and X
852 fmovex ETEMP(%a6),%fp0
853 faddx FPTEMP(%a6),%fp0
854 leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame
855 fmovel %fpsr,%d1
856 orl %d1,USER_FPSR(%a6) |capture cc's and inex from fadd
857 fmovex %fp0,WBTEMP(%a6) |write result to memory
858 lsrl #4,%d0 |put rmode in lower 2 bits
859 movel USER_FPCR(%a6),%d1
860 andil #0xc0,%d1
861 lsrl #6,%d1 |put precision in upper word
862 swap %d1
863 orl %d0,%d1 |set up for round call
864 clrl %d0 |force sticky to zero
865 bclrb #sign_bit,WBTEMP_EX(%a6)
866 sne WBTEMP_SGN(%a6)
867 bsrl round |round result to users rmode & prec
868 bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format
869 beq frcfpnr
870 bsetb #sign_bit,WBTEMP_EX(%a6)
871 bra frcfpnr
872add_u_srcd:
873 movew ETEMP_EX(%a6),%d0
874 andiw #0x8000,%d0
875 orw #0x3fff,%d0 |force the exponent to +/- 1
876 movew %d0,ETEMP_EX(%a6) |in the denorm
877 movel USER_FPCR(%a6),%d0
878 andil #0x30,%d0
879 fmovel %d0,%fpcr |set up users rmode and X
880 fmovex ETEMP(%a6),%fp0
881 faddx FPTEMP(%a6),%fp0
882 fmovel %fpsr,%d1
883 orl %d1,USER_FPSR(%a6) |capture cc's and inex from fadd
884 leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame
885 fmovex %fp0,WBTEMP(%a6) |write result to memory
886 lsrl #4,%d0 |put rmode in lower 2 bits
887 movel USER_FPCR(%a6),%d1
888 andil #0xc0,%d1
889 lsrl #6,%d1 |put precision in upper word
890 swap %d1
891 orl %d0,%d1 |set up for round call
892 clrl %d0 |force sticky to zero
893 bclrb #sign_bit,WBTEMP_EX(%a6)
894 sne WBTEMP_SGN(%a6) |use internal format for round
895 bsrl round |round result to users rmode & prec
896 bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format
897 beq frcfpnr
898 bsetb #sign_bit,WBTEMP_EX(%a6)
899 bra frcfpnr
900|
901| Signs are alike:
902|
903add_same:
904 cmpb #0x0f,DNRM_FLG(%a6) |is dest the denorm?
905 bnes add_s_srcd
906add_s_destd:
907 leal ETEMP(%a6),%a0
908 movel USER_FPCR(%a6),%d0
909 andil #0x30,%d0
910 lsrl #4,%d0 |put rmode in lower 2 bits
911 movel USER_FPCR(%a6),%d1
912 andil #0xc0,%d1
913 lsrl #6,%d1 |put precision in upper word
914 swap %d1
915 orl %d0,%d1 |set up for round call
916 movel #0x20000000,%d0 |set sticky for round
917 bclrb #sign_bit,ETEMP_EX(%a6)
918 sne ETEMP_SGN(%a6)
919 bsrl round |round result to users rmode & prec
920 bfclr ETEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format
921 beqs add_s_dclr
922 bsetb #sign_bit,ETEMP_EX(%a6)
923add_s_dclr:
924 leal WBTEMP(%a6),%a0
925 movel ETEMP(%a6),(%a0) |write result to wbtemp
926 movel ETEMP_HI(%a6),4(%a0)
927 movel ETEMP_LO(%a6),8(%a0)
928 tstw ETEMP_EX(%a6)
929 bgt add_ckovf
930 orl #neg_mask,USER_FPSR(%a6)
931 bra add_ckovf
932add_s_srcd:
933 leal FPTEMP(%a6),%a0
934 movel USER_FPCR(%a6),%d0
935 andil #0x30,%d0
936 lsrl #4,%d0 |put rmode in lower 2 bits
937 movel USER_FPCR(%a6),%d1
938 andil #0xc0,%d1
939 lsrl #6,%d1 |put precision in upper word
940 swap %d1
941 orl %d0,%d1 |set up for round call
942 movel #0x20000000,%d0 |set sticky for round
943 bclrb #sign_bit,FPTEMP_EX(%a6)
944 sne FPTEMP_SGN(%a6)
945 bsrl round |round result to users rmode & prec
946 bfclr FPTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format
947 beqs add_s_sclr
948 bsetb #sign_bit,FPTEMP_EX(%a6)
949add_s_sclr:
950 leal WBTEMP(%a6),%a0
951 movel FPTEMP(%a6),(%a0) |write result to wbtemp
952 movel FPTEMP_HI(%a6),4(%a0)
953 movel FPTEMP_LO(%a6),8(%a0)
954 tstw FPTEMP_EX(%a6)
955 bgt add_ckovf
956 orl #neg_mask,USER_FPSR(%a6)
957add_ckovf:
958 movew WBTEMP_EX(%a6),%d0
959 andiw #0x7fff,%d0
960 cmpiw #0x7fff,%d0
961 bne frcfpnr
962|
963| The result has overflowed to $7fff exponent. Set I, ovfl,
964| and aovfl, and clr the mantissa (incorrectly set by the
965| round routine.)
966|
967 orl #inf_mask+ovfl_inx_mask,USER_FPSR(%a6)
968 clrl 4(%a0)
969 bra frcfpnr
970|
971| Inst is fsub.
972|
973wrap_sub:
974 cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm,
975 beq fix_stk |restore to fpu
976|
977| One of the ops is denormalized. Test for wrap condition
978| and complete the instruction.
979|
980 cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm
981 bnes sub_srcd
982sub_destd:
983 bsrl ckinf_ns |src zero, inf or nan?
984 bne fix_stk |yes, not a wrap case
985 bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos)
986 bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg)
987 subl %d1,%d0 |subtract dest from src
988 cmpl #0x8000,%d0
989 blt fix_stk |if less, not wrap case
990 bra sub_wrap
991sub_srcd:
992 bsrl ckinf_nd |dest zero, inf or nan?
993 bne fix_stk |yes, not a wrap case
994 bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos)
995 bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg)
996 subl %d1,%d0 |subtract src from dest
997 cmpl #0x8000,%d0
998 blt fix_stk |if less, not wrap case; else fall into sub_wrap
999|
1000| Check the signs of the operands. If they are alike, the fpu
1001| can be used to subtract from the norm 1.0 with the sign of the
1002| denorm and it will correctly generate the result in extended
1003| precision. We can then call round with no sticky and the result
1004| will be correct for the user's rounding mode and precision. If
1005| the signs are unlike, we call round with the sticky bit set
1006| and the result will be correct for the user's rounding mode and
1007| precision.
1008|
1009sub_wrap:
1010 movew ETEMP_EX(%a6),%d0
1011 movew FPTEMP_EX(%a6),%d1
1012 eorw %d1,%d0 |bit 15 = src sign xor dest sign
1013 andiw #0x8000,%d0
1014 bne sub_diff |signs differ
1015|
1016| The signs are alike.
1017|
1018 cmpb #0x0f,DNRM_FLG(%a6) |is dest the denorm?
1019 bnes sub_u_srcd
1020 movew FPTEMP_EX(%a6),%d0
1021 andiw #0x8000,%d0 |keep only the dest sign
1022 orw #0x3fff,%d0 |force the exponent to +/- 1
1023 movew %d0,FPTEMP_EX(%a6) |in the denorm
1024 movel USER_FPCR(%a6),%d0
1025 andil #0x30,%d0
1026 fmovel %d0,%fpcr |set up users rmode and X
1027 fmovex FPTEMP(%a6),%fp0
1028 fsubx ETEMP(%a6),%fp0 |fp0 = dest - src
1029 fmovel %fpsr,%d1
1030 orl %d1,USER_FPSR(%a6) |capture cc's and inex from fsub
1031 leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame
1032 fmovex %fp0,WBTEMP(%a6) |write result to memory
1033 lsrl #4,%d0 |put rmode in lower 2 bits
1034 movel USER_FPCR(%a6),%d1
1035 andil #0xc0,%d1
1036 lsrl #6,%d1 |put precision in upper word
1037 swap %d1
1038 orl %d0,%d1 |set up for round call
1039 clrl %d0 |force sticky to zero
1040 bclrb #sign_bit,WBTEMP_EX(%a6) |move the sign out of the exponent word
1041 sne WBTEMP_SGN(%a6) |use internal format for round
1042 bsrl round |round result to users rmode & prec
1043 bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format
1044 beq frcfpnr |sign byte was zero: result is positive
1045 bsetb #sign_bit,WBTEMP_EX(%a6)
1046 bra frcfpnr
1047sub_u_srcd: |signs alike and the src is the denorm
1048 movew ETEMP_EX(%a6),%d0
1049 andiw #0x8000,%d0 |keep only the src sign
1050 orw #0x3fff,%d0 |force the exponent to +/- 1
1051 movew %d0,ETEMP_EX(%a6) |in the denorm
1052 movel USER_FPCR(%a6),%d0
1053 andil #0x30,%d0
1054 fmovel %d0,%fpcr |set up users rmode and X
1055 fmovex FPTEMP(%a6),%fp0
1056 fsubx ETEMP(%a6),%fp0 |fp0 = dest - src
1057 fmovel %fpsr,%d1
1058 orl %d1,USER_FPSR(%a6) |capture cc's and inex from fsub
1059 leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame
1060 fmovex %fp0,WBTEMP(%a6) |write result to memory
1061 lsrl #4,%d0 |put rmode in lower 2 bits
1062 movel USER_FPCR(%a6),%d1
1063 andil #0xc0,%d1
1064 lsrl #6,%d1 |put precision in upper word
1065 swap %d1
1066 orl %d0,%d1 |set up for round call
1067 clrl %d0 |force sticky to zero
1068 bclrb #sign_bit,WBTEMP_EX(%a6) |move the sign out of the exponent word
1069 sne WBTEMP_SGN(%a6) |use internal format for round
1070 bsrl round |round result to users rmode & prec
1071 bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format
1072 beq frcfpnr |sign byte was zero: result is positive
1073 bsetb #sign_bit,WBTEMP_EX(%a6)
1074 bra frcfpnr
1075|
1076| Signs are unlike:
1077|
1078sub_diff:
1079 cmpb #0x0f,DNRM_FLG(%a6) |is dest the denorm?
1080 bnes sub_s_srcd
1081sub_s_destd:
1082 leal ETEMP(%a6),%a0
1083 movel USER_FPCR(%a6),%d0
1084 andil #0x30,%d0
1085 lsrl #4,%d0 |put rmode in lower 2 bits
1086 movel USER_FPCR(%a6),%d1
1087 andil #0xc0,%d1
1088 lsrl #6,%d1 |put precision in upper word
1089 swap %d1
1090 orl %d0,%d1 |set up for round call
1091 movel #0x20000000,%d0 |set sticky for round
1092|
1093| Since the dest is the denorm, the sign is the opposite of the
1094| norm sign.
1095|
1096 eoriw #0x8000,ETEMP_EX(%a6) |flip sign on result
1097 tstw ETEMP_EX(%a6)
1098 bgts sub_s_dwr
1099 orl #neg_mask,USER_FPSR(%a6)
1100sub_s_dwr:
1101 bclrb #sign_bit,ETEMP_EX(%a6)
1102 sne ETEMP_SGN(%a6)
1103 bsrl round |round result to users rmode & prec
1104 bfclr ETEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format
1105 beqs sub_s_dclr
1106 bsetb #sign_bit,ETEMP_EX(%a6)
1107sub_s_dclr:
1108 leal WBTEMP(%a6),%a0
1109 movel ETEMP(%a6),(%a0) |write result to wbtemp
1110 movel ETEMP_HI(%a6),4(%a0)
1111 movel ETEMP_LO(%a6),8(%a0)
1112 bra sub_ckovf
1113sub_s_srcd:
1114 leal FPTEMP(%a6),%a0
1115 movel USER_FPCR(%a6),%d0
1116 andil #0x30,%d0
1117 lsrl #4,%d0 |put rmode in lower 2 bits
1118 movel USER_FPCR(%a6),%d1
1119 andil #0xc0,%d1
1120 lsrl #6,%d1 |put precision in upper word
1121 swap %d1
1122 orl %d0,%d1 |set up for round call
1123 movel #0x20000000,%d0 |set sticky for round
1124 bclrb #sign_bit,FPTEMP_EX(%a6)
1125 sne FPTEMP_SGN(%a6)
1126 bsrl round |round result to users rmode & prec
1127 bfclr FPTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format
1128 beqs sub_s_sclr
1129 bsetb #sign_bit,FPTEMP_EX(%a6)
1130sub_s_sclr:
1131 leal WBTEMP(%a6),%a0
1132 movel FPTEMP(%a6),(%a0) |write result to wbtemp
1133 movel FPTEMP_HI(%a6),4(%a0)
1134 movel FPTEMP_LO(%a6),8(%a0)
1135 tstw FPTEMP_EX(%a6)
1136 bgt sub_ckovf
1137 orl #neg_mask,USER_FPSR(%a6)
1138sub_ckovf:
1139 movew WBTEMP_EX(%a6),%d0
1140 andiw #0x7fff,%d0
1141 cmpiw #0x7fff,%d0
1142 bne frcfpnr
1143|
1144| The result has overflowed to $7fff exponent. Set I, ovfl,
1145| and aovfl, and clr the mantissa (incorrectly set by the
1146| round routine.)
1147|
1148 orl #inf_mask+ovfl_inx_mask,USER_FPSR(%a6)
1149 clrl 4(%a0)
1150 bra frcfpnr
1151|
1152| Inst is fcmp.
1153|
1154wrap_cmp:
1155 cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm,
1156 beq fix_stk |restore to fpu
1157|
1158| One of the ops is denormalized. Test for wrap condition
1159| and complete the instruction.
1160|
1161 cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm
1162 bnes cmp_srcd
1163cmp_destd:
1164 bsrl ckinf_ns |src zero, inf or nan?
1165 bne fix_stk |yes, not a wrap case
1166 bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos)
1167 bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg)
1168 subl %d1,%d0 |subtract dest from src
1169 cmpl #0x8000,%d0
1170 blt fix_stk |if less, not wrap case
1171 tstw ETEMP_EX(%a6) |set N to ~sign_of(src)
1172 bge cmp_setn |src is positive: set N
1173 rts |src is negative: return without setting N
1174cmp_srcd:
1175 bsrl ckinf_nd |dest zero, inf or nan?
1176 bne fix_stk |yes, not a wrap case
1177 bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos)
1178 bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg)
1179 subl %d1,%d0 |subtract src from dest
1180 cmpl #0x8000,%d0
1181 blt fix_stk |if less, not wrap case
1182 tstw FPTEMP_EX(%a6) |set N to sign_of(dest)
1183 blt cmp_setn |dest is negative: set N
1184 rts |dest is positive: return without setting N
1185cmp_setn:
1186 orl #neg_mask,USER_FPSR(%a6) |set N in the user's fpsr
1187 rts
1188
1189|
1190| Inst is fmul.
1191|
1192wrap_mul:
1193 cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm,
1194 beq force_unf |force an underflow (really!)
1195|
1196| One of the ops is denormalized. Test for wrap condition
1197| and complete the instruction.
1198|
1199 cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm
1200 bnes mul_srcd
1201mul_destd:
1202 bsrl ckinf_ns |src zero, inf or nan?
1203 bne fix_stk |yes, not a wrap case
1204 bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos)
1205 bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg)
1206 addl %d1,%d0 |add dest exp to src exp
1207 bgt fix_stk |sum > 0: not wrap case
1208 bra force_unf
1209mul_srcd:
1210 bsrl ckinf_nd |dest zero, inf or nan?
1211 bne fix_stk |yes, not a wrap case
1212 bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos)
1213 bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg)
1214 addl %d1,%d0 |add src exp to dest exp
1215 bgt fix_stk |sum > 0: not wrap case; else fall into force_unf
1216
1217|
1218| This code handles the case of the instruction resulting in
1219| an underflow condition.
1220|
1221force_unf:
1222 bclrb #E1,E_BYTE(%a6)
1223 orl #unfinx_mask,USER_FPSR(%a6)
1224 clrw NMNEXC(%a6)
1225 clrb WBTEMP_SGN(%a6)
1226 movew ETEMP_EX(%a6),%d0 |find the sign of the result
1227 movew FPTEMP_EX(%a6),%d1
1228 eorw %d1,%d0
1229 andiw #0x8000,%d0
1230 beqs frcunfcont
1231 st WBTEMP_SGN(%a6)
1232frcunfcont:
1233 lea WBTEMP(%a6),%a0 |point a0 to memory location
1234 movew CMDREG1B(%a6),%d0
1235 btstl #6,%d0 |test for forced precision
1236 beqs frcunf_fpcr
1237 btstl #2,%d0 |check for double
1238 bnes frcunf_dbl
1239 movel #0x1,%d0 |inst is forced single
1240 bras frcunf_rnd
1241frcunf_dbl:
1242 movel #0x2,%d0 |inst is forced double
1243 bras frcunf_rnd
1244frcunf_fpcr:
1245 bfextu FPCR_MODE(%a6){#0:#2},%d0 |inst not forced - use fpcr prec
1246frcunf_rnd:
1247 bsrl unf_sub |get correct result based on
1248| ;round precision/mode. This
1249| ;sets FPSR_CC correctly
1250 bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format
1251 beqs frcfpn
1252 bsetb #sign_bit,WBTEMP_EX(%a6)
1253 bra frcfpn
1254
1255|
1256| Write the result to the user's fpn. All results must be HUGE to be
1257| written; otherwise the results would have overflowed or underflowed.
1258| If the rounding precision is single or double, the ovf_res routine
1259| is needed to correctly supply the max value.
1260|
1261frcfpnr:
1262 movew CMDREG1B(%a6),%d0
1263 btstl #6,%d0 |test for forced precision
1264 beqs frcfpn_fpcr
1265 btstl #2,%d0 |check for double
1266 bnes frcfpn_dbl
1267 movel #0x1,%d0 |inst is forced single
1268 bras frcfpn_rnd
1269frcfpn_dbl:
1270 movel #0x2,%d0 |inst is forced double
1271 bras frcfpn_rnd
1272frcfpn_fpcr:
1273 bfextu FPCR_MODE(%a6){#0:#2},%d0 |inst not forced - use fpcr prec
1274 tstb %d0
1275 beqs frcfpn |if extended, write what you got
1276frcfpn_rnd:
1277 bclrb #sign_bit,WBTEMP_EX(%a6)
1278 sne WBTEMP_SGN(%a6)
1279 bsrl ovf_res |get correct result based on
1280| ;round precision/mode. This
1281| ;sets FPSR_CC correctly
1282 bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format
1283 beqs frcfpn_clr
1284 bsetb #sign_bit,WBTEMP_EX(%a6)
1285frcfpn_clr:
1286 orl #ovfinx_mask,USER_FPSR(%a6)
1287|
1288| Perform the write.
1289|
1290frcfpn:
1291 bfextu CMDREG1B(%a6){#6:#3},%d0 |extract fp destination register
1292 cmpib #3,%d0
1293 bles frc0123 |check if dest is fp0-fp3
1294 movel #7,%d1
1295 subl %d0,%d1
1296 clrl %d0
1297 bsetl %d1,%d0
1298 fmovemx WBTEMP(%a6),%d0
1299 rts
1300frc0123:
1301 cmpib #0,%d0
1302 beqs frc0_dst
1303 cmpib #1,%d0
1304 beqs frc1_dst
1305 cmpib #2,%d0
1306 beqs frc2_dst
1307frc3_dst:
1308 movel WBTEMP_EX(%a6),USER_FP3(%a6)
1309 movel WBTEMP_HI(%a6),USER_FP3+4(%a6)
1310 movel WBTEMP_LO(%a6),USER_FP3+8(%a6)
1311 rts
1312frc2_dst:
1313 movel WBTEMP_EX(%a6),USER_FP2(%a6)
1314 movel WBTEMP_HI(%a6),USER_FP2+4(%a6)
1315 movel WBTEMP_LO(%a6),USER_FP2+8(%a6)
1316 rts
1317frc1_dst:
1318 movel WBTEMP_EX(%a6),USER_FP1(%a6)
1319 movel WBTEMP_HI(%a6),USER_FP1+4(%a6)
1320 movel WBTEMP_LO(%a6),USER_FP1+8(%a6)
1321 rts
1322frc0_dst:
1323 movel WBTEMP_EX(%a6),USER_FP0(%a6)
1324 movel WBTEMP_HI(%a6),USER_FP0+4(%a6)
1325 movel WBTEMP_LO(%a6),USER_FP0+8(%a6)
1326 rts
1327
1328|
1329| Write etemp to fpn.
1330| A check is made on enabled and signalled snan exceptions,
1331| and the destination is not overwritten if this condition exists.
1332| This code is designed to make fmoveins of unsupported data types
1333| faster.
1334|
1335wr_etemp: |write etemp to fpn unless an enabled snan is pending
1336 btstb #snan_bit,FPSR_EXCEPT(%a6) |if snan is set, and
1337 beqs fmoveinc |enabled, force restore
1338 btstb #snan_bit,FPCR_ENABLE(%a6) |and don't overwrite
1339 beqs fmoveinc |the dest
1340 movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for
1341| ;snan handler
1342 tstb ETEMP(%a6) |check for negative
1343 blts snan_neg
1344 rts |positive snan: return without touching the dest
1345snan_neg:
1346 orl #neg_mask,USER_FPSR(%a6) |snan is negative; set N (needs the mask, not the bit index neg_bit)
1347 rts
1348fmoveinc:
1349 clrw NMNEXC(%a6)
1350 bclrb #E1,E_BYTE(%a6)
1351 moveb STAG(%a6),%d0 |check if stag is inf
1352 andib #0xe0,%d0
1353 cmpib #0x40,%d0
1354 bnes fminc_cnan
1355 orl #inf_mask,USER_FPSR(%a6) |if inf, nothing yet has set I
1356 tstw LOCAL_EX(%a0) |check sign
1357 bges fminc_con
1358 orl #neg_mask,USER_FPSR(%a6)
1359 bra fminc_con
1360fminc_cnan:
1361 cmpib #0x60,%d0 |check if stag is NaN
1362 bnes fminc_czero
1363 orl #nan_mask,USER_FPSR(%a6) |if nan, nothing yet has set NaN
1364 movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for
1365| ;snan handler
1366 tstw LOCAL_EX(%a0) |check sign
1367 bges fminc_con
1368 orl #neg_mask,USER_FPSR(%a6)
1369 bra fminc_con
1370fminc_czero:
1371 cmpib #0x20,%d0 |check if zero
1372 bnes fminc_con
1373 orl #z_mask,USER_FPSR(%a6) |if zero, set Z
1374 tstw LOCAL_EX(%a0) |check sign
1375 bges fminc_con
1376 orl #neg_mask,USER_FPSR(%a6)
1377fminc_con:
1378 bfextu CMDREG1B(%a6){#6:#3},%d0 |extract fp destination register
1379 cmpib #3,%d0
1380 bles fp0123 |check if dest is fp0-fp3
1381 movel #7,%d1
1382 subl %d0,%d1
1383 clrl %d0
1384 bsetl %d1,%d0
1385 fmovemx ETEMP(%a6),%d0
1386 rts
1387
1388fp0123:
1389 cmpib #0,%d0
1390 beqs fp0_dst
1391 cmpib #1,%d0
1392 beqs fp1_dst
1393 cmpib #2,%d0
1394 beqs fp2_dst
1395fp3_dst:
1396 movel ETEMP_EX(%a6),USER_FP3(%a6)
1397 movel ETEMP_HI(%a6),USER_FP3+4(%a6)
1398 movel ETEMP_LO(%a6),USER_FP3+8(%a6)
1399 rts
1400fp2_dst:
1401 movel ETEMP_EX(%a6),USER_FP2(%a6)
1402 movel ETEMP_HI(%a6),USER_FP2+4(%a6)
1403 movel ETEMP_LO(%a6),USER_FP2+8(%a6)
1404 rts
1405fp1_dst:
1406 movel ETEMP_EX(%a6),USER_FP1(%a6)
1407 movel ETEMP_HI(%a6),USER_FP1+4(%a6)
1408 movel ETEMP_LO(%a6),USER_FP1+8(%a6)
1409 rts
1410fp0_dst:
1411 movel ETEMP_EX(%a6),USER_FP0(%a6)
1412 movel ETEMP_HI(%a6),USER_FP0+4(%a6)
1413 movel ETEMP_LO(%a6),USER_FP0+8(%a6)
1414 rts
1415
1416opclass3:
1417 st CU_ONLY(%a6)
1418 movew CMDREG1B(%a6),%d0 |check if packed moveout
1419 andiw #0x0c00,%d0 |isolate last 2 bits of size field
1420 cmpiw #0x0c00,%d0 |if size is 011 or 111, it is packed
1421 beq pack_out |else it is norm or denorm
1422 bra mv_out
1423
1424
1425|
1426| MOVE OUT
1427|
1428
1429mv_tbl:
1430 .long li
1431 .long sgp
1432 .long xp
1433 .long mvout_end |should never be taken
1434 .long wi
1435 .long dp
1436 .long bi
1437 .long mvout_end |should never be taken
1438mv_out:
1439 bfextu CMDREG1B(%a6){#3:#3},%d1 |put source specifier in d1
1440 leal mv_tbl,%a0
1441 movel %a0@(%d1:l:4),%a0
1442 jmp (%a0)
1443
1444|
1445| This exit is for move-out to memory. The aunfl bit is
1446| set if the result is inex and unfl is signalled.
1447|
1448mvout_end:
1449 btstb #inex2_bit,FPSR_EXCEPT(%a6)
1450 beqs no_aufl
1451 btstb #unfl_bit,FPSR_EXCEPT(%a6)
1452 beqs no_aufl
1453 bsetb #aunfl_bit,FPSR_AEXCEPT(%a6)
1454no_aufl:
1455 clrw NMNEXC(%a6)
1456 bclrb #E1,E_BYTE(%a6)
1457 fmovel #0,%FPSR |clear any cc bits from res_func
1458|
1459| Return ETEMP to extended format from internal extended format so
1460| that gen_except will have a correctly signed value for ovfl/unfl
1461| handlers.
1462|
1463 bfclr ETEMP_SGN(%a6){#0:#8}
1464 beqs mvout_con
1465 bsetb #sign_bit,ETEMP_EX(%a6)
1466mvout_con:
1467 rts
1468|
1469| This exit is for move-out to int register. The aunfl bit is
1470| not set in any case for this move.
1471|
1472mvouti_end:
1473 clrw NMNEXC(%a6)
1474 bclrb #E1,E_BYTE(%a6)
1475 fmovel #0,%FPSR |clear any cc bits from res_func
1476|
1477| Return ETEMP to extended format from internal extended format so
1478| that gen_except will have a correctly signed value for ovfl/unfl
1479| handlers.
1480|
1481 bfclr ETEMP_SGN(%a6){#0:#8}
1482 beqs mvouti_con
1483 bsetb #sign_bit,ETEMP_EX(%a6)
1484mvouti_con:
1485 rts
1486|
1487| li is used to handle a long integer source specifier
1488|
1489| Operands at or beyond the long limits are clamped here (lo_plrg/lo_nlrg).
1490li:
1491 moveql #4,%d0 |set byte count
1492
1493 btstb #7,STAG(%a6) |check for extended denorm
1494 bne int_dnrm |if so, branch
1495
1496 fmovemx ETEMP(%a6),%fp0-%fp0 |fp0 = operand to convert
1497 fcmpd #0x41dfffffffc00000,%fp0
1498| 41dfffffffc00000 in dbl prec = 401d0000fffffffe00000000 in ext prec
1499 fbge lo_plrg |operand >= 2^31-1
1500 fcmpd #0xc1e0000000000000,%fp0
1501| c1e0000000000000 in dbl prec = c01e00008000000000000000 in ext prec
1502 fble lo_nlrg |operand <= -2^31
1503|
1504| at this point, the answer is between the largest pos and neg values
1505|
1506 movel USER_FPCR(%a6),%d1 |use user's rounding mode
1507 andil #0x30,%d1 |keep only the bits under mask 0x30
1508 fmovel %d1,%fpcr
1509 fmovel %fp0,L_SCR1(%a6) |let the 040 perform conversion
1510 fmovel %fpsr,%d1
1511 orl %d1,USER_FPSR(%a6) |capture inex2/ainex if set
1512 bra int_wrt
1513
1514
1515lo_plrg:
1516 movel #0x7fffffff,L_SCR1(%a6) |answer is largest positive int
1517 fbeq int_wrt |exact answer
1518 fcmpd #0x41dfffffffe00000,%fp0
1519| 41dfffffffe00000 in dbl prec = 401d0000ffffffff00000000 in ext prec
1520 fbge int_operr |set operr (operand >= 2^31-0.5)
1521 bra int_inx |set inexact
1522
1523lo_nlrg:
1524 movel #0x80000000,L_SCR1(%a6) |answer is most negative int
1525 fbeq int_wrt |exact answer
1526 fcmpd #0xc1e0000000100000,%fp0
1527| c1e0000000100000 in dbl prec = c01e00008000000080000000 in ext prec
1528 fblt int_operr |set operr (operand < -(2^31+0.5))
1529 bra int_inx |set inexact
1530
1531|
1532| wi is used to handle a word integer source specifier
1533|
1534| Operands at or beyond the word limits are clamped here (wo_plrg/wo_nlrg).
1535wi:
1536 moveql #2,%d0 |set byte count
1537
1538 btstb #7,STAG(%a6) |check for extended denorm
1539 bne int_dnrm |branch if so
1540
1541 fmovemx ETEMP(%a6),%fp0-%fp0 |fp0 = operand to convert
1542 fcmps #0x46fffe00,%fp0
1543| 46fffe00 in sgl prec = 400d0000fffe000000000000 in ext prec
1544 fbge wo_plrg |operand >= 2^15-1
1545 fcmps #0xc7000000,%fp0
1546| c7000000 in sgl prec = c00e00008000000000000000 in ext prec
1547 fble wo_nlrg |operand <= -2^15
1548
1549|
1550| at this point, the answer is between the largest pos and neg values
1551|
1552 movel USER_FPCR(%a6),%d1 |use user's rounding mode
1553 andil #0x30,%d1 |keep only the bits under mask 0x30
1554 fmovel %d1,%fpcr
1555 fmovew %fp0,L_SCR1(%a6) |let the 040 perform conversion
1556 fmovel %fpsr,%d1
1557 orl %d1,USER_FPSR(%a6) |capture inex2/ainex if set
1558 bra int_wrt
1559
1560wo_plrg:
1561 movew #0x7fff,L_SCR1(%a6) |answer is largest positive int
1562 fbeq int_wrt |exact answer
1563 fcmps #0x46ffff00,%fp0
1564| 46ffff00 in sgl prec = 400d0000ffff000000000000 in ext prec
1565 fbge int_operr |set operr (operand >= 2^15-0.5)
1566 bra int_inx |set inexact
1567
1568wo_nlrg:
1569 movew #0x8000,L_SCR1(%a6) |answer is most negative int
1570 fbeq int_wrt |exact answer
1571 fcmps #0xc7000080,%fp0
1572| c7000080 in sgl prec = c00e00008000800000000000 in ext prec
1573 fblt int_operr |set operr (operand < -(2^15+0.5))
1574 bra int_inx |set inexact
1575
1576|
1577| bi is used to handle a byte integer source specifier
1578|
1579| Operands at or beyond the byte limits are clamped here (by_plrg/by_nlrg).
1580bi:
1581 moveql #1,%d0 |set byte count
1582
1583 btstb #7,STAG(%a6) |check for extended denorm
1584 bne int_dnrm |branch if so
1585
1586 fmovemx ETEMP(%a6),%fp0-%fp0 |fp0 = operand to convert
1587 fcmps #0x42fe0000,%fp0
1588| 42fe0000 in sgl prec = 40050000fe00000000000000 in ext prec
1589 fbge by_plrg |operand >= 2^7-1
1590 fcmps #0xc3000000,%fp0
1591| c3000000 in sgl prec = c00600008000000000000000 in ext prec
1592 fble by_nlrg |operand <= -2^7
1593
1594|
1595| at this point, the answer is between the largest pos and neg values
1596|
1597 movel USER_FPCR(%a6),%d1 |use user's rounding mode
1598 andil #0x30,%d1 |keep only the bits under mask 0x30
1599 fmovel %d1,%fpcr
1600 fmoveb %fp0,L_SCR1(%a6) |let the 040 perform conversion
1601 fmovel %fpsr,%d1
1602 orl %d1,USER_FPSR(%a6) |capture inex2/ainex if set
1603 bra int_wrt
1604
1605by_plrg:
1606 moveb #0x7f,L_SCR1(%a6) |answer is largest positive int
1607 fbeq int_wrt |exact answer
1608 fcmps #0x42ff0000,%fp0
1609| 42ff0000 in sgl prec = 40050000ff00000000000000 in ext prec
1610 fbge int_operr |set operr (operand >= 2^7-0.5)
1611 bra int_inx |set inexact
1612
1613by_nlrg:
1614 moveb #0x80,L_SCR1(%a6) |answer is most negative int
1615 fbeq int_wrt |exact answer
1616 fcmps #0xc3008000,%fp0
1617| c3008000 in sgl prec = c00600008080000000000000 in ext prec
1618 fblt int_operr |set operr (operand < -(2^7+0.5))
1619 bra int_inx |set inexact
1620
1621|
1622| Common integer routines
1623|
1624| int_dnrm---account for possible nonzero result for round up with positive
1625| operand and round down for negative answer. In the first case (result = 1)
1626| byte-width (stored in d0) of result must be honored. In the second case,
1627| -1 in L_SCR1(a6) will cover all contingencies (FMOVE.B/W/L out).
1628| All paths below end at int_wrt, which expects the byte count in d0.
1629int_dnrm:
1630 movel #0,L_SCR1(%a6) | initialize result to 0
1631 bfextu FPCR_MODE(%a6){#2:#2},%d1 | d1 is the rounding mode
1632 cmpb #2,%d1
1633 bmis int_inx | if RN or RZ, done
1634 bnes int_rp | if RP, continue below
1635 tstw ETEMP(%a6) | RM: store -1 in L_SCR1 if src is negative
1636 bpls int_inx | otherwise result is 0
1637 movel #-1,L_SCR1(%a6)
1638 bras int_inx
1639int_rp:
1640 tstw ETEMP(%a6) | RP: store +1 of proper width in L_SCR1 if
1641| ; source is greater than 0
1642 bmis int_inx | otherwise, result is 0
1643 lea L_SCR1(%a6),%a1 | a1 is address of L_SCR1
1644 addal %d0,%a1 | offset by destination width -1
1645 subal #1,%a1
1646 bsetb #0,(%a1) | set low bit at a1 address
1647int_inx:
1648 oril #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
1649 bras int_wrt
1650int_operr:
1651 fmovemx %fp0-%fp0,FPTEMP(%a6) |FPTEMP must contain the extended
1652| ;precision source that needs to be
1653| ;converted to integer this is required
1654| ;if the operr exception is enabled.
1655| ;set operr/aiop (no inex2 on int ovfl)
1656
1657 oril #opaop_mask,USER_FPSR(%a6)
1658| ;fall through to perform int_wrt
1659int_wrt:
1660 movel EXC_EA(%a6),%a1 |load destination address
1661 tstl %a1 |check to see if it is a dest register
1662 beqs wrt_dn |write data register
1663 lea L_SCR1(%a6),%a0 |point to supervisor source address
1664 bsrl mem_write |d0 still holds the byte count
1665 bra mvouti_end
1666
1667wrt_dn:
1668 movel %d0,-(%sp) |d0 currently contains the size to write
1669 bsrl get_fline |get_fline returns Dn in d0
1670 andiw #0x7,%d0 |isolate register
1671 movel (%sp)+,%d1 |get size
1672 cmpil #4,%d1 |most frequent case
1673 beqs sz_long
1674 cmpil #2,%d1
1675 bnes sz_con |size 1: byte, nothing to add
1676 orl #8,%d0 |add 'word' size to register#
1677 bras sz_con
1678sz_long:
1679 orl #0x10,%d0 |add 'long' size to register#
1680sz_con:
1681 movel %d0,%d1 |reg_dest expects size:reg in d1
1682 bsrl reg_dest |load proper data register
1683 bra mvouti_end
1684xp:
1685 lea ETEMP(%a6),%a0 |a0 points to the operand
1686 bclrb #sign_bit,LOCAL_EX(%a0) |strip the sign from the exponent...
1687 sne LOCAL_SGN(%a0) |...and set the sign byte if it was set
1688 btstb #7,STAG(%a6) |check for extended denorm
1689 bne xdnrm
1690 clrl %d0 |set destination format to extended (0)
1691 bras do_fp |do normal case
1692sgp:
1693 lea ETEMP(%a6),%a0 |a0 points to the operand
1694 bclrb #sign_bit,LOCAL_EX(%a0) |strip the sign from the exponent...
1695 sne LOCAL_SGN(%a0) |...and set the sign byte if it was set
1696 btstb #7,STAG(%a6) |check for extended denorm
1697 bne sp_catas |branch if so
1698 movew LOCAL_EX(%a0),%d0 |d0 = exponent word
1699 lea sp_bnds,%a1 |a1 = exponent bounds table (used again in sp_under)
1700 cmpw (%a1),%d0
1701 blt sp_under |below the first table entry
1702 cmpw 2(%a1),%d0
1703 bgt sp_over |above the second table entry
1704 movel #1,%d0 |set destination format to single
1705 bras do_fp |do normal case
1706dp:
1707 lea ETEMP(%a6),%a0 |a0 points to the operand
1708 bclrb #sign_bit,LOCAL_EX(%a0) |strip the sign from the exponent...
1709 sne LOCAL_SGN(%a0) |...and set the sign byte if it was set
1710
1711 btstb #7,STAG(%a6) |check for extended denorm
1712 bne dp_catas |branch if so
1713
1714 movew LOCAL_EX(%a0),%d0 |d0 = exponent word
1715 lea dp_bnds,%a1 |a1 = exponent bounds table (used again in dp_under)
1716
1717 cmpw (%a1),%d0
1718 blt dp_under |below the first table entry
1719 cmpw 2(%a1),%d0
1720 bgt dp_over |above the second table entry
1721
1722 movel #2,%d0 |set destination format to double
1723| ;fall through to do_fp
1724| do_fp: d0 = round precision (0 ext, 1 sgl, 2 dbl), a0 = operand
1725do_fp:
1726 bfextu FPCR_MODE(%a6){#2:#2},%d1 |rnd mode in d1
1727 swap %d0 |rnd prec in upper word
1728 addl %d0,%d1 |d1 has PREC/MODE info
1729
1730 clrl %d0 |clear g,r,s
1731
1732 bsrl round |round
1733
1734 movel %a0,%a1 |a1 has the operand input
1735 movel EXC_EA(%a6),%a0 |a0 has the destination pointer
1736
1737 bfextu CMDREG1B(%a6){#3:#3},%d1 |extract destination format
1738| ;at this point only the dest
1739| ;formats sgl, dbl, ext are
1740| ;possible
1741 cmpb #2,%d1
1742 bgts ddbl |double=5, extended=2, single=1
1743 bnes dsgl
1744| ;fall through to dext
1745dext:
1746 bsrl dest_ext
1747 bra mvout_end
1748dsgl:
1749 bsrl dest_sgl
1750 bra mvout_end
1751ddbl:
1752 bsrl dest_dbl
1753 bra mvout_end
1754
1755|
1756| Handle possible denorm or catastrophic underflow cases here
1757|
1758xdnrm:
1759 bsr set_xop |initialize WBTEMP
1760 bsetb #wbtemp15_bit,WB_BYTE(%a6) |set wbtemp15
1761
1762 movel %a0,%a1 |a1 has the operand input
1763 movel EXC_EA(%a6),%a0 |a0 has the destination pointer
1764 bsrl dest_ext |store to memory
1765 bsetb #unfl_bit,FPSR_EXCEPT(%a6) |set the unfl exception bit
1766 bra mvout_end
1767
1768sp_under:
1769 bsetb #etemp15_bit,STAG(%a6)
1770| Entered from sgp with d0 = exponent and a1 = sp_bnds.
1771 cmpw 4(%a1),%d0 |compare with the third table entry
1772 blts sp_catas |catastrophic underflow case
1773
1774 movel #1,%d0 |load in round precision
1775 movel #sgl_thresh,%d1 |load in single denorm threshold
1776 bsrl dpspdnrm |expects d1 to have the proper
1777| ;denorm threshold
1778 bsrl dest_sgl |stores value to destination
1779 bsetb #unfl_bit,FPSR_EXCEPT(%a6) |set the unfl exception bit
1780 bra mvout_end |exit
1781
1782dp_under:
1783 bsetb #etemp15_bit,STAG(%a6)
1784| Entered from dp with d0 = exponent and a1 = dp_bnds.
1785 cmpw 4(%a1),%d0 |compare with the third table entry
1786 blts dp_catas |catastrophic underflow case
1787
1788 movel #dbl_thresh,%d1 |load in double precision threshold
1789 movel #2,%d0 |load in round precision
1790 bsrl dpspdnrm |expects d1 to have proper
1791| ;denorm threshold
1792| ;expects d0 to have round precision
1793 bsrl dest_dbl |store value to destination
1794 bsetb #unfl_bit,FPSR_EXCEPT(%a6) |set the unfl exception bit
1795 bra mvout_end |exit
1796
1797|
1798| Handle catastrophic underflow cases here
1799|
1800sp_catas:
1801| Temp fix for z bit set in unf_sub
1802 movel USER_FPSR(%a6),-(%a7) |save USER_FPSR across unf_sub
1803
1804 movel #1,%d0 |set round precision to sgl
1805
1806 bsrl unf_sub |a0 points to result
1807
1808 movel (%a7)+,USER_FPSR(%a6) |discard any USER_FPSR changes made by unf_sub
1809
1810 movel #1,%d0
1811 subw %d0,LOCAL_EX(%a0) |account for difference between
1812| ;denorm/norm bias
1813
1814 movel %a0,%a1 |a1 has the operand input
1815 movel EXC_EA(%a6),%a0 |a0 has the destination pointer
1816
1817 bsrl dest_sgl |store the result
1818 oril #unfinx_mask,USER_FPSR(%a6) |then set the unfinx_mask bits
1819 bra mvout_end
1820
1821dp_catas:
1822| Temp fix for z bit set in unf_sub
1823 movel USER_FPSR(%a6),-(%a7) |save USER_FPSR across unf_sub
1824
1825 movel #2,%d0 |set round precision to dbl
1826 bsrl unf_sub |a0 points to result
1827
1828 movel (%a7)+,USER_FPSR(%a6) |discard any USER_FPSR changes made by unf_sub
1829
1830 movel #1,%d0
1831 subw %d0,LOCAL_EX(%a0) |account for difference between
1832| ;denorm/norm bias
1833
1834 movel %a0,%a1 |a1 has the operand input
1835 movel EXC_EA(%a6),%a0 |a0 has the destination pointer
1836
1837 bsrl dest_dbl |store the result
1838 oril #unfinx_mask,USER_FPSR(%a6) |then set the unfinx_mask bits
1839 bra mvout_end
1840
1841|
1842| Handle catastrophic overflow cases here
1843|
1844sp_over:
1845| Temp fix for z bit set in ovf_res
1846 movel USER_FPSR(%a6),-(%a7) |save USER_FPSR across ovf_res
1847
1848 movel #1,%d0 |round precision = sgl
1849 leal FP_SCR1(%a6),%a0 |use FP_SCR1 for creating result
1850 movel ETEMP_EX(%a6),(%a0) |copy the operand into FP_SCR1
1851 movel ETEMP_HI(%a6),4(%a0)
1852 movel ETEMP_LO(%a6),8(%a0)
1853 bsrl ovf_res
1854
1855 movel (%a7)+,USER_FPSR(%a6) |discard any USER_FPSR changes made by ovf_res
1856
1857 movel %a0,%a1 |a1 has the operand input
1858 movel EXC_EA(%a6),%a0 |a0 has the destination pointer
1859 bsrl dest_sgl
1860 orl #ovfinx_mask,USER_FPSR(%a6) |then set the ovfinx_mask bits
1861 bra mvout_end
1862
1863dp_over:
1864| Temp fix for z bit set in ovf_res
1865 movel USER_FPSR(%a6),-(%a7) |save USER_FPSR across ovf_res
1866
1867 movel #2,%d0 |round precision = dbl
1868 leal FP_SCR1(%a6),%a0 |use FP_SCR1 for creating result
1869 movel ETEMP_EX(%a6),(%a0) |copy the operand into FP_SCR1
1870 movel ETEMP_HI(%a6),4(%a0)
1871 movel ETEMP_LO(%a6),8(%a0)
1872 bsrl ovf_res
1873
1874 movel (%a7)+,USER_FPSR(%a6) |discard any USER_FPSR changes made by ovf_res
1875
1876 movel %a0,%a1 |a1 has the operand input
1877 movel EXC_EA(%a6),%a0 |a0 has the destination pointer
1878 bsrl dest_dbl
1879 orl #ovfinx_mask,USER_FPSR(%a6) |then set the ovfinx_mask bits
1880 bra mvout_end
1881
1882|
1883| DPSPDNRM
1884|
1885| This subroutine takes an extended normalized number and denormalizes
1886| it to the given round precision. This subroutine also decrements
1887| the input operand's exponent by 1 to account for the fact that
1888| dest_sgl or dest_dbl expects a normalized number's bias.
1889|
1890| Input: a0 points to a normalized number in internal extended format
1891| d0 is the round precision (=1 for sgl; =2 for dbl)
1892| d1 is the single precision or double precision
1893| denorm threshold
1894|
1895| Output: (In the format for dest_sgl or dest_dbl)
1896| a0 points to the destination
1897| a1 points to the operand
1898|
1899| Exceptions: Reports inexact 2 exception by setting USER_FPSR bits
1900|
1901dpspdnrm:
1902 movel %d0,-(%a7) |save round precision
1903 clrl %d0 |clear initial g,r,s
1904 bsrl dnrm_lp |careful with d0, it's needed by round
1905
1906 bfextu FPCR_MODE(%a6){#2:#2},%d1 |get rounding mode
1907 swap %d1 |park the mode in the upper word
1908 movew 2(%a7),%d1 |set rounding precision
1909 swap %d1 |at this point d1 has PREC/MODE info
1910 bsrl round |round result, sets the inex bit in
1911| ;USER_FPSR if needed
1912
1913 movew #1,%d0
1914 subw %d0,LOCAL_EX(%a0) |account for difference in denorm
1915| ;vs norm bias
1916
1917 movel %a0,%a1 |a1 has the operand input
1918 movel EXC_EA(%a6),%a0 |a0 has the destination pointer
1919 addw #4,%a7 |pop stack
1920 rts
1921|
1922| SET_XOP initializes WBTEMP with the value pointed to by a0
1923| input: a0 points to input operand in the internal extended format
1924|
1925set_xop:
1926 movel LOCAL_EX(%a0),WBTEMP_EX(%a6) |copy all three longwords
1927 movel LOCAL_HI(%a0),WBTEMP_HI(%a6)
1928 movel LOCAL_LO(%a0),WBTEMP_LO(%a6)
1929 bfclr WBTEMP_SGN(%a6){#0:#8} |clear the sign byte; cc reflect its old value
1930 beqs sxop |sign byte was zero: nothing to restore
1931 bsetb #sign_bit,WBTEMP_EX(%a6) |else put the sign back in the exponent
1932sxop:
1933 bfclr STAG(%a6){#5:#4} |clear wbtm66,wbtm1,wbtm0,sbit
1934 rts
1935|
1936| P_MOVE
1937|
1938p_movet:
1939 .long p_move |indexed by source tag (see pack_out)
1940 .long p_movez
1941 .long p_movei
1942 .long p_moven
1943 .long p_move
1944p_regd:
1945 .long p_dyd0 |indexed by k-factor register number (see dynamick)
1946 .long p_dyd1
1947 .long p_dyd2
1948 .long p_dyd3
1949 .long p_dyd4
1950 .long p_dyd5
1951 .long p_dyd6
1952 .long p_dyd7
1953
1954pack_out:
1955 leal p_movet,%a0 |load jmp table address
1956 movew STAG(%a6),%d0 |get source tag
1957 bfextu %d0{#16:#3},%d0 |isolate source bits
1958 movel (%a0,%d0.w*4),%a0 |load a0 with routine label for tag
1959 jmp (%a0) |go to the routine
1960
1961p_write:
1962 movel #0x0c,%d0 |get byte count
1963 movel EXC_EA(%a6),%a1 |get the destination address
1964 bsr mem_write |write the user's destination
1965 moveb #0,CU_SAVEPC(%a6) |set the cu save pc to all 0's
1966
1967|
1968| Also note that the dtag must be set to norm here - this is because
1969| the 040 uses the dtag to execute the correct microcode.
1970|
1971 bfclr DTAG(%a6){#0:#3} |set dtag to norm
1972
1973 rts
1974
1975| Notes on handling of special case (zero, inf, and nan) inputs:
1976| 1. Operr is not signalled if the k-factor is greater than 18.
1977| 2. Per the manual, status bits are not set.
1978|
1979| p_move is entries 0 and 4 of p_movet: pick the k-factor, then call bindec.
1980p_move:
1981 movew CMDREG1B(%a6),%d0
1982 btstl #kfact_bit,%d0 |test for dynamic k-factor
1983 beqs statick |if clear, k-factor is static
1984dynamick:
1985 bfextu %d0{#25:#3},%d0 |isolate register for dynamic k-factor
1986 lea p_regd,%a0 |table of per-register readers
1987 movel (%a0,%d0.l*4),%a0 |same operand syntax as pack_out
1988 jmp (%a0) |reader loads d0, then branches to statick
1989statick:
1990 andiw #0x007f,%d0 |get k-factor
1991 bfexts %d0{#25:#7},%d0 |sign extend d0 for bindec
1992 leal ETEMP(%a6),%a0 |a0 will point to the packed decimal
1993 bsrl bindec |perform the convert; data at a6
1994 leal FP_SCR1(%a6),%a0 |load a0 with result address
1995 bral p_write
1996p_movez:
1997 leal ETEMP(%a6),%a0 |a0 will point to the packed decimal
1998 clrw 2(%a0) |clear lower word of exp
1999 clrl 4(%a0) |load second lword of ZERO
2000 clrl 8(%a0) |load third lword of ZERO
2001 bra p_write |go write results
2002p_movei:
2003 fmovel #0,%FPSR |clear aiop
2004 leal ETEMP(%a6),%a0 |a0 will point to the packed decimal
2005 clrw 2(%a0) |clear lower word of exp
2006 bra p_write |go write the result
2007p_moven:
2008 leal ETEMP(%a6),%a0 |a0 will point to the packed decimal
2009 clrw 2(%a0) |clear lower word of exp
2010 bra p_write |go write the result
2011
2012|
2013| Routines to read the dynamic k-factor from Dn.
2014| d0/d1 are read from USER_D0/USER_D1(a6); d2-d7 are read directly.
2015p_dyd0:
2016 movel USER_D0(%a6),%d0
2017 bras statick
2018p_dyd1:
2019 movel USER_D1(%a6),%d0
2020 bras statick
2021p_dyd2:
2022 movel %d2,%d0
2023 bras statick
2024p_dyd3:
2025 movel %d3,%d0
2026 bras statick
2027p_dyd4:
2028 movel %d4,%d0
2029 bras statick
2030p_dyd5:
2031 movel %d5,%d0
2032 bras statick
2033p_dyd6:
2034 movel %d6,%d0
2035 bra statick
2036p_dyd7:
2037 movel %d7,%d0
2038 bra statick
2039
2040 |end
diff --git a/arch/m68k/fpsp040/round.S b/arch/m68k/fpsp040/round.S
new file mode 100644
index 000000000000..00f98068783f
--- /dev/null
+++ b/arch/m68k/fpsp040/round.S
@@ -0,0 +1,649 @@
1|
2| round.sa 3.4 7/29/91
3|
4| handle rounding and normalization tasks
5|
6|
7|
8| Copyright (C) Motorola, Inc. 1990
9| All Rights Reserved
10|
11| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
12| The copyright notice above does not evidence any
13| actual or intended publication of such source code.
14
15|ROUND idnt 2,1 | Motorola 040 Floating Point Software Package
16
17 |section 8
18
19#include "fpsp.h"
20
21|
22| round --- round result according to precision/mode
23|
24| a0 points to the input operand in the internal extended format
25| d1(high word) contains rounding precision:
26| ext = $0000xxxx
27| sgl = $0001xxxx
28| dbl = $0002xxxx
29| d1(low word) contains rounding mode:
30| RN = $xxxx0000
31| RZ = $xxxx0001
32| RM = $xxxx0002
33| RP = $xxxx0003
34| d0{31:29} contains the g,r,s bits (extended)
35|
36| On return the value pointed to by a0 is correctly rounded,
37| a0 is preserved and the g-r-s bits in d0 are cleared.
38| The result is not typed - the tag field is invalid. The
39| result is still in the internal extended format.
40|
41| The INEX bit of USER_FPSR will be set if the rounded result was
42| inexact (i.e. if any of the g-r-s bits were set).
43|
44
45 .global round
46round:
47| If g=r=s=0 then result is exact and round is done, else set
48| the inex flag in status reg and continue.
49|
50 bsrs ext_grs |this subroutine looks at the
51| ;rounding precision and sets
52| ;the appropriate g-r-s bits.
53 tstl %d0 |if grs are zero, go force
54 bne rnd_cont |lower bits to zero for size
55
56 swap %d1 |set up d1.w for round prec.
57 bra truncate
58
59rnd_cont:
60|
61| Use rounding mode as an index into a jump table for these modes.
62|
63 orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
64 lea mode_tab,%a1
65 movel (%a1,%d1.w*4),%a1 |d1.w = rounding mode (0..3)
66 jmp (%a1)
67|
68| Jump table indexed by rounding mode in d1.w. All following assumes
69| grs != 0.
70|
71mode_tab:
72 .long rnd_near
73 .long rnd_zero
74 .long rnd_mnus
75 .long rnd_plus
76|
77| ROUND PLUS INFINITY
78|
79| If sign of fp number = 0 (positive), then add 1 to l.
80|
81rnd_plus:
82 swap %d1 |set up d1 for round prec.
83 tstb LOCAL_SGN(%a0) |check for sign
84 bmi truncate |if negative then truncate
85 movel #0xffffffff,%d0 |force g,r,s to be all f's
86 lea add_to_l,%a1
87 movel (%a1,%d1.w*4),%a1 |d1.w = round precision
88 jmp (%a1)
89|
90| ROUND MINUS INFINITY
91|
92| If sign of fp number = 1 (negative), then add 1 to l.
93|
94rnd_mnus:
95 swap %d1 |set up d1 for round prec.
96 tstb LOCAL_SGN(%a0) |check for sign
97 bpl truncate |if positive then truncate
98 movel #0xffffffff,%d0 |force g,r,s to be all f's
99 lea add_to_l,%a1
100 movel (%a1,%d1.w*4),%a1 |d1.w = round precision
101 jmp (%a1)
102|
103| ROUND ZERO
104|
105| Always truncate.
106rnd_zero:
107 swap %d1 |set up d1 for round prec.
108 bra truncate |truncate dispatches on d1.w
109|
110|
111| ROUND NEAREST
112|
113| If (g=1), then add 1 to l and if (r=s=0), then clear l
114| Note that this will round to even in case of a tie.
115|
116rnd_near:
117 swap %d1 |set up d1 for round prec.
118 asll #1,%d0 |shift g-bit to c-bit
119 bcc truncate |if (g=1) then
120 lea add_to_l,%a1 |add 1 to l; d0 now holds only r,s
121 movel (%a1,%d1.w*4),%a1 |d1.w = round precision
122 jmp (%a1)
123
124|
125| ext_grs --- extract guard, round and sticky bits
126|
127| Input: d1 = PREC:ROUND
128| Output: d0{31:29}= guard, round, sticky
129|
130| The ext_grs extract the guard/round/sticky bits according to the
131| selected rounding precision. It is called by the round subroutine
132| only. All registers except d0 are kept intact. d0 becomes an
133| updated guard,round,sticky in d0{31:29}
134|
135| Notes: the ext_grs uses the round PREC, and therefore has to swap d1
136| prior to usage, and needs to restore d1 to original.
137|
138ext_grs:
139 swap %d1 |have d1.w point to round precision
140 cmpiw #0,%d1
141 bnes sgl_or_dbl
142 bras end_ext_grs |ext precision: d0 is returned unchanged
143
144sgl_or_dbl:
145 moveml %d2/%d3,-(%a7) |make some temp registers
146 cmpiw #1,%d1
147 bnes grs_dbl
148grs_sgl:
149 bfextu LOCAL_HI(%a0){#24:#2},%d3 |sgl prec. g-r are 2 bits right
150 movel #30,%d2 |of the sgl prec. limits
151 lsll %d2,%d3 |shift g-r bits to MSB of d3
152 movel LOCAL_HI(%a0),%d2 |get word 2 for s-bit test
153 andil #0x0000003f,%d2 |s bit is the or of all other
154 bnes st_stky |bits to the right of g-r
155 tstl LOCAL_LO(%a0) |test lower mantissa
156 bnes st_stky |if any are set, set sticky
157 tstl %d0 |test original g,r,s
158 bnes st_stky |if any are set, set sticky
159 bras end_sd |if words 3 and 4 are clr, exit
160grs_dbl:
161 bfextu LOCAL_LO(%a0){#21:#2},%d3 |dbl-prec. g-r are 2 bits right
162 movel #30,%d2 |of the dbl prec. limits
163 lsll %d2,%d3 |shift g-r bits to the MSB of d3
164 movel LOCAL_LO(%a0),%d2 |get lower mantissa for s-bit test
165 andil #0x000001ff,%d2 |s bit is the or-ing of all
166 bnes st_stky |other bits to the right of g-r
167 tstl %d0 |test word original g,r,s
168 bnes st_stky |if any are set, set sticky
169 bras end_sd |if clear, exit
170st_stky:
171 bset #rnd_stky_bit,%d3
172end_sd:
173 movel %d3,%d0 |return grs to d0
174 moveml (%a7)+,%d2/%d3 |restore scratch registers
175end_ext_grs:
176 swap %d1 |restore d1 to original
177 rts
178
179|******************* Local Equates
180 .set ad_1_sgl,0x00000100 | constant to add 1 to l-bit in sgl prec
181 .set ad_1_dbl,0x00000800 | constant to add 1 to l-bit in dbl prec
182
183
184|Jump table for adding 1 to the l-bit indexed by rnd prec
185
186add_to_l:
187 .long add_ext
188 .long add_sgl
189 .long add_dbl
190 .long add_dbl |index 3 is also routed to add_dbl
191|
192| ADD SINGLE
193|
194add_sgl:
195 addl #ad_1_sgl,LOCAL_HI(%a0) |add 1 to l-bit
196 bccs scc_clr |no mantissa overflow
197 roxrw LOCAL_HI(%a0) |shift v-bit back in
198 roxrw LOCAL_HI+2(%a0) |shift v-bit back in
199 addw #0x1,LOCAL_EX(%a0) |and incr exponent
200scc_clr:
201 tstl %d0 |test for rs = 0
202 bnes sgl_done
203 andiw #0xfe00,LOCAL_HI+2(%a0) |clear the l-bit
204sgl_done:
205 andil #0xffffff00,LOCAL_HI(%a0) |truncate bits beyond sgl limit
206 clrl LOCAL_LO(%a0) |clear ls mant
207 rts
208
209|
210| ADD EXTENDED
211|
212add_ext:
213 addql #1,LOCAL_LO(%a0) |add 1 to l-bit
214 bccs xcc_clr |test for carry out
215 addql #1,LOCAL_HI(%a0) |propagate carry
216 bccs xcc_clr
217 roxrw LOCAL_HI(%a0) |mant is 0 so restore v-bit
218 roxrw LOCAL_HI+2(%a0) |mant is 0 so restore v-bit
219 roxrw LOCAL_LO(%a0) |carry the shift through the ls mant
220 roxrw LOCAL_LO+2(%a0)
221 addw #0x1,LOCAL_EX(%a0) |and inc exp
222xcc_clr:
223 tstl %d0 |test rs = 0
224 bnes add_ext_done
225 andib #0xfe,LOCAL_LO+3(%a0) |clear the l bit
226add_ext_done:
227 rts
228|
229| ADD DOUBLE
230|
231add_dbl:
232 addl #ad_1_dbl,LOCAL_LO(%a0) |add 1 to l-bit
233 bccs dcc_clr
234 addql #1,LOCAL_HI(%a0) |propagate carry
235 bccs dcc_clr
236 roxrw LOCAL_HI(%a0) |mant is 0 so restore v-bit
237 roxrw LOCAL_HI+2(%a0) |mant is 0 so restore v-bit
238 roxrw LOCAL_LO(%a0) |carry the shift through the ls mant
239 roxrw LOCAL_LO+2(%a0)
240 addw #0x1,LOCAL_EX(%a0) |incr exponent
241dcc_clr:
242 tstl %d0 |test for rs = 0
243 bnes dbl_done
244 andiw #0xf000,LOCAL_LO+2(%a0) |clear the l-bit
245
246dbl_done:
247 andil #0xfffff800,LOCAL_LO(%a0) |truncate bits beyond dbl limit
248 rts
249
250error:
251 rts |not referenced elsewhere in round.S
252|
253| Truncate all other bits
254|
255trunct:
256 .long end_rnd
257 .long sgl_done
258 .long dbl_done
259 .long dbl_done |index 3 is also routed to dbl_done
260
261truncate:
262 lea trunct,%a1
263 movel (%a1,%d1.w*4),%a1 |d1.w = round precision
264 jmp (%a1)
265
266end_rnd:
267 rts
268
269|
270| NORMALIZE
271|
272| These routines (nrm_zero & nrm_set) normalize the unnorm. This
273| is done by shifting the mantissa left while decrementing the
274| exponent.
275|
276| NRM_SET shifts and decrements until there is a 1 set in the integer
277| bit of the mantissa (msb in d1).
278|
279| NRM_ZERO shifts and decrements until there is a 1 set in the integer
280| bit of the mantissa (msb in d1) unless this would mean the exponent
281| would go less than 0. In that case the number becomes a denorm - the
282| exponent (d0) is set to 0 and the mantissa (d1 & d2) is not
283| normalized.
284|
285| Note that both routines have been optimized (for the worst case) and
286| therefore do not have the easy to follow decrement/shift loop.
287|
288| NRM_ZERO
289|
290| Distance to first 1 bit in mantissa = X
291| Distance to 0 from exponent = Y
292| If X < Y
293| Then
294| nrm_set
295| Else
296| shift mantissa by Y
297| set exponent = 0
298| NOTE(review): at `greater`, Y in 33..62 (reachable only via ms_clr) looks like it zeroes the mantissa - confirm.
299|input:
300| FP_SCR1 = exponent, ms mantissa part, ls mantissa part
301|output:
302| L_SCR1{4} = fpte15 or ete15 bit
303|
304 .global nrm_zero
305nrm_zero:
306 movew LOCAL_EX(%a0),%d0
307 cmpw #64,%d0 |see if exp >= 64
308 bmis d0_less
309 bsr nrm_set |exp >= 64 so exp won't go below 0
310 rts
311d0_less:
312 moveml %d2/%d3/%d5/%d6,-(%a7)
313 movel LOCAL_HI(%a0),%d1
314 movel LOCAL_LO(%a0),%d2
315
316 bfffo %d1{#0:#32},%d3 |get the distance to the first 1
317| ;in ms mant
318 beqs ms_clr |branch if no bits were set
319 cmpw %d3,%d0 |if X>Y
320 bmis greater |then exp will go past 0 (neg) if
321| ;it is just shifted
322 bsr nrm_set |else exp won't go past 0
323 moveml (%a7)+,%d2/%d3/%d5/%d6
324 rts
325greater:
326 movel %d2,%d6 |save ls mant in d6
327 lsll %d0,%d2 |shift ls mant by count
328 lsll %d0,%d1 |shift ms mant by count
329 movel #32,%d5
330 subl %d0,%d5 |make op a denorm by shifting bits
331 lsrl %d5,%d6 |by the number in the exp, then
332| ;set exp = 0.
333 orl %d6,%d1 |shift the ls mant bits into the ms mant
334 movel #0,%d0 |same as if decremented exp to 0
335| ;while shifting
336 movew %d0,LOCAL_EX(%a0)
337 movel %d1,LOCAL_HI(%a0)
338 movel %d2,LOCAL_LO(%a0)
339 moveml (%a7)+,%d2/%d3/%d5/%d6
340 rts
341ms_clr:
342 bfffo %d2{#0:#32},%d3 |check if any bits set in ls mant
343 beqs all_clr |branch if none set
344 addw #32,%d3 |X counts the 32 zero bits of the ms mant too
345 cmpw %d3,%d0 |if X>Y
346 bmis greater |then branch
347 bsr nrm_set |else exp won't go past 0
348 moveml (%a7)+,%d2/%d3/%d5/%d6
349 rts
350all_clr:
351 movew #0,LOCAL_EX(%a0) |no mantissa bits set. Set exp = 0.
352 moveml (%a7)+,%d2/%d3/%d5/%d6
353 rts
354|
355| NRM_SET
356|
357 .global nrm_set
358nrm_set:
359 movel %d7,-(%a7)
360 bfffo LOCAL_HI(%a0){#0:#32},%d7 |find first 1 in ms mant (to d7)
361 beqs lower |branch if ms mant is all 0's
362
363 movel %d6,-(%a7)
364
365 subw %d7,LOCAL_EX(%a0) |sub exponent by count
366 movel LOCAL_HI(%a0),%d0 |d0 has ms mant
367 movel LOCAL_LO(%a0),%d1 |d1 has ls mant
368
369 lsll %d7,%d0 |shift first 1 to j bit position
370 movel %d1,%d6 |copy ls mant into d6
371 lsll %d7,%d6 |shift ls mant by count
372 movel %d6,LOCAL_LO(%a0) |store ls mant into memory
373 moveql #32,%d6
374 subl %d7,%d6 |continue shift
375 lsrl %d6,%d1 |shift off all bits but those that will
376| ;be shifted into ms mant
377 orl %d1,%d0 |shift the ls mant bits into the ms mant
378 movel %d0,LOCAL_HI(%a0) |store ms mant into memory
379 moveml (%a7)+,%d7/%d6 |restore registers
380 rts
381
382|
383| We get here if ms mant was = 0, and we assume ls mant has bits
384| set (otherwise this would have been tagged a zero not a denorm).
385|
386lower:
387 movew LOCAL_EX(%a0),%d0 |d0 has exponent
388 movel LOCAL_LO(%a0),%d1 |d1 has ls mant
389 subw #32,%d0 |account for ms mant being all zeros
390 bfffo %d1{#0:#32},%d7 |find first 1 in ls mant (to d7)
391 subw %d7,%d0 |subtract shift count from exp
392 lsll %d7,%d1 |shift first 1 to integer bit in ms mant
393 movew %d0,LOCAL_EX(%a0) |store exp
394 movel %d1,LOCAL_HI(%a0) |store ms mant
395 clrl LOCAL_LO(%a0) |clear ls mant
396 movel (%a7)+,%d7
397 rts
398|
399| denorm --- denormalize an intermediate result
400|
401| Used by underflow.
402|
403| Input:
404| a0 points to the operand to be denormalized
405| (in the internal extended format)
406|
407| d0: rounding precision
408| Output:
409| a0 points to the denormalized result
410| (in the internal extended format)
411|
412| d0 is guard,round,sticky
413|
414| d0 comes into this routine with the rounding precision. It
415| is then loaded with the denormalized exponent threshold for the
416| rounding precision.
417|
418
419 .global denorm
420denorm:
421 btstb #6,LOCAL_EX(%a0) |check for exponents between $7fff-$4000
422 beqs no_sgn_ext
423 bsetb #7,LOCAL_EX(%a0) |sign extend if it is so
424no_sgn_ext:
425
426 cmpib #0,%d0 |if 0 then extended precision
427 bnes not_ext |else branch
428
429 clrl %d1 |load d1 with ext threshold
430 clrl %d0 |clear the sticky flag
431 bsr dnrm_lp |denormalize the number
432 tstb %d1 |check for inex
433 beq no_inex |if clr, no inex
434 bras dnrm_inex |if set, set inex
435
436not_ext:
437 cmpil #1,%d0 |if 1 then single precision
438 beqs load_sgl |else must be 2, double prec
439
440load_dbl:
441 movew #dbl_thresh,%d1 |put copy of threshold in d1
442 movel %d1,%d0 |copy d1 into d0
443 subw LOCAL_EX(%a0),%d0 |diff = threshold - exp
444 cmpw #67,%d0 |if diff >= 67 (mant + grs bits)
445 bpls chk_stky |then branch (all bits would be
446| ; shifted off in denorm routine)
447 clrl %d0 |else clear the sticky flag
448 bsr dnrm_lp |denormalize the number
449 tstb %d1 |check flag
450 beqs no_inex |if clr, no inex
451 bras dnrm_inex |if set, set inex
452
453load_sgl:
454 movew #sgl_thresh,%d1 |put copy of threshold in d1
455 movel %d1,%d0 |copy d1 into d0
456 subw LOCAL_EX(%a0),%d0 |diff = threshold - exp
457 cmpw #67,%d0 |if diff >= 67 (mant + grs bits)
458 bpls chk_stky |then branch (all bits would be
459| ; shifted off in denorm routine)
460 clrl %d0 |else clear the sticky flag
461 bsr dnrm_lp |denormalize the number
462 tstb %d1 |check flag
463 beqs no_inex |if clr, no inex
464 bras dnrm_inex |if set, set inex
465
466chk_stky:
467 tstl LOCAL_HI(%a0) |check for any bits set
468 bnes set_stky
469 tstl LOCAL_LO(%a0) |check for any bits set
470 bnes set_stky
471 bras clr_mant |mantissa is zero: d0 is not updated on this path
472set_stky:
473 orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
474 movel #0x20000000,%d0 |set sticky bit in return value
475clr_mant:
476 movew %d1,LOCAL_EX(%a0) |load exp with threshold
477 movel #0,LOCAL_HI(%a0) |set d1 = 0 (ms mantissa)
478 movel #0,LOCAL_LO(%a0) |set d2 = 0 (ls mantissa)
479 rts
480dnrm_inex:
481 orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
482no_inex:
483 rts
484
485|
486| dnrm_lp --- denormalize exponent/mantissa to specified threshold
487|
488| Input:
489| a0 points to the operand to be denormalized
490| d0{31:29} initial guard,round,sticky
491| d1{15:0} denormalization threshold
492| Output:
493| a0 points to the denormalized operand
494| d0{31:29} final guard,round,sticky
495| d1.b inexact flag: all ones means inexact result
496|
497| The LOCAL_LO and LOCAL_GRS parts of the value are copied to FP_SCR2
498| so that bfext can be used to extract the new low part of the mantissa.
499| Dnrm_lp can be called with a0 pointing to ETEMP or WBTEMP and there
500| is no LOCAL_GRS scratch word following it on the fsave frame.
501|
502 .global dnrm_lp
503dnrm_lp:
504 movel %d2,-(%sp) |save d2 for temp use
505 btstb #E3,E_BYTE(%a6) |test for type E3 exception
506 beqs not_E3 |not type E3 exception
507 bfextu WBTEMP_GRS(%a6){#6:#3},%d2 |extract guard,round, sticky bit
508 movel #29,%d0 |shift count
509 lsll %d0,%d2 |shift g,r,s to their positions
510 movel %d2,%d0 |E3: these replace the caller's g,r,s
511not_E3:
512 movel (%sp)+,%d2 |restore d2
513 movel LOCAL_LO(%a0),FP_SCR2+LOCAL_LO(%a6)
514 movel %d0,FP_SCR2+LOCAL_GRS(%a6)
515 movel %d1,%d0 |copy the denorm threshold
516 subw LOCAL_EX(%a0),%d1 |d1 = threshold - uns exponent
517 bles no_lp |d1 <= 0
518 cmpw #32,%d1
519 blts case_1 |0 < d1 < 32
520 cmpw #64,%d1
521 blts case_2 |32 <= d1 < 64
522 bra case_3 |d1 >= 64
523|
524| No normalization necessary
525|
526no_lp:
527 clrb %d1 |set no inex2 reported
528 movel FP_SCR2+LOCAL_GRS(%a6),%d0 |restore original g,r,s
529 rts
530|
531| case (0<d1<32)
532|
533case_1:
534 movel %d2,-(%sp) |save d2 for temp use
535 movew %d0,LOCAL_EX(%a0) |exponent = denorm threshold
536 movel #32,%d0
537 subw %d1,%d0 |d0 = 32 - d1
538 bfextu LOCAL_EX(%a0){%d0:#32},%d2
539 bfextu %d2{%d1:%d0},%d2 |d2 = new LOCAL_HI
540 bfextu LOCAL_HI(%a0){%d0:#32},%d1 |d1 = new LOCAL_LO
541 bfextu FP_SCR2+LOCAL_LO(%a6){%d0:#32},%d0 |d0 = new G,R,S
542 movel %d2,LOCAL_HI(%a0) |store new LOCAL_HI
543 movel %d1,LOCAL_LO(%a0) |store new LOCAL_LO
544 clrb %d1 |assume exact
545 bftst %d0{#2:#30} |any bits below G,R?
546 beqs c1nstky
547 bsetl #rnd_stky_bit,%d0 |yes: set sticky
548 st %d1 |and flag inexact
549c1nstky:
550 movel FP_SCR2+LOCAL_GRS(%a6),%d2 |restore original g,r,s
551 andil #0xe0000000,%d2 |clear all but G,R,S
552 tstl %d2 |test if original G,R,S are clear
553 beqs grs_clear
554 orl #0x20000000,%d0 |set sticky bit in d0
555grs_clear:
556 andil #0xe0000000,%d0 |clear all but G,R,S
557 movel (%sp)+,%d2 |restore d2
558 rts
559|
560| case (32<=d1<64)
561|
562case_2:
563 movel %d2,-(%sp) |save d2 for temp use
564 movew %d0,LOCAL_EX(%a0) |unsigned exponent = threshold
565 subw #32,%d1 |d1 now between 0 and 31
566 movel #32,%d0
567 subw %d1,%d0 |d0 = 32 - d1
568 bfextu LOCAL_EX(%a0){%d0:#32},%d2
569 bfextu %d2{%d1:%d0},%d2 |d2 = new LOCAL_LO
570 bfextu LOCAL_HI(%a0){%d0:#32},%d1 |d1 = new G,R,S
571 bftst %d1{#2:#30}
572 bnes c2_sstky |bra if sticky bit to be set
573 bftst FP_SCR2+LOCAL_LO(%a6){%d0:#32}
574 bnes c2_sstky |bra if sticky bit to be set
575 movel %d1,%d0
576 clrb %d1 |exact: clear the inexact flag
577 bras end_c2
578c2_sstky:
579 movel %d1,%d0
580 bsetl #rnd_stky_bit,%d0
581 st %d1 |flag inexact
582end_c2:
583 clrl LOCAL_HI(%a0) |store LOCAL_HI = 0
584 movel %d2,LOCAL_LO(%a0) |store LOCAL_LO
585 movel FP_SCR2+LOCAL_GRS(%a6),%d2 |restore original g,r,s
586 andil #0xe0000000,%d2 |clear all but G,R,S
587 tstl %d2 |test if original G,R,S are clear
588 beqs clear_grs
589 orl #0x20000000,%d0 |set sticky bit in d0
590clear_grs:
591 andil #0xe0000000,%d0 |get rid of all but G,R,S
592 movel (%sp)+,%d2 |restore d2
593 rts
594|
595| d1 >= 64 Force the exponent to be the denorm threshold with the
596| correct sign.
597|
598case_3:
599 movew %d0,LOCAL_EX(%a0) |exponent = denorm threshold
600 tstw LOCAL_SGN(%a0)
601 bges c3con
602c3neg:
603 orl #0x80000000,LOCAL_EX(%a0) |negative: set the top bit of the exponent long
604c3con:
605 cmpw #64,%d1
606 beqs sixty_four
607 cmpw #65,%d1
608 beqs sixty_five
609|
610| Shift value is out of range. Set d1 for inex2 flag and
611| return a zero with the given threshold.
612|
613 clrl LOCAL_HI(%a0)
614 clrl LOCAL_LO(%a0)
615 movel #0x20000000,%d0 |return sticky only
616 st %d1 |flag inexact
617 rts
618
619sixty_four:
620 movel LOCAL_HI(%a0),%d0
621 bfextu %d0{#2:#30},%d1 |d1 = bits that fall below G,R
622 andil #0xc0000000,%d0 |top two mantissa bits become G,R
623 bras c3com
624
625sixty_five:
626 movel LOCAL_HI(%a0),%d0
627 bfextu %d0{#1:#31},%d1 |d1 = bits that fall below R
628 andil #0x80000000,%d0
629 lsrl #1,%d0 |shift high bit into R bit
630
631c3com:
632 tstl %d1 |any shifted-out bits set?
633 bnes c3ssticky
634 tstl LOCAL_LO(%a0)
635 bnes c3ssticky
636 tstb FP_SCR2+LOCAL_GRS(%a6) |or any of the saved g,r,s?
637 bnes c3ssticky
638 clrb %d1 |exact: clear the inexact flag
639 bras c3end
640
641c3ssticky:
642 bsetl #rnd_stky_bit,%d0
643 st %d1 |flag inexact
644c3end:
645 clrl LOCAL_HI(%a0)
646 clrl LOCAL_LO(%a0)
647 rts
648
649 |end
diff --git a/arch/m68k/fpsp040/sacos.S b/arch/m68k/fpsp040/sacos.S
new file mode 100644
index 000000000000..83b00ab1c48f
--- /dev/null
+++ b/arch/m68k/fpsp040/sacos.S
@@ -0,0 +1,115 @@
1|
2| sacos.sa 3.3 12/19/90
3|
4| Description: The entry point sAcos computes the inverse cosine of
5| an input argument; sAcosd does the same except for denormalized
6| input.
7|
8| Input: Double-extended number X in location pointed to
9| by address register a0.
10|
11| Output: The value arccos(X) returned in floating-point register Fp0.
12|
13| Accuracy and Monotonicity: The returned result is within 3 ulps in
14| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
15| result is subsequently rounded to double precision. The
16| result is provably monotonic in double precision.
17|
18| Speed: The program sACOS takes approximately 310 cycles.
19|
20| Algorithm:
21|
22| ACOS
23| 1. If |X| >= 1, go to 3.
24|
25| 2. (|X| < 1) Calculate acos(X) by
26| z := (1-X) / (1+X)
27| acos(X) = 2 * atan( sqrt(z) ).
28| Exit.
29|
30| 3. If |X| > 1, go to 5.
31|
32| 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit.
33|
34| 5. (|X| > 1) Generate an invalid operation by 0 * infinity.
35| Exit.
36|
37
38| Copyright (C) Motorola, Inc. 1990
39| All Rights Reserved
40|
41| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
42| The copyright notice above does not evidence any
43| actual or intended publication of such source code.
44
45|SACOS idnt 2,1 | Motorola 040 Floating Point Software Package
46
47 |section 8
48
49PI: .long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000
50PIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
51
52 |xref t_operr
53 |xref t_frcinx
54 |xref satan
55
56 .global sacosd
57sacosd:
58|--ACOS(X) = PI/2 FOR DENORMALIZED X
59 fmovel %d1,%fpcr | ...load user's rounding mode/precision
60 fmovex PIBY2,%fp0 | ...FP0 = PI/2, moved under the user's FPCR
61 bra t_frcinx | ...exit through t_frcinx (defined elsewhere)
62
63 .global sacos
64sacos:
65 fmovex (%a0),%fp0 | ...LOAD INPUT
66
67 movel (%a0),%d0 | ...pack exponent with upper 16 fraction
68 movew 4(%a0),%d0 | ...low word = top 16 bits of the mantissa
69 andil #0x7FFFFFFF,%d0 | ...strip sign: d0 = |X| in compact form
70 cmpil #0x3FFF8000,%d0 | ...compare with 1.0 in compact form
71 bges ACOSBIG | ...|X| >= 1
72
73|--THIS IS THE USUAL CASE, |X| < 1
74|--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) )
75
76 fmoves #0x3F800000,%fp1 | ...FP1 = 1.0 (single-precision constant)
77 faddx %fp0,%fp1 | ...1+X
78 fnegx %fp0 | ... -X
79 fadds #0x3F800000,%fp0 | ...1-X
80 fdivx %fp1,%fp0 | ...(1-X)/(1+X)
81 fsqrtx %fp0 | ...SQRT((1-X)/(1+X))
82 fmovemx %fp0-%fp0,(%a0) | ...overwrite input
83 movel %d1,-(%sp) |save original users fpcr
84 clrl %d1 | ...satan loads d1 into FPCR before its last add: pass 0
85 bsr satan | ...ATAN(SQRT([1-X]/[1+X]))
86 fmovel (%sp)+,%fpcr |restore users exceptions
87 faddx %fp0,%fp0 | ...2 * ATAN( STUFF )
88 bra t_frcinx
89
90ACOSBIG:
91 fabsx %fp0 | ...|X|
92 fcmps #0x3F800000,%fp0 | ...compare |X| with 1.0
93 fbgt t_operr |cause an operr exception
94
95|--|X| = 1, ACOS(X) = 0 OR PI
96 movel (%a0),%d0 | ...pack exponent with upper 16 fraction
97 movew 4(%a0),%d0
98 cmpl #0,%d0 |D0 has original exponent+fraction
99 bgts ACOSP1 | ...sign bit clear: X = +1
100
101|--X = -1
102|Returns PI and inexact exception
103 fmovex PI,%fp0 | ...FP0 = PI
104 fmovel %d1,%FPCR | ...load user's rounding mode/precision
105 fadds #0x00800000,%fp0 |cause an inexact exception to be put
106| ;into the 040 - will not trap until next
107| ;fp inst.
108 bra t_frcinx
109
110ACOSP1:
111 fmovel %d1,%FPCR | ...load user's rounding mode/precision
112 fmoves #0x00000000,%fp0 | ...FP0 = +0
113 rts |Facos ; of +1 is exact
114
115 |end
diff --git a/arch/m68k/fpsp040/sasin.S b/arch/m68k/fpsp040/sasin.S
new file mode 100644
index 000000000000..5647a6043903
--- /dev/null
+++ b/arch/m68k/fpsp040/sasin.S
@@ -0,0 +1,104 @@
1|
2| sasin.sa 3.3 12/19/90
3|
4| Description: The entry point sAsin computes the inverse sine of
5| an input argument; sAsind does the same except for denormalized
6| input.
7|
8| Input: Double-extended number X in location pointed to
9| by address register a0.
10|
11| Output: The value arcsin(X) returned in floating-point register Fp0.
12|
13| Accuracy and Monotonicity: The returned result is within 3 ulps in
14| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
15| result is subsequently rounded to double precision. The
16| result is provably monotonic in double precision.
17|
18| Speed: The program sASIN takes approximately 310 cycles.
19|
20| Algorithm:
21|
22| ASIN
23| 1. If |X| >= 1, go to 3.
24|
25| 2. (|X| < 1) Calculate asin(X) by
26| z := sqrt( [1-X][1+X] )
27| asin(X) = atan( X / z ).
28| Exit.
29|
30| 3. If |X| > 1, go to 5.
31|
32| 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.
33|
34| 5. (|X| > 1) Generate an invalid operation by 0 * infinity.
35| Exit.
36|
37
38| Copyright (C) Motorola, Inc. 1990
39| All Rights Reserved
40|
41| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
42| The copyright notice above does not evidence any
43| actual or intended publication of such source code.
44
45|SASIN idnt 2,1 | Motorola 040 Floating Point Software Package
46
47 |section 8
48
49PIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
50
51 |xref t_operr
52 |xref t_frcinx
53 |xref t_extdnrm
54 |xref satan
55
56 .global sasind
57sasind:
58|--ASIN(X) = X FOR DENORMALIZED X
59
60 bra t_extdnrm | ...all work is done by t_extdnrm (defined elsewhere)
61
62 .global sasin
63sasin:
64 fmovex (%a0),%fp0 | ...LOAD INPUT
65
66 movel (%a0),%d0 | ...pack exponent with upper 16 fraction
67 movew 4(%a0),%d0
68 andil #0x7FFFFFFF,%d0 | ...strip sign: d0 = |X| in compact form
69 cmpil #0x3FFF8000,%d0 | ...compare with 1.0 in compact form
70 bges asinbig | ...|X| >= 1
71
72|--THIS IS THE USUAL CASE, |X| < 1
73|--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )
74
75 fmoves #0x3F800000,%fp1 | ...FP1 = 1.0 (single-precision constant)
76 fsubx %fp0,%fp1 | ...1-X
77 fmovemx %fp2-%fp2,-(%a7) | ...save FP2 on the stack
78 fmoves #0x3F800000,%fp2 | ...FP2 = 1.0
79 faddx %fp0,%fp2 | ...1+X
80 fmulx %fp2,%fp1 | ...(1+X)(1-X)
81 fmovemx (%a7)+,%fp2-%fp2 | ...restore FP2
82 fsqrtx %fp1 | ...SQRT([1-X][1+X])
83 fdivx %fp1,%fp0 | ...X/SQRT([1-X][1+X])
84 fmovemx %fp0-%fp0,(%a0) | ...overwrite input
85 bsr satan | ...ATAN(X/SQRT([1-X][1+X])); d1 is passed through unchanged
86 bra t_frcinx
87
88asinbig:
89 fabsx %fp0 | ...|X|
90 fcmps #0x3F800000,%fp0 | ...compare |X| with 1.0
91 fbgt t_operr |cause an operr exception
92
93|--|X| = 1, ASIN(X) = +- PI/2.
94
95 fmovex PIBY2,%fp0 | ...FP0 = PI/2
96 movel (%a0),%d0
97 andil #0x80000000,%d0 | ...SIGN BIT OF X
98 oril #0x3F800000,%d0 | ...+-1 IN SGL FORMAT
99 movel %d0,-(%sp) | ...push SIGN(X) IN SGL-FMT
100 fmovel %d1,%FPCR | ...load user's rounding mode/precision
101 fmuls (%sp)+,%fp0 | ...SIGN(X)*PI/2
102 bra t_frcinx
103
104 |end
diff --git a/arch/m68k/fpsp040/satan.S b/arch/m68k/fpsp040/satan.S
new file mode 100644
index 000000000000..20dae222d51e
--- /dev/null
+++ b/arch/m68k/fpsp040/satan.S
@@ -0,0 +1,478 @@
1|
2| satan.sa 3.3 12/19/90
3|
4| The entry point satan computes the arctangent of an
5| input value. satand does the same except the input value is a
6| denormalized number.
7|
8| Input: Double-extended value in memory location pointed to by address
9| register a0.
10|
11| Output: Arctan(X) returned in floating-point register Fp0.
12|
13| Accuracy and Monotonicity: The returned result is within 2 ulps in
14| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
15| result is subsequently rounded to double precision. The
16| result is provably monotonic in double precision.
17|
18| Speed: The program satan takes approximately 160 cycles for input
19| argument X such that 1/16 < |X| < 16. For the other arguments,
20| the program will run no worse than 10% slower.
21|
22| Algorithm:
23| Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.
24|
25| Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3.
26| Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits
27| of X with a bit-1 attached at the 6-th bit position. Define u
28| to be u = (X-F) / (1 + X*F).
29|
30| Step 3. Approximate arctan(u) by a polynomial poly.
31|
32| Step 4. Return arctan(F) + poly, arctan(F) is fetched from a table of values
33| calculated beforehand. Exit.
34|
35| Step 5. If |X| >= 16, go to Step 7.
36|
37| Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.
38|
39| Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'.
40| Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.
41|
42
43| Copyright (C) Motorola, Inc. 1990
44| All Rights Reserved
45|
46| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
47| The copyright notice above does not evidence any
48| actual or intended publication of such source code.
49
50|satan idnt 2,1 | Motorola 040 Floating Point Software Package
51
52 |section 8
53
54#include "fpsp.h"
55
56BOUNDS1: .long 0x3FFB8000,0x4002FFFF
57
58ONE: .long 0x3F800000
59
60 .long 0x00000000
61
62ATANA3: .long 0xBFF6687E,0x314987D8
63ATANA2: .long 0x4002AC69,0x34A26DB3
64
65ATANA1: .long 0xBFC2476F,0x4E1DA28E
66ATANB6: .long 0x3FB34444,0x7F876989
67
68ATANB5: .long 0xBFB744EE,0x7FAF45DB
69ATANB4: .long 0x3FBC71C6,0x46940220
70
71ATANB3: .long 0xBFC24924,0x921872F9
72ATANB2: .long 0x3FC99999,0x99998FA9
73
74ATANB1: .long 0xBFD55555,0x55555555
75ATANC5: .long 0xBFB70BF3,0x98539E6A
76
77ATANC4: .long 0x3FBC7187,0x962D1D7D
78ATANC3: .long 0xBFC24924,0x827107B8
79
80ATANC2: .long 0x3FC99999,0x9996263E
81ATANC1: .long 0xBFD55555,0x55555536
82
83PPIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
84NPIBY2: .long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
85PTINY: .long 0x00010000,0x80000000,0x00000000,0x00000000
86NTINY: .long 0x80010000,0x80000000,0x00000000,0x00000000
87
88ATANTBL:
89 .long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
90 .long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
91 .long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
92 .long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
93 .long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
94 .long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000
95 .long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
96 .long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
97 .long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
98 .long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
99 .long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
100 .long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
101 .long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
102 .long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
103 .long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
104 .long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
105 .long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
106 .long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
107 .long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
108 .long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
109 .long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
110 .long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
111 .long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
112 .long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
113 .long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
114 .long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
115 .long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
116 .long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
117 .long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
118 .long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
119 .long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
120 .long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
121 .long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
122 .long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
123 .long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
124 .long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
125 .long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
126 .long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
127 .long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
128 .long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
129 .long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
130 .long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
131 .long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
132 .long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
133 .long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
134 .long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
135 .long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
136 .long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
137 .long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
138 .long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
139 .long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
140 .long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
141 .long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
142 .long 0x3FFE0000,0x97731420,0x365E538C,0x00000000
143 .long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
144 .long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
145 .long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
146 .long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
147 .long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
148 .long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
149 .long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
150 .long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
151 .long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
152 .long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
153 .long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
154 .long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
155 .long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
156 .long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
157 .long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000
158 .long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
159 .long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
160 .long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
161 .long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
162 .long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
163 .long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
164 .long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
165 .long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
166 .long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
167 .long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
168 .long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
169 .long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
170 .long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
171 .long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
172 .long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
173 .long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
174 .long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
175 .long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
176 .long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000
177 .long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
178 .long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
179 .long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
180 .long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
181 .long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
182 .long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
183 .long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
184 .long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
185 .long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
186 .long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
187 .long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
188 .long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
189 .long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
190 .long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
191 .long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
192 .long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
193 .long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
194 .long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000
195 .long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
196 .long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
197 .long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
198 .long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
199 .long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
200 .long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
201 .long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
202 .long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
203 .long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
204 .long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
205 .long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
206 .long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
207 .long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
208 .long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
209 .long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
210 .long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
211 .long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
212 .long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
213 .long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
214 .long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
215 .long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
216 .long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
217
218 .set X,FP_SCR1
219 .set XDCARE,X+2
220 .set XFRAC,X+4
221 .set XFRACLO,X+8
222
223 .set ATANF,FP_SCR2
224 .set ATANFHI,ATANF+4
225 .set ATANFLO,ATANF+8
226
227
228 | xref t_frcinx
229 |xref t_extdnrm
230
231 .global satand
232satand:
233|--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
234
235 bra t_extdnrm | ...all work is done by t_extdnrm (defined elsewhere)
236
237 .global satan
238satan:
239|--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT A NAN
240
241 fmovex (%a0),%fp0 | ...LOAD INPUT
242
243 movel (%a0),%d0 | ...pack exponent with upper 16 fraction
244 movew 4(%a0),%d0
245 fmovex %fp0,X(%a6) | ...keep a copy of X in the scratch area
246 andil #0x7FFFFFFF,%d0 | ...strip sign: d0 = |X| in compact form
247
248 cmpil #0x3FFB8000,%d0 | ...|X| >= 1/16?
249 bges ATANOK1
250 bra ATANSM | ...|X| < 1/16
251
252ATANOK1:
253 cmpil #0x4002FFFF,%d0 | ...|X| < 16 ?
254 bles ATANMAIN
255 bra ATANBIG | ...|X| >= 16
256
257
258|--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
259|--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
260|--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
261|--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
262|--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
263|--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
264|--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
265|--FETCH F AND SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE
266|--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
267|--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
268|--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
269|--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
270|--WILL INVOLVE A VERY LONG POLYNOMIAL.
271
272|--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
273|--WE CHOSE F TO BE +-2^K * 1.BBBB1
274|--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
275|--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
276|--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
277|-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
278
279ATANMAIN:
280
281 movew #0x0000,XDCARE(%a6) | ...CLEAN UP X JUST IN CASE
282 andil #0xF8000000,XFRAC(%a6) | ...FIRST 5 BITS
283 oril #0x04000000,XFRAC(%a6) | ...SET 6-TH BIT TO 1
284 movel #0x00000000,XFRACLO(%a6) | ...LOCATION OF X IS NOW F
285
286 fmovex %fp0,%fp1 | ...FP1 IS X
287 fmulx X(%a6),%fp1 | ...FP1 IS X*F, NOTE THAT X*F > 0
288 fsubx X(%a6),%fp0 | ...FP0 IS X-F
289 fadds #0x3F800000,%fp1 | ...FP1 IS 1 + X*F
290 fdivx %fp1,%fp0 | ...FP0 IS U = (X-F)/(1+X*F)
291
292|--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|),
293|--CREATE ATAN(F) AND STORE IT IN ATANF.
294|--(ONLY d2 IS SAVED AND RESTORED HERE; FP2 IS NOT SAVED.)
295
296 movel %d2,-(%a7) | ...SAVE d2 TEMPORARILY
297 movel %d0,%d2 | ...THE EXPO AND 16 BITS OF X
298 andil #0x00007800,%d0 | ...4 VARYING BITS OF F'S FRACTION
299 andil #0x7FFF0000,%d2 | ...EXPONENT OF F
300 subil #0x3FFB0000,%d2 | ...K+4
301 asrl #1,%d2 | ...MOVE K+4 DOWN TO SIT JUST ABOVE THE 4 FRACTION BITS
302 addl %d2,%d0 | ...THE 7 BITS IDENTIFYING F
303 asrl #7,%d0 | ...INDEX INTO TBL OF ATAN(|F|)
304 lea ATANTBL,%a1
305 addal %d0,%a1 | ...ADDRESS OF ATAN(|F|)
306 movel (%a1)+,ATANF(%a6)
307 movel (%a1)+,ATANFHI(%a6)
308 movel (%a1)+,ATANFLO(%a6) | ...ATANF IS NOW ATAN(|F|)
309 movel X(%a6),%d0 | ...LOAD SIGN AND EXPO. AGAIN
310 andil #0x80000000,%d0 | ...SIGN(F)
311 orl %d0,ATANF(%a6) | ...ATANF IS NOW SIGN(F)*ATAN(|F|)
312 movel (%a7)+,%d2 | ...RESTORE d2
313
314|--THAT'S ALL I HAVE TO DO FOR NOW,
315|--BUT ALAS, THE DIVIDE IS STILL CRANKING!
316
317|--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
318|--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
319|--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
320|--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
321|--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
322|--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
323|--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
324
325
326 fmovex %fp0,%fp1 | ...FP1 IS U
327 fmulx %fp1,%fp1 | ...FP1 IS V = U*U
328 fmoved ATANA3,%fp2 | ...FP2 IS A3
329 faddx %fp1,%fp2 | ...A3+V
330 fmulx %fp1,%fp2 | ...V*(A3+V)
331 fmulx %fp0,%fp1 | ...U*V
332 faddd ATANA2,%fp2 | ...A2+V*(A3+V)
333 fmuld ATANA1,%fp1 | ...A1*U*V
334 fmulx %fp2,%fp1 | ...A1*U*V*(A2+V*(A3+V))
335
336 faddx %fp1,%fp0 | ...ATAN(U), FP1 RELEASED
337 fmovel %d1,%FPCR |restore users exceptions
338 faddx ATANF(%a6),%fp0 | ...ATAN(X)
339 bra t_frcinx
340
341ATANBORS:
342|--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED.
343|--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
344|--NOTE: NO BRANCH IN THIS FILE TARGETS ATANBORS AND IT IS NOT .global;
345 bgt ATANBIG | ...I.E. |X| >= 16
346
347ATANSM:
348|--|X| <= 1/16
349|--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
350|--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
351|--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
352|--WHERE Y = X*X, AND Z = Y*Y.
353
354 cmpil #0x3FD78000,%d0 | ...|X| < 2^(-40) ?
355 blt ATANTINY
356|--COMPUTE POLYNOMIAL
357 fmulx %fp0,%fp0 | ...FP0 IS Y = X*X
358
359
360 movew #0x0000,XDCARE(%a6) | ...CLEAN UP X JUST IN CASE
361
362 fmovex %fp0,%fp1
363 fmulx %fp1,%fp1 | ...FP1 IS Z = Y*Y
364
365 fmoved ATANB6,%fp2 | ...FP2 IS B6
366 fmoved ATANB5,%fp3 | ...FP3 IS B5
367
368 fmulx %fp1,%fp2 | ...Z*B6
369 fmulx %fp1,%fp3 | ...Z*B5
370
371 faddd ATANB4,%fp2 | ...B4+Z*B6
372 faddd ATANB3,%fp3 | ...B3+Z*B5
373
374 fmulx %fp1,%fp2 | ...Z*(B4+Z*B6)
375 fmulx %fp3,%fp1 | ...Z*(B3+Z*B5)
376
377 faddd ATANB2,%fp2 | ...B2+Z*(B4+Z*B6)
378 faddd ATANB1,%fp1 | ...B1+Z*(B3+Z*B5)
379
380 fmulx %fp0,%fp2 | ...Y*(B2+Z*(B4+Z*B6))
381 fmulx X(%a6),%fp0 | ...X*Y
382
383 faddx %fp2,%fp1 | ...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
384
385
386 fmulx %fp1,%fp0 | ...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
387
388 fmovel %d1,%FPCR |restore users exceptions
389 faddx X(%a6),%fp0 | ...X + X*Y*(POLY) = ATAN(X)
390
391 bra t_frcinx
392
393ATANTINY:
394|--|X| < 2^(-40), ATAN(X) = X
395 movew #0x0000,XDCARE(%a6) | ...CLEAN UP X JUST IN CASE
396
397 fmovel %d1,%FPCR |restore users exceptions
398 fmovex X(%a6),%fp0 |last inst - possible exception set
399
400 bra t_frcinx
401
402ATANBIG:
403|--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
404|--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
405 cmpil #0x40638000,%d0 | ...|X| > 2^(100) ?
406 bgt ATANHUGE
407
408|--APPROXIMATE ATAN(-1/X) BY
409|--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
410|--THIS CAN BE RE-WRITTEN AS
411|--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
412
413 fmoves #0xBF800000,%fp1 | ...LOAD -1
414 fdivx %fp0,%fp1 | ...FP1 IS -1/X
415
416
417|--DIVIDE IS STILL CRANKING
418
419 fmovex %fp1,%fp0 | ...FP0 IS X'
420 fmulx %fp0,%fp0 | ...FP0 IS Y = X'*X'
421 fmovex %fp1,X(%a6) | ...X IS REALLY X'
422
423 fmovex %fp0,%fp1
424 fmulx %fp1,%fp1 | ...FP1 IS Z = Y*Y
425
426 fmoved ATANC5,%fp3 | ...FP3 IS C5
427 fmoved ATANC4,%fp2 | ...FP2 IS C4
428
429 fmulx %fp1,%fp3 | ...Z*C5
430 fmulx %fp1,%fp2 | ...Z*C4
431
432 faddd ATANC3,%fp3 | ...C3+Z*C5
433 faddd ATANC2,%fp2 | ...C2+Z*C4
434
435 fmulx %fp3,%fp1 | ...Z*(C3+Z*C5), FP3 RELEASED
436 fmulx %fp0,%fp2 | ...Y*(C2+Z*C4)
437
438 faddd ATANC1,%fp1 | ...C1+Z*(C3+Z*C5)
439 fmulx X(%a6),%fp0 | ...X'*Y
440
441 faddx %fp2,%fp1 | ...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
442
443
444 fmulx %fp1,%fp0 | ...X'*Y*([C1+Z*(C3+Z*C5)]
445| ... +[Y*(C2+Z*C4)])
446 faddx X(%a6),%fp0 | ...X' + X'*Y*(POLY) = ATAN(X')
447
448 fmovel %d1,%FPCR |restore users exceptions
449
450 btstb #7,(%a0) | ...TEST SIGN BIT OF THE OPERAND AT (a0)
451 beqs pos_big
452
453neg_big:
454 faddx NPIBY2,%fp0 | ...-PI/2 + ATAN(X')
455 bra t_frcinx
456
457pos_big:
458 faddx PPIBY2,%fp0 | ...+PI/2 + ATAN(X')
459 bra t_frcinx
460
461ATANHUGE:
462|--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
463 btstb #7,(%a0) | ...TEST SIGN BIT OF THE OPERAND AT (a0)
464 beqs pos_huge
465
466neg_huge:
467 fmovex NPIBY2,%fp0 | ...-PI/2
468 fmovel %d1,%fpcr | ...LOAD d1 INTO FPCR BEFORE THE FINAL OPERATION
469 fsubx NTINY,%fp0 | ...-PI/2 - (-TINY)
470 bra t_frcinx
471
472pos_huge:
473 fmovex PPIBY2,%fp0 | ...+PI/2
474 fmovel %d1,%fpcr | ...LOAD d1 INTO FPCR BEFORE THE FINAL OPERATION
475 fsubx PTINY,%fp0 | ...+PI/2 - TINY
476 bra t_frcinx
477
478 |end
diff --git a/arch/m68k/fpsp040/satanh.S b/arch/m68k/fpsp040/satanh.S
new file mode 100644
index 000000000000..20f07810bcda
--- /dev/null
+++ b/arch/m68k/fpsp040/satanh.S
@@ -0,0 +1,104 @@
1|
2| satanh.sa 3.3 12/19/90
3|
4| The entry point satanh computes the inverse
5| hyperbolic tangent of
6| an input argument; satanhd does the same except for denormalized
7| input.
8|
9| Input: Double-extended number X in location pointed to
10| by address register a0.
11|
12| Output: The value arctanh(X) returned in floating-point register Fp0.
13|
14| Accuracy and Monotonicity: The returned result is within 3 ulps in
15| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
16| result is subsequently rounded to double precision. The
17| result is provably monotonic in double precision.
18|
19| Speed: The program satanh takes approximately 270 cycles.
20|
21| Algorithm:
22|
23| ATANH
24| 1. If |X| >= 1, go to 3.
25|
26| 2. (|X| < 1) Calculate atanh(X) by
27| sgn := sign(X)
28| y := |X|
29| z := 2y/(1-y)
30| atanh(X) := sgn * (1/2) * logp1(z)
31| Exit.
32|
33| 3. If |X| > 1, go to 5.
34|
35| 4. (|X| = 1) Generate infinity with an appropriate sign and
36| divide-by-zero by
37| sgn := sign(X)
38| atanh(X) := sgn / (+0).
39| Exit.
40|
41| 5. (|X| > 1) Generate an invalid operation by 0 * infinity.
42| Exit.
43|
44
45| Copyright (C) Motorola, Inc. 1990
46| All Rights Reserved
47|
48| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
49| The copyright notice above does not evidence any
50| actual or intended publication of such source code.
51
52|satanh idnt 2,1 | Motorola 040 Floating Point Software Package
53
54 |section 8
55
56 |xref t_dz
57 |xref t_operr
58 |xref t_frcinx
59 |xref t_extdnrm
60 |xref slognp1
61
62 .global satanhd
63satanhd:
64|--ATANH(X) = X FOR DENORMALIZED X
65
66 bra t_extdnrm | ...all work is done by t_extdnrm (defined elsewhere)
67
68 .global satanh
69satanh:
70 movel (%a0),%d0 | ...pack exponent with upper 16 fraction
71 movew 4(%a0),%d0
72 andil #0x7FFFFFFF,%d0 | ...strip sign: d0 = |X| in compact form
73 cmpil #0x3FFF8000,%d0 | ...compare with 1.0 in compact form
74 bges ATANHBIG | ...|X| >= 1
75
76|--THIS IS THE USUAL CASE, |X| < 1
77|--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
78
79 fabsx (%a0),%fp0 | ...Y = |X|
80 fmovex %fp0,%fp1
81 fnegx %fp1 | ...-Y
82 faddx %fp0,%fp0 | ...2Y
83 fadds #0x3F800000,%fp1 | ...1-Y
84 fdivx %fp1,%fp0 | ...2Y/(1-Y)
85 movel (%a0),%d0
86 andil #0x80000000,%d0 | ...SIGN BIT OF X
87 oril #0x3F000000,%d0 | ...SIGN(X)*HALF
88 movel %d0,-(%sp) | ...push SIGN(X)*HALF IN SGL-FMT
89
90 fmovemx %fp0-%fp0,(%a0) | ...overwrite input
91 movel %d1,-(%sp) | ...save d1 (reloaded into FPCR after the call)
92 clrl %d1 | ...call slognp1 with d1 = 0
93 bsr slognp1 | ...LOG1P(Z)
94 fmovel (%sp)+,%fpcr | ...FPCR = the saved d1
95 fmuls (%sp)+,%fp0 | ...SIGN(X)*(1/2)*LOG1P(Z)
96 bra t_frcinx
97
98ATANHBIG:
99 fabsx (%a0),%fp0 | ...|X|
100 fcmps #0x3F800000,%fp0 | ...compare |X| with 1.0
101 fbgt t_operr | ...|X| > 1
102 bra t_dz | ...|X| = 1
103
104 |end
diff --git a/arch/m68k/fpsp040/scale.S b/arch/m68k/fpsp040/scale.S
new file mode 100644
index 000000000000..5c9b805265f2
--- /dev/null
+++ b/arch/m68k/fpsp040/scale.S
@@ -0,0 +1,371 @@
1|
2| scale.sa 3.3 7/30/91
3|
4| The entry point sSCALE computes the destination operand
5| scaled by the source operand. If the absolute value of
6| the source operand is (>= 2^14) an overflow or underflow
7| is returned.
8|
9| The entry point sscale is called from do_func to emulate
10| the fscale unimplemented instruction.
11|
12| Input: Double-extended destination operand in FPTEMP,
13| double-extended source operand in ETEMP.
14|
15| Output: The function returns scale(X,Y) to fp0.
16|
17| Modifies: fp0.
18|
19| Algorithm:
20|
21| Copyright (C) Motorola, Inc. 1990
22| All Rights Reserved
23|
24| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
25| The copyright notice above does not evidence any
26| actual or intended publication of such source code.
27
28|SCALE idnt 2,1 | Motorola 040 Floating Point Software Package
29
30 |section 8
31
32#include "fpsp.h"
33
34 |xref t_ovfl2
35 |xref t_unfl
36 |xref round
37 |xref t_resdnrm
38
39SRC_BNDS: .short 0x3fff,0x400c
40
41|
42| This entry point is used by the unimplemented instruction exception
43| handler.
44|
45|
46|
47| FSCALE
48|
49 .global sscale
50sscale:
51 fmovel #0,%fpcr |clr user enabled exc
52 clrl %d1 |clear d1 so the upper word is zero
53 movew FPTEMP(%a6),%d1 |get dest exponent
54 smi L_SCR1(%a6) |use L_SCR1 to hold sign
55 andil #0x7fff,%d1 |strip sign
56 movew ETEMP(%a6),%d0 |check src bounds
57 andiw #0x7fff,%d0 |clr sign bit
58 cmp2w SRC_BNDS,%d0 |src exponent within [0x3fff,0x400c]?
59 bccs src_in |yes: 1 <= |src| < 2^14
60 cmpiw #0x400c,%d0 |test for too large
61 bge src_out |above range; else fall into src_small
62|
63| The source input is below 1, so we check for denormalized numbers
64| and set unfl.
65|
66src_small:
67 moveb DTAG(%a6),%d0 |get the dest tag byte
68 andib #0xe0,%d0 |keep its top three bits
69 tstb %d0
70 beqs no_denorm |all clear: dest is not tagged as denorm
71 st STORE_FLG(%a6) |dest already contains result
72 orl #unfl_mask,USER_FPSR(%a6) |set UNFL
73den_done:
74 leal FPTEMP(%a6),%a0 |a0 -> dest operand for t_resdnrm
75 bra t_resdnrm
76no_denorm:
77 fmovel USER_FPCR(%a6),%FPCR |reload the user's FPCR
78 fmovex FPTEMP(%a6),%fp0 |simply return dest
79 rts
80
81
82|
83| Source is within 2^14 range. To perform the int operation,
84| move it to d0.
85|
86src_in:
87 fmovex ETEMP(%a6),%fp0 |move in src for int
88 fmovel #rz_mode,%fpcr |force rz for src conversion
89 fmovel %fp0,%d0 |int src to d0
90 fmovel #0,%FPSR |clr status from above
91 tstw ETEMP(%a6) |check src sign
92 blt src_neg |negative src; else fall into src_pos
93|
94| Source is positive. Add the src to the dest exponent.
95| The result can be denormalized, if src = 0, or overflow,
96| if the result of the add sets a bit in the upper word.
97|
98src_pos:
99 tstw %d1 |check for denorm
100 beq dst_dnrm |dest exponent is zero
101 addl %d0,%d1 |add src to dest exp
102 beqs denorm |if zero, result is denorm
103 cmpil #0x7fff,%d1 |test for overflow
104 bges ovfl
105 tstb L_SCR1(%a6) |dest negative?
106 beqs spos_pos
107 orw #0x8000,%d1 |put sign bit back on the exponent word
108spos_pos:
109 movew %d1,FPTEMP(%a6) |result in FPTEMP
110 fmovel USER_FPCR(%a6),%FPCR |reload the user's FPCR
111 fmovex FPTEMP(%a6),%fp0 |write result to fp0
112 rts
113ovfl:
114 tstb L_SCR1(%a6) |dest negative?
115 beqs sovl_pos
116 orw #0x8000,%d1 |sign added to d1 (d1 is not stored below)
117sovl_pos:
118 movew FPTEMP(%a6),ETEMP(%a6) |result in ETEMP
119 movel FPTEMP_HI(%a6),ETEMP_HI(%a6)
120 movel FPTEMP_LO(%a6),ETEMP_LO(%a6)
121 bra t_ovfl2
122
123denorm: |result exponent came out as zero
124 tstb L_SCR1(%a6) |dest negative?
125 beqs den_pos
126 orw #0x8000,%d1 |put sign bit back on the exponent word
127den_pos:
128 tstl FPTEMP_HI(%a6) |check j bit
129 blts nden_exit |if set, not denorm
130 movew %d1,ETEMP(%a6) |input expected in ETEMP
131 movel FPTEMP_HI(%a6),ETEMP_HI(%a6)
132 movel FPTEMP_LO(%a6),ETEMP_LO(%a6)
133 orl #unfl_mask,USER_FPSR(%a6) |set unfl (OR needs the mask, not the bit number)
134 leal ETEMP(%a6),%a0 |a0 -> operand for t_resdnrm
135 bra t_resdnrm
136nden_exit:
137 movew %d1,FPTEMP(%a6) |result in FPTEMP
138 fmovel USER_FPCR(%a6),%FPCR |reload the user's FPCR
139 fmovex FPTEMP(%a6),%fp0 |write result to fp0
140 rts
141
142|
143| Source is negative. Add the src to the dest exponent.
144| (The result exponent will be reduced). The result can be
145| denormalized.
146|
147src_neg:
148 addl %d0,%d1 |add src to dest
149 beqs denorm |if zero, result is denorm
150 blts fix_dnrm |if negative, result is
151| ;needing denormalization
152 tstb L_SCR1(%a6) |dest negative?
153 beqs sneg_pos
154 orw #0x8000,%d1 |put sign bit back on the exponent word
155sneg_pos:
156 movew %d1,FPTEMP(%a6) |result in FPTEMP
157 fmovel USER_FPCR(%a6),%FPCR |reload the user's FPCR
158 fmovex FPTEMP(%a6),%fp0 |write result to fp0
159 rts
160
161
162|
163| The result exponent is below denorm value. Test for catastrophic
164| underflow and force zero if true. If not, try to shift the
165| mantissa right until a zero exponent exists.
166|
167fix_dnrm:
168 cmpiw #0xffc0,%d1 |lower bound for normalization
169 blt fix_unfl |if lower, catastrophic unfl
170 movew %d1,%d0 |use d0 for exp
171 movel %d2,-(%a7) |free d2 for norm
172 movel FPTEMP_HI(%a6),%d1 |d1 = mantissa high longword
173 movel FPTEMP_LO(%a6),%d2 |d2 = mantissa low longword
174 clrl L_SCR2(%a6) |no bits lost yet
175fix_loop:
176 addw #1,%d0 |drive d0 to 0
177 lsrl #1,%d1 |while shifting the
178 roxrl #1,%d2 |mantissa to the right
179 bccs no_carry |bit shifted out was zero
180 st L_SCR2(%a6) |use L_SCR2 to capture inex
181no_carry:
182 tstw %d0 |keep looping while
183 blts fix_loop |d0 is still negative
184 tstb L_SCR2(%a6) |any bits lost in the shift?
185 beqs tst_zero |no: skip the flag update
186 orl #unfl_inx_mask,USER_FPSR(%a6)
187| ;set unfl, aunfl, ainex
188|
189| Test for zero. If zero, simply use fmove to return +/- zero
190| to the fpu.
191|
192tst_zero:
193 clrw FPTEMP_EX(%a6) |exponent word = 0
194 tstb L_SCR1(%a6) |test for sign
195 beqs tst_con
196 orw #0x8000,FPTEMP_EX(%a6) |set sign bit
197tst_con:
198 movel %d1,FPTEMP_HI(%a6) |store the shifted mantissa
199 movel %d2,FPTEMP_LO(%a6)
200 movel (%a7)+,%d2 |restore d2 saved in fix_dnrm
201 tstl %d1 |high mantissa non-zero?
202 bnes not_zero
203 tstl FPTEMP_LO(%a6) |low mantissa non-zero?
204 bnes not_zero
205|
206| Result is zero. Check for rounding mode to set lsb. If the
207| mode is rp, and the zero is positive, return smallest denorm.
208| If the mode is rm, and the zero is negative, return smallest
209| negative denorm.
210|
211 btstb #5,FPCR_MODE(%a6) |test if rm or rp
212 beqs no_dir
213 btstb #4,FPCR_MODE(%a6) |check which one
214 beqs zer_rm
215zer_rp:
216 tstb L_SCR1(%a6) |check sign
217 bnes no_dir |if set, neg op, no inc
218 movel #1,FPTEMP_LO(%a6) |set lsb
219 bras sm_dnrm
220zer_rm:
221 tstb L_SCR1(%a6) |check sign
222 beqs no_dir |if clr, pos op, no inc
223 movel #1,FPTEMP_LO(%a6) |set lsb
224 orl #neg_mask,USER_FPSR(%a6) |set N
225 bras sm_dnrm
226no_dir:
227 fmovel USER_FPCR(%a6),%FPCR |reload the user's FPCR
228 fmovex FPTEMP(%a6),%fp0 |use fmove to set cc's
229 rts
230
231|
232| The rounding mode changed the zero to a smallest denorm. Call
233| t_resdnrm with exceptional operand in ETEMP.
234|
235sm_dnrm:
236 movel FPTEMP_EX(%a6),ETEMP_EX(%a6) |copy the operand from FPTEMP
237 movel FPTEMP_HI(%a6),ETEMP_HI(%a6) |into ETEMP, one longword
238 movel FPTEMP_LO(%a6),ETEMP_LO(%a6) |at a time
239 leal ETEMP(%a6),%a0 |a0 -> operand for t_resdnrm
240 bra t_resdnrm
241
242|
243| Result is still denormalized.
244|
245not_zero:
246 orl #unfl_mask,USER_FPSR(%a6) |set unfl
247 tstb L_SCR1(%a6) |check for sign
248 beqs fix_exit
249 orl #neg_mask,USER_FPSR(%a6) |set N
250fix_exit:
251 bras sm_dnrm |hand the denorm to t_resdnrm via ETEMP
252
253
254|
255| The result has underflowed to zero. Return zero and set
256| unfl, aunfl, and ainex.
257|
258fix_unfl:
259 orl #unfl_inx_mask,USER_FPSR(%a6) |set unfl, aunfl, ainex
260 btstb #5,FPCR_MODE(%a6) |test if rm or rp
261 beqs no_dir2
262 btstb #4,FPCR_MODE(%a6) |check which one
263 beqs zer_rm2
264zer_rp2:
265 tstb L_SCR1(%a6) |check sign
266 bnes no_dir2 |if set, neg op, no inc
267 clrl FPTEMP_EX(%a6) |positive sign, zero exponent
268 clrl FPTEMP_HI(%a6)
269 movel #1,FPTEMP_LO(%a6) |set lsb
270 bras sm_dnrm |return smallest denorm
271zer_rm2:
272 tstb L_SCR1(%a6) |check sign
273 beqs no_dir2 |if clr, pos op, no inc
274 movew #0x8000,FPTEMP_EX(%a6) |negative sign, zero exponent
275 clrl FPTEMP_HI(%a6)
276 movel #1,FPTEMP_LO(%a6) |set lsb
277 orl #neg_mask,USER_FPSR(%a6) |set N
278 bra sm_dnrm |return smallest denorm
279
280no_dir2:
281 tstb L_SCR1(%a6) |L_SCR1 is 0 or 0xff (written by smi)
282 bges pos_zero |zero byte: positive operand
283neg_zero:
284 clrl FP_SCR1(%a6) |clear the exceptional operand
285 clrl FP_SCR1+4(%a6) |for gen_except.
286 clrl FP_SCR1+8(%a6)
287 fmoves #0x80000000,%fp0 |return -0
288 rts
289pos_zero:
290 clrl FP_SCR1(%a6) |clear the exceptional operand
291 clrl FP_SCR1+4(%a6) |for gen_except.
292 clrl FP_SCR1+8(%a6)
293 fmoves #0x00000000,%fp0 |return +0
294 rts
295
296|
297| The destination is a denormalized number. It must be handled
298| by first shifting the bits in the mantissa until it is normalized,
299| then adding the remainder of the source to the exponent.
300|
301dst_dnrm:
302 moveml %d2/%d3,-(%a7) |free d2/d3 for the mantissa
303 movew FPTEMP_EX(%a6),%d1 |d1 = dest sign/exponent word
304 movel FPTEMP_HI(%a6),%d2 |d2 = mantissa high longword
305 movel FPTEMP_LO(%a6),%d3 |d3 = mantissa low longword
306dst_loop:
307 tstl %d2 |test for normalized result
308 blts dst_norm |exit loop if so
309 tstl %d0 |otherwise, test shift count
310 beqs dst_fin |if zero, shifting is done
311 subil #1,%d0 |dec src
312 lsll #1,%d3 |shift the 64-bit mantissa
313 roxll #1,%d2 |left by one bit
314 bras dst_loop
315|
316| Destination became normalized. Simply add the remaining
317| portion of the src to the exponent.
318|
319dst_norm:
320 addw %d0,%d1 |dst is normalized; add src
321 tstb L_SCR1(%a6) |dest negative?
322 beqs dnrm_pos
323 orl #0x8000,%d1 |set sign bit in the exponent word
324dnrm_pos:
325 movemw %d1,FPTEMP_EX(%a6) |store sign/exponent word
326 moveml %d2,FPTEMP_HI(%a6) |store mantissa high
327 moveml %d3,FPTEMP_LO(%a6) |store mantissa low
328 fmovel USER_FPCR(%a6),%FPCR |reload the user's FPCR
329 fmovex FPTEMP(%a6),%fp0 |write result to fp0
330 moveml (%a7)+,%d2/%d3 |restore d2/d3 saved in dst_dnrm
331 rts
332
333|
334| Destination remained denormalized. Call t_resdnrm with
335| exceptional operand in ETEMP.
336|
337dst_fin:
338 tstb L_SCR1(%a6) |check for sign
339 beqs dst_exit
340 orl #neg_mask,USER_FPSR(%a6) |set N
341 orl #0x8000,%d1 |set sign bit in the exponent word
342dst_exit:
343 movemw %d1,ETEMP_EX(%a6) |store sign/exponent word
344 moveml %d2,ETEMP_HI(%a6) |store mantissa high
345 moveml %d3,ETEMP_LO(%a6) |store mantissa low
346 orl #unfl_mask,USER_FPSR(%a6) |set unfl
347 moveml (%a7)+,%d2/%d3 |restore d2/d3 saved in dst_dnrm
348 leal ETEMP(%a6),%a0 |a0 -> operand for t_resdnrm
349 bra t_resdnrm
350
351|
352| Source is outside of 2^14 range. Test the sign and branch
353| to the appropriate exception handler.
354|
355src_out:
356 tstb L_SCR1(%a6) |dest negative?
357 beqs scro_pos
358 orl #0x8000,%d1 |set sign bit in the exponent word
359scro_pos:
360 movel FPTEMP_HI(%a6),ETEMP_HI(%a6) |copy dest mantissa into ETEMP
361 movel FPTEMP_LO(%a6),ETEMP_LO(%a6)
362 tstw ETEMP(%a6) |check src sign (ETEMP word not yet overwritten)
363 blts res_neg
364res_pos:
365 movew %d1,ETEMP(%a6) |result in ETEMP
366 bra t_ovfl2 |positive src: overflow
367res_neg:
368 movew %d1,ETEMP(%a6) |result in ETEMP
369 leal ETEMP(%a6),%a0 |a0 -> operand for t_unfl
370 bra t_unfl |negative src: underflow
371 |end
diff --git a/arch/m68k/fpsp040/scosh.S b/arch/m68k/fpsp040/scosh.S
new file mode 100644
index 000000000000..e81edbb87642
--- /dev/null
+++ b/arch/m68k/fpsp040/scosh.S
@@ -0,0 +1,132 @@
1|
2| scosh.sa 3.1 12/10/90
3|
4| The entry point sCosh computes the hyperbolic cosine of
5| an input argument; sCoshd does the same except for denormalized
6| input.
7|
8| Input: Double-extended number X in location pointed to
9| by address register a0.
10|
11| Output: The value cosh(X) returned in floating-point register Fp0.
12|
13| Accuracy and Monotonicity: The returned result is within 3 ulps in
14| 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
15| result is subsequently rounded to double precision. The
16| result is provably monotonic in double precision.
17|
18| Speed: The program sCOSH takes approximately 250 cycles.
19|
20| Algorithm:
21|
22| COSH
23| 1. If |X| > 16380 log2, go to 3.
24|
25| 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae
26| Y = |X|, z = exp(Y), and
27| cosh(X) = (1/2)*( z + 1/z ).
28| Exit.
29|
30| 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5.
31|
32| 4. (16380 log2 < |X| <= 16480 log2)
33| cosh(X) = exp(|X|)/2.
34| However, invoking exp(|X|) may cause premature overflow.
35| Thus, we calculate cosh(X) as follows:
36| Y := |X|
37| Fact := 2**(16380)
38| Y' := Y - 16381 log2
39| cosh(X) := Fact * exp(Y').
40| Exit.
41|
42| 5. (|X| > 16480 log2) cosh(X) must overflow. Return
43| Huge*Huge to generate overflow and a positive infinity
44| (the sign of X is ignored). Huge is the largest finite number in
45| extended format. Exit.
46|
47|
48
49| Copyright (C) Motorola, Inc. 1990
50| All Rights Reserved
51|
52| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
53| The copyright notice above does not evidence any
54| actual or intended publication of such source code.
55
56|SCOSH idnt 2,1 | Motorola 040 Floating Point Software Package
57
58 |section 8
59
60 |xref t_ovfl
61 |xref t_frcinx
62 |xref setox
63
64T1: .long 0x40C62D38,0xD3D64634 | ... 16381 LOG2 LEAD (double; subtracted first in COSHBIG)
65T2: .long 0x3D6F90AE,0xB1E75CC7 | ... 16381 LOG2 TRAIL (double; subtracted second in COSHBIG)
66
67TWO16380: .long 0x7FFB0000,0x80000000,0x00000000,0x00000000 | ... 2**16380 in extended format ("Fact" of step 4)
68
69 .global scoshd
70scoshd:
71|--COSH(X) = 1 FOR DENORMALIZED X
72|--Computed as 1 + 2^(-126) under the FPCR taken from d1; result in fp0.
73 fmoves #0x3F800000,%fp0 | ...fp0 = 1.0 (single-precision immediate)
74
75 fmovel %d1,%FPCR | ...FPCR := d1 before the final add
76 fadds #0x00800000,%fp0 | ...1 + 2^(-126), rounded under that FPCR
77 bra t_frcinx | ...exit through t_frcinx (defined elsewhere)
78
79 .global scosh
80scosh:
81 fmovex (%a0),%fp0 | ...LOAD INPUT
82|--Build the compact form of |X| in d0: exponent word : top 16 mantissa bits.
83 movel (%a0),%d0 | ...upper word of d0 = sign/exponent of X
84 movew 4(%a0),%d0 | ...lower word of d0 = top 16 bits of mantissa
85 andil #0x7FFFFFFF,%d0 | ...drop the sign: compact |X|
86 cmpil #0x400CB167,%d0 | ...compact form of 16380 log2
87 bgts COSHBIG | ...|X| above it: steps 3-5
88
89|--THIS IS THE USUAL CASE, |X| < 16380 LOG2
90|--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )
91
92 fabsx %fp0 | ...|X|
93
94 movel %d1,-(%sp) | ...save d1 (loaded into FPCR below)
95 clrl %d1 | ...setox is called with d1 = 0
96 fmovemx %fp0-%fp0,(%a0) |pass parameter to setox
97 bsr setox | ...FP0 IS EXP(|X|)
98 fmuls #0x3F000000,%fp0 | ...(1/2)EXP(|X|)
99 movel (%sp)+,%d1 | ...recover the saved d1
100
101 fmoves #0x3E800000,%fp1 | ...(1/4)
102 fdivx %fp0,%fp1 | ...1/(2 EXP(|X|))
103
104 fmovel %d1,%FPCR | ...FPCR := d1 before the final add
105 faddx %fp1,%fp0 | ...(1/2)EXP(|X|) + 1/(2 EXP(|X|))
106
107 bra t_frcinx
108
109COSHBIG:
110 cmpil #0x400CB2B3,%d0 | ...upper bound for step 4. NOTE(review): setox uses 0x400CB27C for "16480 log2"; this value is slightly larger -- confirm intent
111 bgts COSHHUGE | ...beyond it: step 5
112
113 fabsx %fp0 | ...|X|
114 fsubd T1(%pc),%fp0 | ...(|X|-16381LOG2_LEAD)
115 fsubd T2(%pc),%fp0 | ...|X| - 16381 LOG2, ACCURATE
116
117 movel %d1,-(%sp) | ...save d1 (popped into FPCR below)
118 clrl %d1 | ...setox is called with d1 = 0
119 fmovemx %fp0-%fp0,(%a0) |pass |X| - 16381 log2 to setox
120 bsr setox | ...fp0 is exp(|X| - 16381 log2)
121 fmovel (%sp)+,%fpcr | ...pop the saved d1 value directly into FPCR
122
123 fmulx TWO16380(%pc),%fp0 | ...times Fact = 2**16380
124 bra t_frcinx
125
126COSHHUGE:
127 fmovel #0,%fpsr |clr N bit if set by source
128 bclrb #7,(%a0) |always return positive value
129 fmovemx (%a0),%fp0-%fp0 | ...fp0 := operand with its sign cleared
130 bra t_ovfl | ...hand off to t_ovfl (defined elsewhere)
131
132 |end
diff --git a/arch/m68k/fpsp040/setox.S b/arch/m68k/fpsp040/setox.S
new file mode 100644
index 000000000000..0aa75f9bf7d1
--- /dev/null
+++ b/arch/m68k/fpsp040/setox.S
@@ -0,0 +1,865 @@
1|
2| setox.sa 3.1 12/10/90
3|
4| The entry point setox computes the exponential of a value.
5| setoxd does the same except the input value is a denormalized
6| number. setoxm1 computes exp(X)-1, and setoxm1d computes
7| exp(X)-1 for denormalized X.
8|
9| INPUT
10| -----
11| Double-extended value in memory location pointed to by address
12| register a0.
13|
14| OUTPUT
15| ------
16| exp(X) or exp(X)-1 returned in floating-point register fp0.
17|
18| ACCURACY and MONOTONICITY
19| -------------------------
20| The returned result is within 0.85 ulps in 64 significant bits, i.e.
21| within 0.5001 ulp to 53 bits if the result is subsequently rounded
22| to double precision. The result is provably monotonic in double
23| precision.
24|
25| SPEED
26| -----
27| Two timings are measured, both in the copy-back mode. The
28| first one is measured when the function is invoked the first time
29| (so the instructions and data are not in cache), and the
30| second one is measured when the function is reinvoked at the same
31| input argument.
32|
33| The program setox takes approximately 210/190 cycles for input
34| argument X whose magnitude is less than 16380 log2, which
35| is the usual situation. For the less common arguments,
36| depending on their values, the program may run faster or slower --
37| but no worse than 10% slower even in the extreme cases.
38|
39| The program setoxm1 takes approximately ???/??? cycles for input
40| argument X, 0.25 <= |X| < 70log2. For |X| < 0.25, it takes
41| approximately ???/??? cycles. For the less common arguments,
42| depending on their values, the program may run faster or slower --
43| but no worse than 10% slower even in the extreme cases.
44|
45| ALGORITHM and IMPLEMENTATION NOTES
46| ----------------------------------
47|
48| setoxd
49| ------
50| Step 1. Set ans := 1.0
51|
52| Step 2. Return ans := ans + sign(X)*2^(-126). Exit.
53| Notes: This will always generate one exception -- inexact.
54|
55|
56| setox
57| -----
58|
59| Step 1. Filter out extreme cases of input argument.
60| 1.1 If |X| >= 2^(-65), go to Step 1.3.
61| 1.2 Go to Step 7.
62| 1.3 If |X| < 16380 log(2), go to Step 2.
63| 1.4 Go to Step 8.
64| Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.
65| To avoid the use of floating-point comparisons, a
66| compact representation of |X| is used. This format is a
67| 32-bit integer, the upper (more significant) 16 bits are
68| the sign and biased exponent field of |X|; the lower 16
69| bits are the 16 most significant fraction (including the
70| explicit bit) bits of |X|. Consequently, the comparisons
71| in Steps 1.1 and 1.3 can be performed by integer comparison.
72| Note also that the constant 16380 log(2) used in Step 1.3
73| is also in the compact form. Thus taking the branch
74| to Step 2 guarantees |X| < 16380 log(2). There is no harm
75| in having a small number of cases where |X| is less than,
76| but close to, 16380 log(2) and the branch to Step 8 is
77| taken.
78|
79| Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ).
80| 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 was taken)
81| 2.2 N := round-to-nearest-integer( X * 64/log2 ).
82| 2.3 Calculate J = N mod 64; so J = 0,1,2,..., or 63.
83| 2.4 Calculate M = (N - J)/64; so N = 64M + J.
84| 2.5 Calculate the address of the stored value of 2^(J/64).
85| 2.6 Create the value Scale = 2^M.
86| Notes: The calculation in 2.2 is really performed by
87|
88| Z := X * constant
89| N := round-to-nearest-integer(Z)
90|
91| where
92|
93| constant := single-precision( 64/log 2 ).
94|
95| Using a single-precision constant avoids memory access.
96| Another effect of using a single-precision "constant" is
97| that the calculated value Z is
98|
99| Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24).
100|
101| This error has to be considered later in Steps 3 and 4.
102|
103| Step 3. Calculate X - N*log2/64.
104| 3.1 R := X + N*L1, where L1 := single-precision(-log2/64).
105| 3.2 R := R + N*L2, L2 := extended-precision(-log2/64 - L1).
106| Notes: a) The way L1 and L2 are chosen ensures L1+L2 approximate
107| the value -log2/64 to 88 bits of accuracy.
108| b) N*L1 is exact because N is no longer than 22 bits and
109| L1 is no longer than 24 bits.
110| c) The calculation X+N*L1 is also exact due to cancellation.
111| Thus, R is practically X+N(L1+L2) to full 64 bits.
112| d) It is important to estimate how large can |R| be after
113| Step 3.2.
114|
115| N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24)
116| X*64/log2 (1+eps) = N + f, |f| <= 0.5
117| X*64/log2 - N = f - eps*X 64/log2
118| X - N*log2/64 = f*log2/64 - eps*X
119|
120|
121| Now |X| <= 16446 log2, thus
122|
123| |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64
124| <= 0.57 log2/64.
125| This bound will be used in Step 4.
126|
127| Step 4. Approximate exp(R)-1 by a polynomial
128| p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
129| Notes: a) In order to reduce memory access, the coefficients are
130| made as "short" as possible: A1 (which is 1/2), A4 and A5
131| are single precision; A2 and A3 are double precision.
132| b) Even with the restrictions above,
133| |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062.
134| Note that 0.0062 is slightly bigger than 0.57 log2/64.
135| c) To fully utilize the pipeline, p is separated into
136| two independent pieces of roughly equal complexities
137| p = [ R + R*S*(A2 + S*A4) ] +
138| [ S*(A1 + S*(A3 + S*A5)) ]
139| where S = R*R.
140|
141| Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by
142| ans := T + ( T*p + t)
143| where T and t are the stored values for 2^(J/64).
144| Notes: 2^(J/64) is stored as T and t where T+t approximates
145| 2^(J/64) to roughly 85 bits; T is in extended precision
146| and t is in single precision. Note also that T is rounded
147| to 62 bits so that the last two bits of T are zero. The
148| reason for such a special form is that T-1, T-2, and T-8
149| will all be exact --- a property that will give much
150| more accurate computation of the function EXPM1.
151|
152| Step 6. Reconstruction of exp(X)
153| exp(X) = 2^M * 2^(J/64) * exp(R).
154| 6.1 If AdjFlag = 0, go to 6.3
155| 6.2 ans := ans * AdjScale
156| 6.3 Restore the user FPCR
157| 6.4 Return ans := ans * Scale. Exit.
158| Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R,
159| |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will
160| neither overflow nor underflow. If AdjFlag = 1, that
161| means that
162| X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380.
163| Hence, exp(X) may overflow or underflow or neither.
164| When that is the case, AdjScale = 2^(M1) where M1 is
165| approximately M. Thus 6.2 will never cause over/underflow.
166| Possible exception in 6.4 is overflow or underflow.
167| The inexact exception is not generated in 6.4. Although
168| one can argue that the inexact flag should always be
169| raised, simulating that exception costs more than the
170| flag is worth in practical use.
171|
172| Step 7. Return 1 + X.
173| 7.1 ans := X
174| 7.2 Restore user FPCR.
175| 7.3 Return ans := 1 + ans. Exit
176| Notes: For non-zero X, the inexact exception will always be
177| raised by 7.3. That is the only exception raised by 7.3.
178| Note also that we use the FMOVEM instruction to move X
179| in Step 7.1 to avoid unnecessary trapping. (Although
180| the FMOVEM may not seem relevant since X is normalized,
181| the precaution will be useful in the library version of
182| this code where the separate entry for denormalized inputs
183| will be done away with.)
184|
185| Step 8. Handle exp(X) where |X| >= 16380log2.
186| 8.1 If |X| > 16480 log2, go to Step 9.
187| (mimic 2.2 - 2.6)
188| 8.2 N := round-to-integer( X * 64/log2 )
189| 8.3 Calculate J = N mod 64, J = 0,1,...,63
190| 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, AdjFlag := 1.
191| 8.5 Calculate the address of the stored value 2^(J/64).
192| 8.6 Create the values Scale = 2^M, AdjScale = 2^M1.
193| 8.7 Go to Step 3.
194| Notes: Refer to notes for 2.2 - 2.6.
195|
196| Step 9. Handle exp(X), |X| > 16480 log2.
197| 9.1 If X < 0, go to 9.3
198| 9.2 ans := Huge, go to 9.4
199| 9.3 ans := Tiny.
200| 9.4 Restore user FPCR.
201| 9.5 Return ans := ans * ans. Exit.
202| Notes: Exp(X) will surely overflow or underflow, depending on
203| X's sign. "Huge" and "Tiny" are respectively large/tiny
204| extended-precision numbers whose square over/underflow
205| with an inexact result. Thus, 9.5 always raises the
206| inexact together with either overflow or underflow.
207|
208|
209| setoxm1d
210| --------
211|
212| Step 1. Set ans := 0
213|
214| Step 2. Return ans := X + ans. Exit.
215| Notes: This will return X with the appropriate rounding
216| precision prescribed by the user FPCR.
217|
218| setoxm1
219| -------
220|
221| Step 1. Check |X|
222| 1.1 If |X| >= 1/4, go to Step 1.3.
223| 1.2 Go to Step 7.
224| 1.3 If |X| < 70 log(2), go to Step 2.
225| 1.4 Go to Step 10.
226| Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.
227| However, it is conceivable |X| can be small very often
228| because EXPM1 is intended to evaluate exp(X)-1 accurately
229| when |X| is small. For further details on the comparisons,
230| see the notes on Step 1 of setox.
231|
232| Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ).
233| 2.1 N := round-to-nearest-integer( X * 64/log2 ).
234| 2.2 Calculate J = N mod 64; so J = 0,1,2,..., or 63.
235| 2.3 Calculate M = (N - J)/64; so N = 64M + J.
236| 2.4 Calculate the address of the stored value of 2^(J/64).
237| 2.5 Create the values Sc = 2^M and OnebySc := -2^(-M).
238| Notes: See the notes on Step 2 of setox.
239|
240| Step 3. Calculate X - N*log2/64.
241| 3.1 R := X + N*L1, where L1 := single-precision(-log2/64).
242| 3.2 R := R + N*L2, L2 := extended-precision(-log2/64 - L1).
243| Notes: Applying the analysis of Step 3 of setox in this case
244| shows that |R| <= 0.0055 (note that |X| <= 70 log2 in
245| this case).
246|
247| Step 4. Approximate exp(R)-1 by a polynomial
248| p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6)))))
249| Notes: a) In order to reduce memory access, the coefficients are
250| made as "short" as possible: A1 (which is 1/2), A5 and A6
251| are single precision; A2, A3 and A4 are double precision.
252| b) Even with the restriction above,
253| |p - (exp(R)-1)| < |R| * 2^(-72.7)
254| for all |R| <= 0.0055.
255| c) To fully utilize the pipeline, p is separated into
256| two independent pieces of roughly equal complexity
257| p = [ R*S*(A2 + S*(A4 + S*A6)) ] +
258| [ R + S*(A1 + S*(A3 + S*A5)) ]
259| where S = R*R.
260|
261| Step 5. Compute 2^(J/64)*p by
262| p := T*p
263| where T and t are the stored values for 2^(J/64).
264| Notes: 2^(J/64) is stored as T and t where T+t approximates
265| 2^(J/64) to roughly 85 bits; T is in extended precision
266| and t is in single precision. Note also that T is rounded
267| to 62 bits so that the last two bits of T are zero. The
268| reason for such a special form is that T-1, T-2, and T-8
269| will all be exact --- a property that will be exploited
270| in Step 6 below. The total relative error in p is no
271| bigger than 2^(-67.7) compared to the final result.
272|
273| Step 6. Reconstruction of exp(X)-1
274| exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ).
275| 6.1 If M <= 63, go to Step 6.3.
276| 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6
277| 6.3 If M >= -3, go to 6.5.
278| 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6
279| 6.5 ans := (T + OnebySc) + (p + t).
280| 6.6 Restore user FPCR.
281| 6.7 Return ans := Sc * ans. Exit.
282| Notes: The various arrangements of the expressions give accurate
283| evaluations.
284|
285| Step 7. exp(X)-1 for |X| < 1/4.
286| 7.1 If |X| >= 2^(-65), go to Step 9.
287| 7.2 Go to Step 8.
288|
289| Step 8. Calculate exp(X)-1, |X| < 2^(-65).
290| 8.1 If |X| < 2^(-16312), goto 8.3
291| 8.2 Restore FPCR; return ans := X - 2^(-16382). Exit.
292| 8.3 X := X * 2^(140).
293| 8.4 Restore FPCR; ans := ans - 2^(-16382).
294| Return ans := ans*2^(-140). Exit
295| Notes: The idea is to return "X - tiny" under the user
296| precision and rounding modes. To avoid unnecessary
297| inefficiency, we stay away from denormalized numbers the
298| best we can. For |X| >= 2^(-16312), the straightforward
299| 8.2 generates the inexact exception as the case warrants.
300|
301| Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial
302| p = X + X*X*(B1 + X*(B2 + ... + X*B12))
303| Notes: a) In order to reduce memory access, the coefficients are
304| made as "short" as possible: B1 (which is 1/2), B9 to B12
305| are single precision; B3 to B8 are double precision; and
306| B2 is double extended.
307| b) Even with the restriction above,
308| |p - (exp(X)-1)| < |X| 2^(-70.6)
309| for all |X| <= 0.251.
310| Note that 0.251 is slightly bigger than 1/4.
311| c) To fully preserve accuracy, the polynomial is computed
312| as X + ( S*B1 + Q ) where S = X*X and
313| Q = X*S*(B2 + X*(B3 + ... + X*B12))
314| d) To fully utilize the pipeline, Q is separated into
315| two independent pieces of roughly equal complexity
316| Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] +
317| [ S*S*(B3 + S*(B5 + ... + S*B11)) ]
318|
319| Step 10. Calculate exp(X)-1 for |X| >= 70 log 2.
320| 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all practical
321| purposes. Therefore, go to Step 1 of setox.
322| 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical purposes.
323| ans := -1
324| Restore user FPCR
325| Return ans := ans + 2^(-126). Exit.
326| Notes: 10.2 will always create an inexact and return -1 + tiny
327| in the user rounding precision and mode.
328|
329|
330
331| Copyright (C) Motorola, Inc. 1990
332| All Rights Reserved
333|
334| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
335| The copyright notice above does not evidence any
336| actual or intended publication of such source code.
337
338|setox idnt 2,1 | Motorola 040 Floating Point Software Package
339
340 |section 8
341
342#include "fpsp.h"
343
344L2: .long 0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000 | ...L2 of Step 3 (extended); L1+L2 = -log2/64
345
346EXPA3: .long 0x3FA55555,0x55554431 | ...A3 of setox Step 4 (double)
347EXPA2: .long 0x3FC55555,0x55554018 | ...A2 of setox Step 4 (double)
348
349HUGE: .long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000 | ..."Huge" of Step 9 (extended); not referenced by the code in this file
350TINY: .long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000 | ..."Tiny" of Step 9 (extended); not referenced by the code in this file
351
352EM1A4: .long 0x3F811111,0x11174385 | ...A4 of setoxm1 Step 4 (double)
353EM1A3: .long 0x3FA55555,0x55554F5A | ...A3 of setoxm1 Step 4 (double)
354
355EM1A2: .long 0x3FC55555,0x55555555,0x00000000,0x00000000 | ...A2 of setoxm1 Step 4; read as a double (faddd), two pad longwords follow
356
357EM1B8: .long 0x3EC71DE3,0xA5774682 | ...B8 of setoxm1 Step 9 (double)
358EM1B7: .long 0x3EFA01A0,0x19D7CB68 | ...B7 (double)
359
360EM1B6: .long 0x3F2A01A0,0x1A019DF3 | ...B6 (double)
361EM1B5: .long 0x3F56C16C,0x16C170E2 | ...B5 (double)
362
363EM1B4: .long 0x3F811111,0x11111111 | ...B4 (double)
364EM1B3: .long 0x3FA55555,0x55555555 | ...B3 (double)
365
366EM1B2: .long 0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB | ...B2 (extended; read with faddx)
367 .long 0x00000000
368
369TWO140: .long 0x48B00000,0x00000000 | ...2^(140) as a double (Step 8.3)
370TWON140: .long 0x37300000,0x00000000 | ...2^(-140) as a double (Step 8.4)
371
372EXPTBL:
373 .long 0x3FFF0000,0x80000000,0x00000000,0x00000000
374 .long 0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
375 .long 0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
376 .long 0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
377 .long 0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
378 .long 0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
379 .long 0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
380 .long 0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
381 .long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
382 .long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
383 .long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
384 .long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
385 .long 0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
386 .long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
387 .long 0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
388 .long 0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
389 .long 0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
390 .long 0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
391 .long 0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
392 .long 0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
393 .long 0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
394 .long 0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
395 .long 0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
396 .long 0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
397 .long 0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
398 .long 0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
399 .long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
400 .long 0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
401 .long 0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
402 .long 0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
403 .long 0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
404 .long 0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
405 .long 0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
406 .long 0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
407 .long 0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
408 .long 0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
409 .long 0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
410 .long 0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
411 .long 0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
412 .long 0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
413 .long 0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
414 .long 0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
415 .long 0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
416 .long 0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
417 .long 0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
418 .long 0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
419 .long 0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
420 .long 0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
421 .long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
422 .long 0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
423 .long 0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
424 .long 0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
425 .long 0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
426 .long 0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
427 .long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
428 .long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
429 .long 0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
430 .long 0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
431 .long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
432 .long 0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
433 .long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
434 .long 0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
435 .long 0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
436 .long 0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A
437
438 .set ADJFLAG,L_SCR2 | ...0 set by EXPMAIN, 1 set by EXPBIG; tested in setox Step 6
439 .set SCALE,FP_SCR1 | ...2^(M) in extended format (setox)
440 .set ADJSCALE,FP_SCR2 | ...2^(M1) in extended format (setox Step 8)
441 .set SC,FP_SCR3 | ...2^(M) in setoxm1; also holds -2^(-16382) in its Step 8
442 .set ONEBYSC,FP_SCR4 | ...-2^(-M) in extended format (setoxm1)
443
444 | xref t_frcinx
445 |xref t_extdnrm
446 |xref t_unfl
447 |xref t_ovfl
448
449 .global setoxd
450setoxd:
451|--entry point for EXP(X), X is denormalized; returns 1 + sign(X)*2^(-126) in fp0
452 movel (%a0),%d0 | ...first longword of X (sign in bit 31)
453 andil #0x80000000,%d0 | ...keep only sign(X)
454 oril #0x00800000,%d0 | ...sign(X)*2^(-126)
455 movel %d0,-(%sp) | ...push it as a single-precision operand
456 fmoves #0x3F800000,%fp0 | ...fp0 = 1.0
457 fmovel %d1,%fpcr | ...FPCR := d1 before the final add
458 fadds (%sp)+,%fp0 | ...1 + sign(X)*2^(-126); pops the operand
459 bra t_frcinx
460
461 .global setox
462setox:
463|--entry point for EXP(X), here X is finite, non-zero, and not a NaN
464|--a0 points to X (extended); d1 is loaded into FPCR before the final operation; result in fp0
465|--Step 1.
466 movel (%a0),%d0 | ...load part of input X
467 andil #0x7FFF0000,%d0 | ...biased expo. of X
468 cmpil #0x3FBE0000,%d0 | ...2^(-65)
469 bges EXPC1 | ...normal case
470 bra EXPSM | ...|X| < 2^(-65): Step 7
471
472EXPC1:
473|--The case |X| >= 2^(-65) (also entered from EM1BIG for positive X)
474 movew 4(%a0),%d0 | ...expo. and partial sig. of |X|
475 cmpil #0x400CB167,%d0 | ...16380 log2 trunc. 16 bits
476 blts EXPMAIN | ...normal case
477 bra EXPBIG | ...otherwise Step 8
478
479EXPMAIN:
480|--Step 2.
481|--This is the normal branch: 2^(-65) <= |X| < 16380 log2.
482 fmovex (%a0),%fp0 | ...load input from (a0)
483
484 fmovex %fp0,%fp1 | ...fp1 keeps X for Step 3
485 fmuls #0x42B8AA3B,%fp0 | ...64/log2 * X
486 fmovemx %fp2-%fp2/%fp3,-(%a7) | ...save fp2 and fp3
487 movel #0,ADJFLAG(%a6) | ...AdjFlag := 0 (Step 2.1)
488 fmovel %fp0,%d0 | ...N = int( X * 64/log2 )
489 lea EXPTBL,%a1 | ...a1 = base of the 2^(J/64) table
490 fmovel %d0,%fp0 | ...convert to floating-format
491
492 movel %d0,L_SCR1(%a6) | ...save N temporarily
493 andil #0x3F,%d0 | ...D0 is J = N mod 64
494 lsll #4,%d0 | ...J*16: table entries are 16 bytes each
495 addal %d0,%a1 | ...address of 2^(J/64)
496 movel L_SCR1(%a6),%d0 | ...reload N
497 asrl #6,%d0 | ...D0 is M
498 addiw #0x3FFF,%d0 | ...biased expo. of 2^(M)
499 movew L2,L_SCR1(%a6) | ...prefetch L2, no need in CB
500
501EXPCONT1:
502|--Step 3.
503|--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
504|--a1 points to 2^(J/64), D0 is biased expo. of 2^(M)
505 fmovex %fp0,%fp2 | ...fp2 is a copy of N
506 fmuls #0xBC317218,%fp0 | ...N * L1, L1 = lead(-log2/64)
507 fmulx L2,%fp2 | ...N * L2, L1+L2 = -log2/64
508 faddx %fp1,%fp0 | ...X + N*L1
509 faddx %fp2,%fp0 | ...fp0 is R, reduced arg.
510| MOVE.W #$3FA5,EXPA3 ...load EXPA3 in cache
511
512|--Step 4.
513|--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
514|-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
515|--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
516|--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
517
518 fmovex %fp0,%fp1
519 fmulx %fp1,%fp1 | ...fp1 IS S = R*R
520
521 fmoves #0x3AB60B70,%fp2 | ...fp2 IS A5
522| MOVE.W #0,2(%a1) ...load 2^(J/64) in cache
523
524 fmulx %fp1,%fp2 | ...fp2 IS S*A5
525 fmovex %fp1,%fp3
526 fmuls #0x3C088895,%fp3 | ...fp3 IS S*A4
527
528 faddd EXPA3,%fp2 | ...fp2 IS A3+S*A5
529 faddd EXPA2,%fp3 | ...fp3 IS A2+S*A4
530
531 fmulx %fp1,%fp2 | ...fp2 IS S*(A3+S*A5)
532 movew %d0,SCALE(%a6) | ...SCALE is 2^(M) in extended
533 clrw SCALE+2(%a6) | ...second word of SCALE cleared
534 movel #0x80000000,SCALE+4(%a6) | ...upper mantissa longword: only the top bit set
535 clrl SCALE+8(%a6) | ...lower mantissa longword = 0
536
537 fmulx %fp1,%fp3 | ...fp3 IS S*(A2+S*A4)
538
539 fadds #0x3F000000,%fp2 | ...fp2 IS A1+S*(A3+S*A5)
540 fmulx %fp0,%fp3 | ...fp3 IS R*S*(A2+S*A4)
541
542 fmulx %fp1,%fp2 | ...fp2 IS S*(A1+S*(A3+S*A5))
543 faddx %fp3,%fp0 | ...fp0 IS R+R*S*(A2+S*A4),
544| ...fp3 released
545
546 fmovex (%a1)+,%fp1 | ...fp1 is lead. pt. of 2^(J/64); a1 now points at the trailing single
547 faddx %fp2,%fp0 | ...fp0 is EXP(R) - 1
548| ...fp2 released
549
550|--Step 5
551|--final reconstruction process
552|--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
553
554 fmulx %fp1,%fp0 | ...2^(J/64)*(Exp(R)-1)
555 fmovemx (%a7)+,%fp2-%fp2/%fp3 | ...fp2 and fp3 restored
556 fadds (%a1),%fp0 | ...accurate 2^(J/64): add the trailing part t (single)
557
558 faddx %fp1,%fp0 | ...2^(J/64) + 2^(J/64)*...
559 movel ADJFLAG(%a6),%d0 | ...0 if entered via EXPMAIN, 1 if via EXPBIG
560
561|--Step 6
562 tstl %d0
563 beqs NORMAL | ...AdjFlag = 0: skip the extra scaling
564ADJUST:
565 fmulx ADJSCALE(%a6),%fp0 | ...ans := ans * AdjScale (Step 6.2)
566NORMAL:
567 fmovel %d1,%FPCR | ...restore user FPCR
568 fmulx SCALE(%a6),%fp0 | ...multiply 2^(M)
569 bra t_frcinx
570
571EXPSM:
572|--Step 7: |X| < 2^(-65), return 1 + X
573 fmovemx (%a0),%fp0-%fp0 | ...in case X is denormalized
574 fmovel %d1,%FPCR | ...FPCR := d1 before the final add
575 fadds #0x3F800000,%fp0 | ...1+X in user mode
576 bra t_frcinx
577
578EXPBIG:
579|--Step 8
580 cmpil #0x400CB27C,%d0 | ...16480 log2
581 bgts EXP2BIG | ...larger still: Step 9
582|--Steps 8.2 -- 8.6
583 fmovex (%a0),%fp0 | ...load input from (a0)
584
585 fmovex %fp0,%fp1 | ...fp1 keeps X for Step 3
586 fmuls #0x42B8AA3B,%fp0 | ...64/log2 * X
587 fmovemx %fp2-%fp2/%fp3,-(%a7) | ...save fp2 and fp3
588 movel #1,ADJFLAG(%a6) | ...AdjFlag := 1 (Step 8.4)
589 fmovel %fp0,%d0 | ...N = int( X * 64/log2 )
590 lea EXPTBL,%a1 | ...a1 = base of the 2^(J/64) table
591 fmovel %d0,%fp0 | ...convert to floating-format
592 movel %d0,L_SCR1(%a6) | ...save N temporarily
593 andil #0x3F,%d0 | ...D0 is J = N mod 64
594 lsll #4,%d0 | ...J*16: table entries are 16 bytes each
595 addal %d0,%a1 | ...address of 2^(J/64)
596 movel L_SCR1(%a6),%d0 | ...reload N
597 asrl #6,%d0 | ...D0 is K
598 movel %d0,L_SCR1(%a6) | ...save K temporarily
599 asrl #1,%d0 | ...D0 is M1
600 subl %d0,L_SCR1(%a6) | ...L_SCR1 is M = K - M1
601 addiw #0x3FFF,%d0 | ...biased expo. of 2^(M1)
602 movew %d0,ADJSCALE(%a6) | ...ADJSCALE := 2^(M1)
603 clrw ADJSCALE+2(%a6) | ...second word of ADJSCALE cleared
604 movel #0x80000000,ADJSCALE+4(%a6) | ...upper mantissa longword: only the top bit set
605 clrl ADJSCALE+8(%a6) | ...lower mantissa longword = 0
606 movel L_SCR1(%a6),%d0 | ...D0 is M
607 addiw #0x3FFF,%d0 | ...biased expo. of 2^(M)
608 bra EXPCONT1 | ...go back to Step 3
609
610EXP2BIG:
611|--Step 9: here the code branches to t_unfl / t_ovfl (defined elsewhere) instead of squaring Huge/Tiny
612 fmovel %d1,%FPCR | ...FPCR := d1
613 movel (%a0),%d0 | ...d0 keeps the original sign of X
614 bclrb #sign_bit,(%a0) | ...setox always returns positive
615 cmpil #0,%d0
616 blt t_unfl | ...X < 0: underflow handler
617 bra t_ovfl | ...X > 0: overflow handler
618
619 .global setoxm1d
620setoxm1d:
621|--entry point for EXPM1(X), here X is denormalized
622|--Step 0.
623 bra t_extdnrm | ...all work is delegated to t_extdnrm (defined elsewhere)
624
625
626 .global setoxm1
627setoxm1:
628|--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
629|--a0 points to X (extended); d1 is loaded into FPCR before the final operation; result in fp0
630|--Step 1.
631|--Step 1.1
632 movel (%a0),%d0 | ...load part of input X
633 andil #0x7FFF0000,%d0 | ...biased expo. of X
634 cmpil #0x3FFD0000,%d0 | ...1/4
635 bges EM1CON1 | ...|X| >= 1/4
636 bra EM1SM | ...|X| < 1/4: Step 7
637
638EM1CON1:
639|--Step 1.3
640|--The case |X| >= 1/4
641 movew 4(%a0),%d0 | ...expo. and partial sig. of |X|
642 cmpil #0x4004C215,%d0 | ...70log2 rounded up to 16 bits
643 bles EM1MAIN | ...1/4 <= |X| <= 70log2
644 bra EM1BIG | ...otherwise Step 10
645
646EM1MAIN:
647|--Step 2.
648|--This is the case: 1/4 <= |X| <= 70 log2.
649 fmovex (%a0),%fp0 | ...load input from (a0)
650
651 fmovex %fp0,%fp1 | ...fp1 keeps X for Step 3
652 fmuls #0x42B8AA3B,%fp0 | ...64/log2 * X
653 fmovemx %fp2-%fp2/%fp3,-(%a7) | ...save fp2 and fp3
654| MOVE.W #$3F81,EM1A4 ...prefetch in CB mode
655 fmovel %fp0,%d0 | ...N = int( X * 64/log2 )
656 lea EXPTBL,%a1 | ...a1 = base of the 2^(J/64) table
657 fmovel %d0,%fp0 | ...convert to floating-format
658
659 movel %d0,L_SCR1(%a6) | ...save N temporarily
660 andil #0x3F,%d0 | ...D0 is J = N mod 64
661 lsll #4,%d0 | ...J*16: table entries are 16 bytes each
662 addal %d0,%a1 | ...address of 2^(J/64)
663 movel L_SCR1(%a6),%d0 | ...reload N
664 asrl #6,%d0 | ...D0 is M
665 movel %d0,L_SCR1(%a6) | ...save a copy of M
666| MOVE.W #$3FDC,L2 ...prefetch L2 in CB mode
667
668|--Step 3.
669|--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
670|--a1 points to 2^(J/64), D0 and L_SCR1 both contain M
671 fmovex %fp0,%fp2 | ...fp2 is a copy of N
672 fmuls #0xBC317218,%fp0 | ...N * L1, L1 = lead(-log2/64)
673 fmulx L2,%fp2 | ...N * L2, L1+L2 = -log2/64
674 faddx %fp1,%fp0 | ...X + N*L1
675 faddx %fp2,%fp0 | ...fp0 is R, reduced arg.
676| MOVE.W #$3FC5,EM1A2 ...load EM1A2 in cache
677 addiw #0x3FFF,%d0 | ...D0 is biased expo. of 2^M
678
679|--Step 4.
680|--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
681|-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
682|--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
683|--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
684
685 fmovex %fp0,%fp1
686 fmulx %fp1,%fp1 | ...fp1 IS S = R*R
687
688 fmoves #0x3950097B,%fp2 | ...fp2 IS A6
689| MOVE.W #0,2(%a1) ...load 2^(J/64) in cache
690
691 fmulx %fp1,%fp2 | ...fp2 IS S*A6
692 fmovex %fp1,%fp3
693 fmuls #0x3AB60B6A,%fp3 | ...fp3 IS S*A5
694
695 faddd EM1A4,%fp2 | ...fp2 IS A4+S*A6
696 faddd EM1A3,%fp3 | ...fp3 IS A3+S*A5
697 movew %d0,SC(%a6) | ...SC is 2^(M) in extended
698 clrw SC+2(%a6) | ...second word of SC cleared
699 movel #0x80000000,SC+4(%a6) | ...upper mantissa longword: only the top bit set
700 clrl SC+8(%a6) | ...lower mantissa longword = 0
701
702 fmulx %fp1,%fp2 | ...fp2 IS S*(A4+S*A6)
703 movel L_SCR1(%a6),%d0 | ...D0 is M
704 negw %d0 | ...D0 is -M
705 fmulx %fp1,%fp3 | ...fp3 IS S*(A3+S*A5)
706 addiw #0x3FFF,%d0 | ...biased expo. of 2^(-M)
707 faddd EM1A2,%fp2 | ...fp2 IS A2+S*(A4+S*A6)
708 fadds #0x3F000000,%fp3 | ...fp3 IS A1+S*(A3+S*A5)
709
710 fmulx %fp1,%fp2 | ...fp2 IS S*(A2+S*(A4+S*A6))
711 oriw #0x8000,%d0 | ...signed/expo. of -2^(-M)
712 movew %d0,ONEBYSC(%a6) | ...OnebySc is -2^(-M)
713 clrw ONEBYSC+2(%a6) | ...second word of ONEBYSC cleared
714 movel #0x80000000,ONEBYSC+4(%a6) | ...upper mantissa longword: only the top bit set
715 clrl ONEBYSC+8(%a6) | ...lower mantissa longword = 0
716 fmulx %fp3,%fp1 | ...fp1 IS S*(A1+S*(A3+S*A5))
717| ...fp3 released
718
719 fmulx %fp0,%fp2 | ...fp2 IS R*S*(A2+S*(A4+S*A6))
720 faddx %fp1,%fp0 | ...fp0 IS R+S*(A1+S*(A3+S*A5))
721| ...fp1 released
722
723 faddx %fp2,%fp0 | ...fp0 IS EXP(R)-1
724| ...fp2 released
725 fmovemx (%a7)+,%fp2-%fp2/%fp3 | ...fp2 and fp3 restored
726
727|--Step 5
728|--Compute 2^(J/64)*p
729
730 fmulx (%a1),%fp0 | ...2^(J/64)*(Exp(R)-1)
731
732|--Step 6
733|--Step 6.1
734 movel L_SCR1(%a6),%d0 | ...retrieve M
735 cmpil #63,%d0
736 bles MLE63 | ...M <= 63: Step 6.3
737|--Step 6.2 M >= 64
738 fmoves 12(%a1),%fp1 | ...fp1 is t (single stored 12 bytes into the table entry)
739 faddx ONEBYSC(%a6),%fp1 | ...fp1 is t+OnebySc
740 faddx %fp1,%fp0 | ...p+(t+OnebySc), fp1 released
741 faddx (%a1),%fp0 | ...T+(p+(t+OnebySc))
742 bras EM1SCALE
743MLE63:
744|--Step 6.3 M <= 63 (d0 holds M, a1 points to the table entry T,t for 2^(J/64))
745 cmpil #-3,%d0
746 bges MGEN3 | ...M >= -3: Step 6.5
747MLTN3:
748|--Step 6.4 M <= -4
749 fadds 12(%a1),%fp0 | ...p+t
750 faddx (%a1),%fp0 | ...T+(p+t)
751 faddx ONEBYSC(%a6),%fp0 | ...OnebySc + (T+(p+t))
752 bras EM1SCALE
753MGEN3:
754|--Step 6.5 -3 <= M <= 63
755 fmovex (%a1)+,%fp1 | ...fp1 is T; a1 now points at t
756 fadds (%a1),%fp0 | ...fp0 is p+t
757 faddx ONEBYSC(%a6),%fp1 | ...fp1 is T+OnebySc
758 faddx %fp1,%fp0 | ...(T+OnebySc)+(p+t)
759
760EM1SCALE:
761|--Step 6.6
762 fmovel %d1,%FPCR | ...FPCR := d1 before the final multiply
763 fmulx SC(%a6),%fp0 | ...Step 6.7: ans := Sc * ans
764
765 bra t_frcinx
766
767EM1SM:
768|--Step 7 |X| < 1/4. (d0 still holds the biased exponent from Step 1.1)
769 cmpil #0x3FBE0000,%d0 | ...2^(-65)
770 bges EM1POLY | ...|X| >= 2^(-65): Step 9
771
772EM1TINY:
773|--Step 8 |X| < 2^(-65)
774 cmpil #0x00330000,%d0 | ...2^(-16312). NOTE(review): exponent 0x0033 with bias 0x3FFF is 2^(-16332) -- confirm which is intended
775 blts EM12TINY | ...smaller still: Step 8.3
776|--Step 8.2
777 movel #0x80010000,SC(%a6) | ...SC is -2^(-16382)
778 movel #0x80000000,SC+4(%a6) | ...upper mantissa longword: only the top bit set
779 clrl SC+8(%a6) | ...lower mantissa longword = 0
780 fmovex (%a0),%fp0 | ...fp0 is X
781 fmovel %d1,%FPCR | ...FPCR := d1 before the final add
782 faddx SC(%a6),%fp0 | ...X - 2^(-16382)
783
784 bra t_frcinx
785
786EM12TINY:
787|--Step 8.3: scale X up by 2^(140), subtract the tiny constant, scale back down
788 fmovex (%a0),%fp0 | ...fp0 is X
789 fmuld TWO140,%fp0 | ...X * 2^(140)
790 movel #0x80010000,SC(%a6) | ...SC is -2^(-16382)
791 movel #0x80000000,SC+4(%a6) | ...upper mantissa longword: only the top bit set
792 clrl SC+8(%a6) | ...lower mantissa longword = 0
793 faddx SC(%a6),%fp0 | ...X*2^(140) - 2^(-16382)
794 fmovel %d1,%FPCR | ...FPCR := d1 before the final multiply
795 fmuld TWON140,%fp0 | ...times 2^(-140)
796
797 bra t_frcinx
798
799EM1POLY:
800|--Step 9 exp(X)-1 by a simple polynomial
801 fmovex (%a0),%fp0 | ...fp0 is X
802 fmulx %fp0,%fp0 | ...fp0 is S := X*X
803 fmovemx %fp2-%fp2/%fp3,-(%a7) | ...save fp2 and fp3
804 fmoves #0x2F30CAA8,%fp1 | ...fp1 is B12
805 fmulx %fp0,%fp1 | ...fp1 is S*B12
806 fmoves #0x310F8290,%fp2 | ...fp2 is B11
807 fadds #0x32D73220,%fp1 | ...fp1 is B10+S*B12
808
809 fmulx %fp0,%fp2 | ...fp2 is S*B11
810 fmulx %fp0,%fp1 | ...fp1 is S*(B10 + ...
811
812 fadds #0x3493F281,%fp2 | ...fp2 is B9+S*...
813 faddd EM1B8,%fp1 | ...fp1 is B8+S*...
814
815 fmulx %fp0,%fp2 | ...fp2 is S*(B9+...
816 fmulx %fp0,%fp1 | ...fp1 is S*(B8+...
817
818 faddd EM1B7,%fp2 | ...fp2 is B7+S*...
819 faddd EM1B6,%fp1 | ...fp1 is B6+S*...
820
821 fmulx %fp0,%fp2 | ...fp2 is S*(B7+...
822 fmulx %fp0,%fp1 | ...fp1 is S*(B6+...
823
824 faddd EM1B5,%fp2 | ...fp2 is B5+S*...
825 faddd EM1B4,%fp1 | ...fp1 is B4+S*...
826
827 fmulx %fp0,%fp2 | ...fp2 is S*(B5+...
828 fmulx %fp0,%fp1 | ...fp1 is S*(B4+...
829
830 faddd EM1B3,%fp2 | ...fp2 is B3+S*...
831 faddx EM1B2,%fp1 | ...fp1 is B2+S*...
832
833 fmulx %fp0,%fp2 | ...fp2 is S*(B3+...
834 fmulx %fp0,%fp1 | ...fp1 is S*(B2+...
835
836 fmulx %fp0,%fp2 | ...fp2 is S*S*(B3+...)
837 fmulx (%a0),%fp1 | ...fp1 is X*S*(B2...
838
839 fmuls #0x3F000000,%fp0 | ...fp0 is S*B1
840 faddx %fp2,%fp1 | ...fp1 is Q
841| ...fp2 released
842
843 fmovemx (%a7)+,%fp2-%fp2/%fp3 | ...fp2 and fp3 restored
844
845 faddx %fp1,%fp0 | ...fp0 is S*B1+Q
846| ...fp1 released
847
848 fmovel %d1,%FPCR | ...FPCR := d1 before the final add
849 faddx (%a0),%fp0 | ...X + (S*B1+Q)
850
851 bra t_frcinx
852
853EM1BIG:
854|--Step 10 |X| > 70 log2
855 movel (%a0),%d0 | ...first longword of X (sign in bit 31)
856 cmpil #0,%d0
857 bgt EXPC1 | ...Step 10.1: X positive, continue in setox at EXPC1
858|--Step 10.2
859 fmoves #0xBF800000,%fp0 | ...fp0 is -1
860 fmovel %d1,%FPCR | ...FPCR := d1 before the final add
861 fadds #0x00800000,%fp0 | ...-1 + 2^(-126)
862
863 bra t_frcinx
864
865 |end
diff --git a/arch/m68k/fpsp040/sgetem.S b/arch/m68k/fpsp040/sgetem.S
new file mode 100644
index 000000000000..0fcbd045ba75
--- /dev/null
+++ b/arch/m68k/fpsp040/sgetem.S
@@ -0,0 +1,141 @@
1|
2| sgetem.sa 3.1 12/10/90
3|
4| The entry point sGETEXP returns the exponent portion
5| of the input argument. The exponent bias is removed
6| and the exponent value is returned as an extended
7| precision number in fp0. sGETEXPD handles denormalized
8| numbers.
9|
10| The entry point sGETMAN extracts the mantissa of the
11| input argument. The mantissa is converted to an
12| extended precision number and returned in fp0. The
13| range of the result is [1.0 - 2.0).
14|
15|
16| Input: Double-extended number X in the ETEMP space in
17| the floating-point save stack.
18|
19| Output: The functions return exp(X) or man(X) in fp0.
20|
21| Modified: fp0.
22|
23|
24| Copyright (C) Motorola, Inc. 1990
25| All Rights Reserved
26|
27| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
28| The copyright notice above does not evidence any
29| actual or intended publication of such source code.
30
31|SGETEM idnt 2,1 | Motorola 040 Floating Point Software Package
32
33 |section 8
34
35#include "fpsp.h"
36
37 |xref nrm_set
38
39|
40| This entry point is used by the unimplemented instruction exception
41| handler. It points a0 to the input operand.
42|
43| In: a0 -> operand (sign/exponent word at LOCAL_EX). Out: fp0. Uses d0.
44|
45| SGETEXP --- return the unbiased exponent of the operand in fp0
46|
47
48 .global sgetexp
49sgetexp:
50 movew LOCAL_EX(%a0),%d0 |get the sign/exponent word
51 bclrl #15,%d0 |clear the sign bit
52 subw #0x3fff,%d0 |subtract off the bias
53 fmovew %d0,%fp0 |convert the signed 16-bit exp into fp0
54 rts
55| SGETEXPD --- as sgetexp, but the operand is first normalized by nrm_set
56 .global sgetexpd
57sgetexpd:
58 bclrb #sign_bit,LOCAL_EX(%a0) |clear the sign bit in the operand itself
59 bsr nrm_set |normalize (exp will go negative)
60 movew LOCAL_EX(%a0),%d0 |load resulting exponent into d0
61 subw #0x3fff,%d0 |subtract off the bias
62 fmovew %d0,%fp0 |move the exp to fp0
63 rts
64|
65|
66| This entry point is used by the unimplemented instruction exception
67| handler. It points a0 to the input operand.
68|
69|
70|
71| SGETMAN
72|
73| The operand at (a0) is rewritten in place and the live FPCR is changed.
74| For normalized numbers, leave the mantissa alone, simply load
75| with an exponent of +/- $3fff.
76|
77 .global sgetman
78sgetman:
79 movel USER_FPCR(%a6),%d0 |start from the saved user FPCR image
80 andil #0xffffff00,%d0 |clear rounding precision and mode
81 fmovel %d0,%fpcr |this fpcr setting is used by the 882
82 movew LOCAL_EX(%a0),%d0 |get the exp (really just want sign bit)
83 orw #0x7fff,%d0 |set all 15 exp bits; sign bit is untouched
84 bclrl #14,%d0 |clear bit 14: exp is now $3fff, sign kept
85 movew %d0,LOCAL_EX(%a0) |move the sign & exp back to fsave stack
86 fmovex (%a0),%fp0 |put new value back in fp0
87 rts
88
89|
90| For denormalized numbers, shift the mantissa until the j-bit = 1,
91| then load the exponent with +/- $3fff.
92| d0 and d1 are used as scratch; the mantissa at (a0) is rewritten in place.
93 .global sgetmand
94sgetmand:
95 movel LOCAL_HI(%a0),%d0 |load ms mant in d0
96 movel LOCAL_LO(%a0),%d1 |load ls mant in d1
97 bsr shft |shift mantissa bits till msbit is set
98 movel %d0,LOCAL_HI(%a0) |put ms mant back on stack
99 movel %d1,LOCAL_LO(%a0) |put ls mant back on stack
100 bras sgetman |finish as for a normalized operand
101
102|
103| SHFT
104| Treats d0:d1 as one 64-bit mantissa. An all-zero mantissa is returned as is.
105| Shifts the mantissa bits until msbit is set.
106| input:
107| ms mantissa part in d0
108| ls mantissa part in d1
109| output:
110| shifted bits in d0 and d1
111shft:
112 tstl %d0 |if any bits set in ms mant
113 bnes upper |then branch
114| ;else no bits set in ms mant
115 tstl %d1 |test if any bits set in ls mant
116 bnes cont |if set then continue
117 bras shft_end |else mantissa is all zero: return
118cont:
119 movel %d3,-(%a7) |save d3
120 exg %d0,%d1 |ms mant was zero: ls mant moves up, ls := 0
121 bfffo %d0{#0:#32},%d3 |d3 := offset of first 1 in old ls mant (now d0)
122 lsll %d3,%d0 |shift first 1 to integer bit in ms mant
123 movel (%a7)+,%d3 |restore d3
124 bras shft_end
125upper:
126| ms mant is non-zero: shift the 64-bit value d0:d1 left by d3 bits
127 moveml %d3/%d5/%d6,-(%a7) |save registers
128 bfffo %d0{#0:#32},%d3 |d3 := offset of first 1 in ms mant
129 lsll %d3,%d0 |shift ms mant until j-bit is set
130 movel %d1,%d6 |save ls mant in d6
131 lsll %d3,%d1 |shift ls mant by count
132 movel #32,%d5
133 subl %d3,%d5 |d5 := 32 - shift count
134 lsrl %d5,%d6 |shift off all bits but those that will
135| ;be shifted into ms mant
136 orl %d6,%d0 |or the carried-over ls mant bits into the ms mant
137 moveml (%a7)+,%d3/%d5/%d6 |restore registers
138shft_end:
139 rts
140
141 |end
diff --git a/arch/m68k/fpsp040/sint.S b/arch/m68k/fpsp040/sint.S
new file mode 100644
index 000000000000..0f9bd28e55a0
--- /dev/null
+++ b/arch/m68k/fpsp040/sint.S
@@ -0,0 +1,247 @@
1|
2| sint.sa 3.1 12/10/90
3|
4| The entry point sINT computes the rounded integer
5| equivalent of the input argument, sINTRZ computes
6| the integer rounded to zero of the input argument.
7|
8| Entry points sint and sintrz are called from do_func
9| to emulate the fint and fintrz unimplemented instructions,
10| respectively. Entry point sintdo is used by bindec.
11|
12| Input: (Entry points sint and sintrz) Double-extended
13| number X in the ETEMP space in the floating-point
14| save stack.
15| (Entry point sintdo) Double-extended number X in
16| location pointed to by the address register a0.
17| (Entry point sintd) Double-extended denormalized
18| number X in the ETEMP space in the floating-point
19| save stack.
20|
21| Output: The function returns int(X) or intrz(X) in fp0.
22|
23| Modifies: fp0.
24|
25| Algorithm: (sint and sintrz)
26|
27| 1. If exp(X) >= 63, return X.
28| If exp(X) < 0, return +/- 0 or +/- 1, according to
29| the rounding mode.
30|
31| 2. (X is in range) set rsc = 63 - exp(X). Unnormalize the
32| result to the exponent $403e.
33|
34| 3. Round the result in the mode given in USER_FPCR. For
35| sintrz, force round-to-zero mode.
36|
37| 4. Normalize the rounded result; store in fp0.
38|
39| For the denormalized cases, force the correct result
40| for the given sign and rounding mode.
41|
42| Sign(X)
43| RMODE + -
44| ----- --------
45| RN +0 -0
46| RZ +0 -0
47| RM +0 -1
48| RP +1 -0
49|
50|
51| Copyright (C) Motorola, Inc. 1990
52| All Rights Reserved
53|
54| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
55| The copyright notice above does not evidence any
56| actual or intended publication of such source code.
57
58|SINT idnt 2,1 | Motorola 040 Floating Point Software Package
59
60 |section 8
61
62#include "fpsp.h"
63
64 |xref dnrm_lp
65 |xref nrm_set
66 |xref round
67 |xref t_inx2
68 |xref ld_pone
69 |xref ld_mone
70 |xref ld_pzero
71 |xref ld_mzero
72 |xref snzrinx
73
74|
75| FINT --- round to integer using the rounding mode held in FPCR_MODE
76|
77 .global sint
78sint:
79 bfextu FPCR_MODE(%a6){#2:#2},%d1 |use user's mode for rounding
80| ;implicitly has extend precision
81| ;in upper word.
82 movel %d1,L_SCR1(%a6) |save mode bits
83 bras sintexc |shared rounding code
84
85|
86| FINT with extended denorm inputs.
87| The answer depends only on the sign of X and the rounding mode.
88 .global sintd
89sintd:
90 btstb #5,FPCR_MODE(%a6) |bit 5 clear: mode is rn or rz
91 beq snzrinx |if round nearest or round zero, +/- 0
92 btstb #4,FPCR_MODE(%a6) |bit 4 clear: round minus inf, else plus inf
93 beqs rnd_mns
94rnd_pls:
95 btstb #sign_bit,LOCAL_EX(%a0)
96 bnes sintmz |if round plus inf and neg, answer is -0
97 bsr ld_pone |if round plus inf and pos, answer is +1
98 bra t_inx2
99rnd_mns:
100 btstb #sign_bit,LOCAL_EX(%a0)
101 beqs sintpz |if round mns inf and pos, answer is +0
102 bsr ld_mone |if round mns inf and neg, answer is -1
103 bra t_inx2
104sintpz:
105 bsr ld_pzero
106 bra t_inx2
107sintmz:
108 bsr ld_mzero
109 bra t_inx2
110
111|
112| FINTRZ --- round to integer, always using round-to-zero
113|
114 .global sintrz
115sintrz:
116 movel #1,L_SCR1(%a6) |use rz mode for rounding
117| ;implicitly has extend precision
118| ;in upper word.
119 bras sintexc |shared rounding code
120|
121| SINTDO
122|
123| Input: a0 points to an IEEE extended format operand
124| Output: fp0 has the result
125|
126| Exceptions:
127|
128| If the subroutine results in an inexact operation, the inx2 and
129| ainx bits in the USER_FPSR are set.
130|
131| sintdo falls through into sintexc; sint and sintrz branch there.
132 .global sintdo
133sintdo:
134 bfextu FPCR_MODE(%a6){#2:#2},%d1 |use user's mode for rounding
135| ;implicitly has ext precision
136| ;in upper word.
137 movel %d1,L_SCR1(%a6) |save mode bits
138|
139| Real work of sint is in sintexc
140| L_SCR1 holds the rounding mode on entry: 0 rn, 1 rz, 2 rm, 3 rp.
141sintexc:
142 bclrb #sign_bit,LOCAL_EX(%a0) |convert to internal extended
143| ;format
144 sne LOCAL_SGN(%a0) |LOCAL_SGN := $ff if the sign bit was set
145 cmpw #0x403e,LOCAL_EX(%a0) |check if (unbiased) exp > 63
146 bgts out_rnge |branch if exp > 63
147 cmpw #0x3ffd,LOCAL_EX(%a0) |check if biased exp > $3ffd (|X| >= 1/2)
148 bgt in_rnge |if 63 >= exp >= -1, do calc
149|
150| Magnitude of input is below 1/2. Use the saved sign, and check for directed
151| rounding modes. L_SCR1 contains the rmode in the lower byte.
152|
153un_rnge:
154 btstb #1,L_SCR1+3(%a6) |check for rn and rz
155 beqs un_rnrz
156 tstb LOCAL_SGN(%a0) |check for sign
157 bnes un_rmrp_neg
158|
159| Sign is +. If rp, load +1.0, if rm, load +0.0
160|
161 cmpib #3,L_SCR1+3(%a6) |check for rp
162 beqs un_ldpone |if rp, load +1.0
163 bsr ld_pzero |if rm, load +0.0
164 bra t_inx2
165un_ldpone:
166 bsr ld_pone
167 bra t_inx2
168|
169| Sign is -. If rm, load -1.0, if rp, load -0.0
170|
171un_rmrp_neg:
172 cmpib #2,L_SCR1+3(%a6) |check for rm
173 beqs un_ldmone |if rm, load -1.0
174 bsr ld_mzero |if rp, load -0.0
175 bra t_inx2
176un_ldmone:
177 bsr ld_mone
178 bra t_inx2
179|
180| Rmode is rn or rz; return signed zero
181|
182un_rnrz:
183 tstb LOCAL_SGN(%a0) |check for sign
184 bnes un_rnrz_neg
185 bsr ld_pzero
186 bra t_inx2
187un_rnrz_neg:
188 bsr ld_mzero
189 bra t_inx2
190
191|
192| Unbiased exponent is greater than 63. All bits are significant. Return
193| the input.
194|
195out_rnge:
196 bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format
197 beqs intps |sign byte was zero: operand is positive
198 bsetb #sign_bit,LOCAL_EX(%a0) |else put the sign bit back
199intps:
200 fmovel %fpcr,-(%sp) |save the live FPCR
201 fmovel #0,%fpcr |load X with an all-zero FPCR
202 fmovex LOCAL_EX(%a0),%fp0 |if exp > 63
203| ;then return X to the user
204| ;there are no fraction bits
205 fmovel (%sp)+,%fpcr |restore the live FPCR
206 rts
207
208in_rnge:
209| ;shift off fraction bits
210 clrl %d0 |clear d0 - initial g,r,s for
211| ;dnrm_lp
212 movel #0x403e,%d1 |set threshold for dnrm_lp
213| ;assumes a0 points to operand
214 bsr dnrm_lp
215| ;returns unnormalized number
216| ;pointed by a0
217| ;output d0 supplies g,r,s
218| ;used by round
219 movel L_SCR1(%a6),%d1 |use selected rounding mode
220| (L_SCR1 was written by sint, sintrz or sintdo before sintexc was reached)
221|
222 bsr round |round the unnorm using the mode in d1
223| ;input a0 ptr to ext X
224| ; d0 g,r,s bits
225| ; d1 PREC/MODE info
226| ;output a0 ptr to rounded result
227| ;inexact flag set in USER_FPSR
228| ;if initial grs set
229|
230| normalize the rounded result and store value in fp0
231|
232 bsr nrm_set |normalize the unnorm
233| ;Input: a0 points to operand to
234| ;be normalized
235| ;Output: a0 points to normalized
236| ;result
237 bfclr LOCAL_SGN(%a0){#0:#8} |clear the sign byte; Z set if it was zero
238 beqs nrmrndp |positive: leave the sign bit clear
239 bsetb #sign_bit,LOCAL_EX(%a0) |return to IEEE extended format
240nrmrndp:
241 fmovel %fpcr,-(%sp) |save the live FPCR
242 fmovel #0,%fpcr |load the result with an all-zero FPCR
243 fmovex LOCAL_EX(%a0),%fp0 |move result to fp0
244 fmovel (%sp)+,%fpcr |restore the live FPCR
245 rts
246
247 |end
diff --git a/arch/m68k/fpsp040/skeleton.S b/arch/m68k/fpsp040/skeleton.S
new file mode 100644
index 000000000000..dbc1255a5e99
--- /dev/null
+++ b/arch/m68k/fpsp040/skeleton.S
@@ -0,0 +1,516 @@
1|
2| skeleton.sa 3.2 4/26/91
3|
4| This file contains code that is system dependent and will
5| need to be modified to install the FPSP.
6|
7| Each entry point for exception 'xxxx' begins with a 'jmp fpsp_xxxx'.
8| Put any target system specific handling that must be done immediately
9| before the jump instruction. If there is no handling necessary, then
10| the 'fpsp_xxxx' handler entry point should be placed in the exception
11| table so that the 'jmp' can be eliminated. If the FPSP determines that the
12| exception is one that must be reported then there will be a
13| return from the package by a 'jmp real_xxxx'. At that point
14| the machine state will be identical to the state before
15| the FPSP was entered. In particular, whatever condition
16| that caused the exception will still be pending when the FPSP
17| package returns. Thus, there will be system specific code
18| to handle the exception.
19|
20| If the exception was completely handled by the package, then
21| the return will be via a 'jmp fpsp_done'. Unless there is
22| OS specific work to be done (such as handling a context switch or
23| interrupt) the user program can be resumed via 'rte'.
24|
25| In the following skeleton code, some typical 'real_xxxx' handling
26| code is shown. This code may need to be moved to an appropriate
27| place in the target system, or rewritten.
28|
29
30| Copyright (C) Motorola, Inc. 1990
31| All Rights Reserved
32|
33| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
34| The copyright notice above does not evidence any
35| actual or intended publication of such source code.
36
37|
38| Modified for Linux-1.3.x by Jes Sorensen (jds@kom.auc.dk)
39|
40
41#include <linux/linkage.h>
42#include <asm/entry.h>
43#include <asm/offsets.h>
44
45|SKELETON idnt 2,1 | Motorola 040 Floating Point Software Package
46
47 |section 15
48|
49| The following counters are used for standalone testing
50|
51
52 |section 8
53
54#include "fpsp.h"
55
56 |xref b1238_fix
57
58|
59| Divide by Zero exception
60|
61| All dz exceptions are 'real', hence no fpsp_dz entry point.
62| The E1 flag is cleared in the saved FPU frame, then trap_c is called.
63 .global dz
64 .global real_dz
65dz:
66real_dz:
67 link %a6,#-LOCAL_SIZE |a6 frame used to address the FPSP offsets
68 fsave -(%sp) |save the FPU state frame on the stack
69 bclrb #E1,E_BYTE(%a6) |clear the E1 flag in the saved frame
70 frestore (%sp)+ |reload the modified frame
71 unlk %a6
72| Hand the exception over to the kernel trap code.
73 SAVE_ALL_INT
74 GET_CURRENT(%d0)
75 movel %sp,%sp@- | stack frame pointer argument
76 bsrl trap_c
77 addql #4,%sp | pop the argument
78 bral ret_from_exception
79
80|
81| Inexact exception
82|
83| All inexact exceptions are real, but the 'real' handler
84| will probably want to clear the pending exception.
85| The provided code will clear the E3 exception (if pending),
86| otherwise clear the E1 exception. The frestore is not really
87| necessary for E1 exceptions.
88|
89| Code following the 'inex' label is to handle bug #1232. In this
90| bug, if an E1 snan, ovfl, or unfl occurred, and the process was
91| swapped out before taking the exception, the exception taken on
92| return was inex, rather than the correct exception. The snan, ovfl,
93| and unfl exception to be taken must not have been enabled. The
94| fix is to check for E1, and the existence of one of snan, ovfl,
95| or unfl bits set in the fpsr. If any of these are set, branch
96| to the appropriate handler for the exception in the fpsr. Note
97| that this fix is only for d43b parts, and is skipped if the
98| version number is not $40.
99|
100|
101 .global real_inex
102 .global inex
103inex:
104 link %a6,#-LOCAL_SIZE
105 fsave -(%sp)
106 cmpib #VER_40,(%sp) |test version number
107 bnes not_fmt40 |not $40: skip the bug 1232 check (frame stays saved)
108 fmovel %fpsr,-(%sp) |push a copy of the FPSR for the bit tests
109 btstb #E1,E_BYTE(%a6) |test for E1 set
110 beqs not_b1232
111 btstb #snan_bit,2(%sp) |test for snan
112 beq inex_ckofl
113 addl #4,%sp |drop the FPSR copy
114 frestore (%sp)+
115 unlk %a6
116 bra snan |re-dispatch through the snan entry point
117inex_ckofl:
118 btstb #ovfl_bit,2(%sp) |test for ovfl
119 beq inex_ckufl
120 addl #4,%sp |drop the FPSR copy
121 frestore (%sp)+
122 unlk %a6
123 bra ovfl |re-dispatch through the ovfl entry point
124inex_ckufl:
125 btstb #unfl_bit,2(%sp) |test for unfl
126 beq not_b1232
127 addl #4,%sp |drop the FPSR copy
128 frestore (%sp)+
129 unlk %a6
130 bra unfl |re-dispatch through the unfl entry point
131
132|
133| We do not have the bug 1232 case. Clean up the stack and fall
134| through into real_inex, which saves the frame again.
135|
136not_b1232:
137 addl #4,%sp |drop the FPSR copy
138 frestore (%sp)+
139 unlk %a6
140
141real_inex:
142| not_fmt40 below is entered from inex with the link/fsave already done.
143 link %a6,#-LOCAL_SIZE
144 fsave -(%sp)
145not_fmt40:
146 bclrb #E3,E_BYTE(%a6) |clear and test E3 flag
147 beqs inex_cke1 |E3 was not set: clear E1 instead
148|
149| Clear dirty bit on dest register in the frame before branching
150| to b1238_fix.
151|
152 moveml %d0/%d1,USER_DA(%a6) |keep d0/d1 across the fix-up
153 bfextu CMDREG1B(%a6){#6:#3},%d0 |get dest reg no
154 bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit
155 bsrl b1238_fix |test for bug1238 case
156 moveml USER_DA(%a6),%d0/%d1 |restore d0/d1
157 bras inex_done
158inex_cke1:
159 bclrb #E1,E_BYTE(%a6)
160inex_done:
161 frestore (%sp)+
162 unlk %a6
163| Hand the exception over to the kernel trap code.
164 SAVE_ALL_INT
165 GET_CURRENT(%d0)
166 movel %sp,%sp@- | stack frame pointer argument
167 bsrl trap_c
168 addql #4,%sp | pop the argument
169 bral ret_from_exception
170
171|
172| Overflow exception
173| ovfl enters the FPSP; real_ovfl clears E3 (or else E1) and calls trap_c.
174 |xref fpsp_ovfl
175 .global real_ovfl
176 .global ovfl
177ovfl:
178 jmp fpsp_ovfl
179real_ovfl:
180
181 link %a6,#-LOCAL_SIZE
182 fsave -(%sp)
183 bclrb #E3,E_BYTE(%a6) |clear and test E3 flag
184 bnes ovfl_done |E3 was set: leave E1 alone
185 bclrb #E1,E_BYTE(%a6) |otherwise clear E1
186ovfl_done:
187 frestore (%sp)+
188 unlk %a6
189| Hand the exception over to the kernel trap code.
190 SAVE_ALL_INT
191 GET_CURRENT(%d0)
192 movel %sp,%sp@- | stack frame pointer argument
193 bsrl trap_c
194 addql #4,%sp | pop the argument
195 bral ret_from_exception
196
197|
198| Underflow exception
199| unfl enters the FPSP; real_unfl clears E3 (or else E1) and calls trap_c.
200 |xref fpsp_unfl
201 .global real_unfl
202 .global unfl
203unfl:
204 jmp fpsp_unfl
205real_unfl:
206
207 link %a6,#-LOCAL_SIZE
208 fsave -(%sp)
209 bclrb #E3,E_BYTE(%a6) |clear and test E3 flag
210 bnes unfl_done |E3 was set: leave E1 alone
211 bclrb #E1,E_BYTE(%a6) |otherwise clear E1
212unfl_done:
213 frestore (%sp)+
214 unlk %a6
215| Hand the exception over to the kernel trap code.
216 SAVE_ALL_INT
217 GET_CURRENT(%d0)
218 movel %sp,%sp@- | stack frame pointer argument
219 bsrl trap_c
220 addql #4,%sp | pop the argument
221 bral ret_from_exception
222
223|
224| Signalling NAN exception
225| snan enters the FPSP; real_snan clears E1 in the frame and calls trap_c.
226 |xref fpsp_snan
227 .global real_snan
228 .global snan
229snan:
230 jmp fpsp_snan
231real_snan:
232 link %a6,#-LOCAL_SIZE
233 fsave -(%sp)
234 bclrb #E1,E_BYTE(%a6) |snan is always an E1 exception
235 frestore (%sp)+
236 unlk %a6
237| Hand the exception over to the kernel trap code.
238 SAVE_ALL_INT
239 GET_CURRENT(%d0)
240 movel %sp,%sp@- | stack frame pointer argument
241 bsrl trap_c
242 addql #4,%sp | pop the argument
243 bral ret_from_exception
244
245|
246| Operand Error exception
247| operr enters the FPSP; real_operr clears E1 in the frame and calls trap_c.
248 |xref fpsp_operr
249 .global real_operr
250 .global operr
251operr:
252 jmp fpsp_operr
253real_operr:
254 link %a6,#-LOCAL_SIZE
255 fsave -(%sp)
256 bclrb #E1,E_BYTE(%a6) |operr is always an E1 exception
257 frestore (%sp)+
258 unlk %a6
259| Hand the exception over to the kernel trap code.
260 SAVE_ALL_INT
261 GET_CURRENT(%d0)
262 movel %sp,%sp@- | stack frame pointer argument
263 bsrl trap_c
264 addql #4,%sp | pop the argument
265 bral ret_from_exception
266
267
268|
269| BSUN exception
270|
271| This sample handler clears the nan bit in the FPSR, then calls trap_c.
272|
273 |xref fpsp_bsun
274 .global real_bsun
275 .global bsun
276bsun:
277 jmp fpsp_bsun
278real_bsun:
279 link %a6,#-LOCAL_SIZE
280 fsave -(%sp)
281 bclrb #E1,E_BYTE(%a6) |bsun is always an E1 exception
282 fmovel %FPSR,-(%sp) |copy the FPSR to the stack
283 bclrb #nan_bit,(%sp) |clear the nan bit in the first byte of the copy
284 fmovel (%sp)+,%FPSR |write the modified value back
285 frestore (%sp)+
286 unlk %a6
287| Hand the exception over to the kernel trap code.
288 SAVE_ALL_INT
289 GET_CURRENT(%d0)
290 movel %sp,%sp@- | stack frame pointer argument
291 bsrl trap_c
292 addql #4,%sp | pop the argument
293 bral ret_from_exception
294
295|
296| F-line exception
297|
298| A 'real' F-line exception is one that the FPSP isn't supposed to
299| handle. E.g. an instruction with a co-processor ID that is not 1.
300|
301| real_fline does no fsave/frestore; it goes straight to trap_c.
302 |xref fpsp_fline
303 .global real_fline
304 .global fline
305fline:
306 jmp fpsp_fline
307real_fline:
308
309 SAVE_ALL_INT
310 GET_CURRENT(%d0)
311 movel %sp,%sp@- | stack frame pointer argument
312 bsrl trap_c
313 addql #4,%sp | pop the argument
314 bral ret_from_exception
315
316|
317| Unsupported data type exception
318| unsupp enters the FPSP; real_unsupp clears E1 in the frame and calls trap_c.
319 |xref fpsp_unsupp
320 .global real_unsupp
321 .global unsupp
322unsupp:
323 jmp fpsp_unsupp
324real_unsupp:
325 link %a6,#-LOCAL_SIZE
326 fsave -(%sp)
327 bclrb #E1,E_BYTE(%a6) |unsupp is always an E1 exception
328 frestore (%sp)+
329 unlk %a6
330| Hand the exception over to the kernel trap code.
331 SAVE_ALL_INT
332 GET_CURRENT(%d0)
333 movel %sp,%sp@- | stack frame pointer argument
334 bsrl trap_c
335 addql #4,%sp | pop the argument
336 bral ret_from_exception
337
338|
339| Trace exception
340| No FPU frame handling is done here; control simply goes to trap.
341 .global real_trace
342real_trace:
343 |
344 bral trap
345
346|
347| fpsp_fmt_error --- exit point for frame format error
348|
349| The fpu stack frame does not match the frames existing
350| or planned at the time of this writing. The fpsp is
351| unable to handle frame sizes not in the following
352| version:size pairs:
353|
354| {4060, 4160} - busy frame
355| {4028, 4130} - unimp frame
356| {4000, 4100} - idle frame
357|
358| This entry point simply holds an f-line illegal value.
359| Replace this with a call to your kernel panic code or
360| code to handle future revisions of the fpu.
361|
362 .global fpsp_fmt_error
363fpsp_fmt_error:
364| NOTE(review): if the word below did not trap, execution would run on into fpsp_done.
365 .long 0xf27f0000 |f-line illegal
366
367|
368| fpsp_done --- FPSP exit point
369|
370| The exception has been handled by the package and we are ready
371| to return to user mode, but there may be OS specific code
372| to execute before we do. If there is, do it now.
373|
374| If the saved SR has the supervisor bit set, return at once with rte.
375
376 .global fpsp_done
377fpsp_done:
378 btst #0x5,%sp@ | supervisor bit set in saved SR?
379 beq .Lnotkern
380 rte | yes: return directly
381.Lnotkern:
382 SAVE_ALL_INT
383 GET_CURRENT(%d0)
384 tstb %curptr@(TASK_NEEDRESCHED)
385 jne ret_from_exception | deliver signals,
386 | reschedule etc..
387 RESTORE_ALL
388
389|
390| mem_write --- write to user or supervisor address space
391|
392| Writes to memory while in supervisor mode. copyout accomplishes
393| this via a 'moves' instruction. copyout is a UNIX SVR3 (and later) function.
394| If you don't have copyout, use the local copy of the function below.
395|
396| a0 - supervisor source address
397| a1 - user destination address
398| d0 - number of bytes to write (maximum count is 12)
399|
400| The supervisor source address is guaranteed to point into the supervisor
401| stack. The result is that a UNIX
402| process is allowed to sleep as a consequence of a page fault during
403| copyout. The probability of a page fault is exceedingly small because
404| the 68040 always reads the destination address and thus the page
405| faults should have already been handled.
406|
407| If the EXC_SR shows that the exception was from supervisor space,
408| then just do a dumb (and slow) memory move. In a UNIX environment
409| there shouldn't be any supervisor mode floating point exceptions.
410|
411 .global mem_write
412mem_write:
| A zero byte count must copy nothing. Without this check the byte loop
| below would decrement d0 past zero and keep copying, and the dbf loop
| in copyout would start from a count of -1.
 tstl %d0 |anything to copy?
 beqs mem_write_done |no: return without touching memory
413 btstb #5,EXC_SR(%a6) |check for supervisor state
414 beqs user_write
415super_write:
416 moveb (%a0)+,(%a1)+ |plain byte copy, d0 bytes
417 subql #1,%d0
418 bnes super_write
419 rts
420user_write:
421 movel %d1,-(%sp) |preserve d1 just in case
422 movel %d0,-(%sp) |copyout arguments: count,
423 movel %a1,-(%sp) |destination,
424 movel %a0,-(%sp) |source
425 jsr copyout
426 addw #12,%sp |pop the three arguments
427 movel (%sp)+,%d1
mem_write_done:
428 rts
429|
430| mem_read --- read from user or supervisor address space
431|
432| Reads from memory while in supervisor mode. copyin accomplishes
433| this via a 'moves' instruction. copyin is a UNIX SVR3 (and later) function.
434| If you don't have copyin, use the local copy of the function below.
435|
436| The FPSP calls mem_read to read the original F-line instruction in order
437| to extract the data register number when the 'Dn' addressing mode is
438| used.
439|
440|Input:
441| a0 - user source address
442| a1 - supervisor destination address
443| d0 - number of bytes to read (maximum count is 12)
444|
445| Like mem_write, mem_read always reads with a supervisor
446| destination address on the supervisor stack. Also like mem_write,
447| the EXC_SR is checked and a simple memory copy is done if reading
448| from supervisor space is indicated.
449|
450 .global mem_read
451mem_read:
| A zero byte count must copy nothing. Without this check the byte loop
| below would decrement d0 past zero and keep copying, and the dbf loop
| in copyin would start from a count of -1.
 tstl %d0 |anything to copy?
 beqs mem_read_done |no: return without touching memory
452 btstb #5,EXC_SR(%a6) |check for supervisor state
453 beqs user_read
454super_read:
455 moveb (%a0)+,(%a1)+ |plain byte copy, d0 bytes
456 subql #1,%d0
457 bnes super_read
458 rts
459user_read:
460 movel %d1,-(%sp) |preserve d1 just in case
461 movel %d0,-(%sp) |copyin arguments: count,
462 movel %a1,-(%sp) |destination,
463 movel %a0,-(%sp) |source
464 jsr copyin
465 addw #12,%sp |pop the three arguments
466 movel (%sp)+,%d1
mem_read_done:
467 rts
468
469|
470| Use these routines if your kernel doesn't have copyout/copyin equivalents.
471| Assumes that D0/D1/A0/A1 are scratch registers. The movec that would load
472| DFC (copyout) or SFC (copyin) is commented out below, so neither is changed.
473| Arguments are read from the stack: 4(sp) source, 8(sp) dest, 12(sp) count.
474copyout:
475 movel 4(%sp),%a0 | source
476 movel 8(%sp),%a1 | destination
477 movel 12(%sp),%d0 | count
478 subl #1,%d0 | dec count by 1 for dbra
479 movel #1,%d1 | value for the disabled movec; d1 is reloaded in the loop
480
481| DFC is already set
482| movec %d1,%DFC | set dfc for user data space
483moreout:
484 moveb (%a0)+,%d1 | fetch supervisor byte
485out_ea:
486 movesb %d1,(%a1)+ | write user byte
487 dbf %d0,moreout | loop on the low word of d0
488 rts
489
490copyin:
491 movel 4(%sp),%a0 | source
492 movel 8(%sp),%a1 | destination
493 movel 12(%sp),%d0 | count
494 subl #1,%d0 | dec count by 1 for dbra
495 movel #1,%d1 | value for the disabled movec; d1 is reloaded in the loop
496| SFC is already set
497| movec %d1,%SFC | set sfc for user space
498morein:
499in_ea:
500 movesb (%a0)+,%d1 | fetch user byte
501 moveb %d1,(%a1)+ | write supervisor byte
502 dbf %d0,morein | loop on the low word of d0
503 rts
504
505 .section .fixup,#alloc,#execinstr
506 .even
5071:
508 jbra fpsp040_die
509| Each entry below pairs an access address with the fixup label 1 above.
510 .section __ex_table,#alloc
511 .align 4
512| NOTE(review): presumably read by the kernel fault handler - confirm.
513 .long in_ea,1b
514 .long out_ea,1b
515
516 |end
diff --git a/arch/m68k/fpsp040/slog2.S b/arch/m68k/fpsp040/slog2.S
new file mode 100644
index 000000000000..517fa4563246
--- /dev/null
+++ b/arch/m68k/fpsp040/slog2.S
@@ -0,0 +1,188 @@
1|
2| slog2.sa 3.1 12/10/90
3|
4| The entry point slog10 computes the base-10
5| logarithm of an input argument X.
6| slog10d does the same except the input value is a
7| denormalized number.
8| sLog2 and sLog2d are the base-2 analogues.
9|
10| INPUT: Double-extended value in memory location pointed to
11| by address register a0.
12|
13| OUTPUT: log_10(X) or log_2(X) returned in floating-point
14| register fp0.
15|
16| ACCURACY and MONOTONICITY: The returned result is within 1.7
17| ulps in 64 significant bits, i.e. within 0.5003 ulp
18| to 53 bits if the result is subsequently rounded
19| to double precision. The result is provably monotonic
20| in double precision.
21|
22| SPEED: Two timings are measured, both in the copy-back mode.
23| The first one is measured when the function is invoked
24| the first time (so the instructions and data are not
25| in cache), and the second one is measured when the
26| function is reinvoked at the same input argument.
27|
28| ALGORITHM and IMPLEMENTATION NOTES:
29|
30| slog10d:
31|
32| Step 0. If X < 0, create a NaN and raise the invalid operation
33| flag. Otherwise, save FPCR in D1; set FpCR to default.
34| Notes: Default means round-to-nearest mode, no floating-point
35| traps, and precision control = double extended.
36|
37| Step 1. Call slognd to obtain Y = log(X), the natural log of X.
38| Notes: Even if X is denormalized, log(X) is always normalized.
39|
40| Step 2. Compute log_10(X) = log(X) * (1/log(10)).
41| 2.1 Restore the user FPCR
42| 2.2 Return ans := Y * INV_L10.
43|
44|
45| slog10:
46|
47| Step 0. If X < 0, create a NaN and raise the invalid operation
48| flag. Otherwise, save FPCR in D1; set FpCR to default.
49| Notes: Default means round-to-nearest mode, no floating-point
50| traps, and precision control = double extended.
51|
52| Step 1. Call sLogN to obtain Y = log(X), the natural log of X.
53|
54| Step 2. Compute log_10(X) = log(X) * (1/log(10)).
55| 2.1 Restore the user FPCR
56| 2.2 Return ans := Y * INV_L10.
57|
58|
59| sLog2d:
60|
61| Step 0. If X < 0, create a NaN and raise the invalid operation
62| flag. Otherwise, save FPCR in D1; set FpCR to default.
63| Notes: Default means round-to-nearest mode, no floating-point
64| traps, and precision control = double extended.
65|
66| Step 1. Call slognd to obtain Y = log(X), the natural log of X.
67| Notes: Even if X is denormalized, log(X) is always normalized.
68|
69| Step 2. Compute log_2(X) = log(X) * (1/log(2)).
70| 2.1 Restore the user FPCR
71| 2.2 Return ans := Y * INV_L2.
72|
73|
74| sLog2:
75|
76| Step 0. If X < 0, create a NaN and raise the invalid operation
77| flag. Otherwise, save FPCR in D1; set FpCR to default.
78| Notes: Default means round-to-nearest mode, no floating-point
79| traps, and precision control = double extended.
80|
81| Step 1. If X is not an integer power of two, i.e., X != 2^k,
82| go to Step 3.
83|
84| Step 2. Return k.
85| 2.1 Get integer k, X = 2^k.
86| 2.2 Restore the user FPCR.
87| 2.3 Return ans := convert-to-double-extended(k).
88|
89| Step 3. Call sLogN to obtain Y = log(X), the natural log of X.
90|
91| Step 4. Compute log_2(X) = log(X) * (1/log(2)).
92| 4.1 Restore the user FPCR
93| 4.2 Return ans := Y * INV_L2.
94|
95
96| Copyright (C) Motorola, Inc. 1990
97| All Rights Reserved
98|
99| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
100| The copyright notice above does not evidence any
101| actual or intended publication of such source code.
102
103|SLOG2 idnt 2,1 | Motorola 040 Floating Point Software Package
104
105 |section 8
106
107 |xref t_frcinx
108 |xref t_operr
109 |xref slogn
110 |xref slognd
111
112INV_L10: .long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000
113
114INV_L2: .long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000
115
116 .global slog10d
117slog10d:
118|--entry point for Log10(X), X is denormalized
119 movel (%a0),%d0 | ...first longword of X; N is set if X is negative
120 blt invalid | ...negative X is an operand error
121 movel %d1,-(%sp) | ...keep d1 across the call
122 clrl %d1 | ...slognd is entered with d1 = 0
123 bsr slognd | ...log(X), X denorm.
124 fmovel (%sp)+,%fpcr | ...pop the saved d1 straight into the FPCR
125 fmulx INV_L10,%fp0 | ...fp0 := log(X) * INV_L10
126 bra t_frcinx
127
128 .global slog10
129slog10:
130|--entry point for Log10(X), X is normalized
131
132 movel (%a0),%d0 | ...first longword of X; N is set if X is negative
133 blt invalid | ...negative X is an operand error
134 movel %d1,-(%sp) | ...keep d1 across the call
135 clrl %d1 | ...slogn is entered with d1 = 0
136 bsr slogn | ...log(X), X normal.
137 fmovel (%sp)+,%fpcr | ...pop the saved d1 straight into the FPCR
138 fmulx INV_L10,%fp0 | ...fp0 := log(X) * INV_L10
139 bra t_frcinx
140
141
142 .global slog2d
143slog2d:
144|--entry point for Log2(X), X is denormalized
145
146 movel (%a0),%d0 | ...first longword of X; N is set if X is negative
147 blt invalid | ...negative X is an operand error
148 movel %d1,-(%sp) | ...keep d1 across the call
149 clrl %d1 | ...slognd is entered with d1 = 0
150 bsr slognd | ...log(X), X denorm.
151 fmovel (%sp)+,%fpcr | ...pop the saved d1 straight into the FPCR
152 fmulx INV_L2,%fp0 | ...fp0 := log(X) * INV_L2
153 bra t_frcinx
154
155 .global slog2
156slog2:
157|--entry point for Log2(X), X is normalized
158 movel (%a0),%d0 | ...first longword of X; N is set if X is negative
159 blt invalid | ...negative X is an operand error
160
161 movel 8(%a0),%d0 | ...low mantissa longword
162 bnes continue | ...X is not 2^k
163
164 movel 4(%a0),%d0 | ...high mantissa longword
165 andl #0x7FFFFFFF,%d0 | ...drop the top (integer) bit
166| ...andl already sets Z from its result, so no separate tstl is needed
167 bnes continue | ...fraction bits set: X is not 2^k
168
169|--X = 2^k.
170 movew (%a0),%d0 | ...sign/exponent word (upper half of d0 is zero here)
171 andl #0x00007FFF,%d0 | ...keep the 15-bit biased exponent
172 subl #0x3FFF,%d0 | ...d0 := k
173 fmovel %d1,%fpcr | ...FPCR := d1 before the conversion
174 fmovel %d0,%fp0 | ...fp0 := k
175 bra t_frcinx
176
177continue:
178 movel %d1,-(%sp) | ...keep d1 across the call
179 clrl %d1 | ...slogn is entered with d1 = 0
180 bsr slogn | ...log(X), X normal.
181 fmovel (%sp)+,%fpcr | ...pop the saved d1 straight into the FPCR
182 fmulx INV_L2,%fp0 | ...fp0 := log(X) * INV_L2
183 bra t_frcinx
184
185invalid:
186 bra t_operr | ...reached from every entry point above when X is negative
187
188 |end
diff --git a/arch/m68k/fpsp040/slogn.S b/arch/m68k/fpsp040/slogn.S
new file mode 100644
index 000000000000..2aaa0725c035
--- /dev/null
+++ b/arch/m68k/fpsp040/slogn.S
@@ -0,0 +1,592 @@
1|
2| slogn.sa 3.1 12/10/90
3|
4| slogn computes the natural logarithm of an
5| input value. slognd does the same except the input value is a
6| denormalized number. slognp1 computes log(1+X), and slognp1d
7| computes log(1+X) for denormalized X.
8|
9| Input: Double-extended value in memory location pointed to by address
10| register a0.
11|
12| Output: log(X) or log(1+X) returned in floating-point register Fp0.
13|
14| Accuracy and Monotonicity: The returned result is within 2 ulps in
15| 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
16| result is subsequently rounded to double precision. The
17| result is provably monotonic in double precision.
18|
19| Speed: The program slogn takes approximately 190 cycles for input
20| argument X such that |X-1| >= 1/16, which is the usual
21| situation. For those arguments, slognp1 takes approximately
22| 210 cycles. For the less common arguments, the program will
23| run no worse than 10% slower.
24|
25| Algorithm:
26| LOGN:
27| Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
28| u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
29|
30| Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
31| significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
32| 2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
33|
34| Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
35| log(1+u) = poly.
36|
37| Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
38| by k*log(2) + (log(F) + poly). The values of log(F) are calculated
39| beforehand and stored in the program.
40|
41| lognp1:
42| Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
43| u where u = 2X/(2+X). Otherwise, move on to Step 2.
44|
45| Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
46| of the algorithm for LOGN and compute log(1+X) as
47| k*log(2) + log(F) + poly where poly approximates log(1+u),
48| u = (Y-F)/F.
49|
50| Implementation Notes:
51| Note 1. There are 64 different possible values for F, thus 64 log(F)'s
52| need to be tabulated. Moreover, the values of 1/F are also
53| tabulated so that the division in (Y-F)/F can be performed by a
54| multiplication.
55|
56| Note 2. In Step 2 of lognp1, in order to preserve accuracy, the value
57| Y-F has to be calculated carefully when 1/2 <= X < 3/2.
58|
59| Note 3. To fully exploit the pipeline, polynomials are usually separated
60| into two parts evaluated independently before being added up.
61|
62
63| Copyright (C) Motorola, Inc. 1990
64| All Rights Reserved
65|
66| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
67| The copyright notice above does not evidence any
68| actual or intended publication of such source code.
69
70|slogn idnt 2,1 | Motorola 040 Floating Point Software Package
71
72 |section 8
73
74#include "fpsp.h"
75
76BOUNDS1: .long 0x3FFEF07D,0x3FFF8841
77BOUNDS2: .long 0x3FFE8000,0x3FFFC000
78
79LOGOF2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
80
81one: .long 0x3F800000
82zero: .long 0x00000000
83infty: .long 0x7F800000
84negone: .long 0xBF800000
85
86LOGA6: .long 0x3FC2499A,0xB5E4040B
87LOGA5: .long 0xBFC555B5,0x848CB7DB
88
89LOGA4: .long 0x3FC99999,0x987D8730
90LOGA3: .long 0xBFCFFFFF,0xFF6F7E97
91
92LOGA2: .long 0x3FD55555,0x555555a4
93LOGA1: .long 0xBFE00000,0x00000008
94
95LOGB5: .long 0x3F175496,0xADD7DAD6
96LOGB4: .long 0x3F3C71C2,0xFE80C7E0
97
98LOGB3: .long 0x3F624924,0x928BCCFF
99LOGB2: .long 0x3F899999,0x999995EC
100
101LOGB1: .long 0x3FB55555,0x55555555
102TWO: .long 0x40000000,0x00000000
103
104LTHOLD: .long 0x3f990000,0x80000000,0x00000000,0x00000000
105
106LOGTBL:
107 .long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
108 .long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000
109 .long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
110 .long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
111 .long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
112 .long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
113 .long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
114 .long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
115 .long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
116 .long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
117 .long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
118 .long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
119 .long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
120 .long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
121 .long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
122 .long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
123 .long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
124 .long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
125 .long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
126 .long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
127 .long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
128 .long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
129 .long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
130 .long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
131 .long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
132 .long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
133 .long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
134 .long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
135 .long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
136 .long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
137 .long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
138 .long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
139 .long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
140 .long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
141 .long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
142 .long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
143 .long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
144 .long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
145 .long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
146 .long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
147 .long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
148 .long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
149 .long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
150 .long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
151 .long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
152 .long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
153 .long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
154 .long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
155 .long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
156 .long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
157 .long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
158 .long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
159 .long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
160 .long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
161 .long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
162 .long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
163 .long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
164 .long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
165 .long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
166 .long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
167 .long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
168 .long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
169 .long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
170 .long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
171 .long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
172 .long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
173 .long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
174 .long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
175 .long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
176 .long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
177 .long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
178 .long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
179 .long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
180 .long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
181 .long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
182 .long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
183 .long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
184 .long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
185 .long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
186 .long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
187 .long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
188 .long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
189 .long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
190 .long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
191 .long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
192 .long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000
193 .long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000
194 .long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
195 .long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
196 .long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
197 .long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000
198 .long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
199 .long 0x3FFE0000,0x94458094,0x45809446,0x00000000
200 .long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
201 .long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000
202 .long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
203 .long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
204 .long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
205 .long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
206 .long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
207 .long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
208 .long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
209 .long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
210 .long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
211 .long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
212 .long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
213 .long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
214 .long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
215 .long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
216 .long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
217 .long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
218 .long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
219 .long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
220 .long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
221 .long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
222 .long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
223 .long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
224 .long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
225 .long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
226 .long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
227 .long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
228 .long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
229 .long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
230 .long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
231 .long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
232 .long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
233 .long 0x3FFE0000,0x80808080,0x80808081,0x00000000
234 .long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
235
236 .set ADJK,L_SCR1
237
238 .set X,FP_SCR1
239 .set XDCARE,X+2
240 .set XFRAC,X+4
241
242 .set F,FP_SCR2
243 .set FFRAC,F+4
244
245 .set KLOG2,FP_SCR3
246
247 .set SAVEU,FP_SCR4
248
249 | xref t_frcinx
250 |xref t_extdnrm
251 |xref t_operr
252 |xref t_dz
253
254 .global slognd
255slognd:
256|--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
257
258 movel #-100,ADJK(%a6) | ...INPUT = 2^(ADJK) * FP0 (this value is replaced by -k on both paths below)
259
260|----normalize the input value by left shifting k bits (k to be determined
261|----below), adjusting exponent and storing -k to ADJK
262|----the value TWOTO100 is no longer needed.
263|----Note that this code assumes the denormalized input is NON-ZERO.
264
265 moveml %d2-%d7,-(%a7) | ...save some registers
266 movel #0x00000000,%d3 | ...D3 is exponent of smallest norm. #
267 movel 4(%a0),%d4
268 movel 8(%a0),%d5 | ...(D4,D5) is (Hi_X,Lo_X)
269 clrl %d2 | ...D2 used for holding K
270
271 tstl %d4 | ...is the high mantissa longword empty?
272 bnes HiX_not0
273
274HiX_0:
275 movel %d5,%d4 | ...Hi_X is zero: move Lo_X up a whole longword
276 clrl %d5
277 movel #32,%d2 | ...K starts at 32 for that longword move
278 clrl %d6
279 bfffo %d4{#0:#32},%d6 | ...D6 = bit offset of the first 1 in D4
280 lsll %d6,%d4 | ...bring that 1 to the top bit
281 addl %d6,%d2 | ...(D3,D4,D5) is normalized
282
283 movel %d3,X(%a6) | ...store the normalized value in X
284 movel %d4,XFRAC(%a6)
285 movel %d5,XFRAC+4(%a6)
286 negl %d2
287 movel %d2,ADJK(%a6) | ...ADJK = -K
288 fmovex X(%a6),%fp0
289 moveml (%a7)+,%d2-%d7 | ...restore registers
290 lea X(%a6),%a0 | ...LOGBGN reads its operand through A0
291 bras LOGBGN | ...begin regular log(X)
292
293
294HiX_not0:
295 clrl %d6
296 bfffo %d4{#0:#32},%d6 | ...find first 1
297 movel %d6,%d2 | ...get k
298 lsll %d6,%d4
299 movel %d5,%d7 | ...a copy of D5
300 lsll %d6,%d5
301 negl %d6
302 addil #32,%d6 | ...D6 = 32 - k
303 lsrl %d6,%d7 | ...the k top bits of Lo_X, moved to the bottom of D7
304 orl %d7,%d4 | ...(D3,D4,D5) normalized
305
306 movel %d3,X(%a6) | ...store the normalized value in X
307 movel %d4,XFRAC(%a6)
308 movel %d5,XFRAC+4(%a6)
309 negl %d2
310 movel %d2,ADJK(%a6) | ...ADJK = -k
311 fmovex X(%a6),%fp0
312 moveml (%a7)+,%d2-%d7 | ...restore registers
313 lea X(%a6),%a0 | ...LOGBGN reads its operand through A0
314 bras LOGBGN | ...begin regular log(X)
315
316
317 .global slogn
318slogn:
319|--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
320
321 fmovex (%a0),%fp0 | ...LOAD INPUT
322 movel #0x00000000,ADJK(%a6) | ...NO EXPONENT ADJUSTMENT ON THIS ENTRY
323
324LOGBGN:
325|--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
326|--A FINITE, NON-ZERO, NORMALIZED NUMBER.
327
328 movel (%a0),%d0 | ...SIGN/EXPONENT WORD INTO THE UPPER HALF OF D0
329 movew 4(%a0),%d0 | ...TOP 16 MANTISSA BITS INTO THE LOWER HALF (COMPACT FORM)
330
331 movel (%a0),X(%a6) | ...COPY THE THREE LONGWORDS OF THE INPUT TO X
332 movel 4(%a0),X+4(%a6)
333 movel 8(%a0),X+8(%a6)
334
335 cmpil #0,%d0 | ...CHECK IF X IS NEGATIVE
336 blt LOGNEG | ...LOG OF NEGATIVE ARGUMENT IS INVALID
337 cmp2l BOUNDS1,%d0 | ...X IS POSITIVE, CHECK IF X IS NEAR 1
338 bcc LOGNEAR1 | ...BOUNDS IS ROUGHLY [15/16, 17/16]
339
340LOGMAIN:
341|--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
342
343|--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
344|--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
345|--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
346|-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
347|--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
348|--LOG(1+U) CAN BE VERY EFFICIENT.
349|--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
350|--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
351
352|--GET K, Y, F, AND ADDRESS OF 1/F.
353 asrl #8,%d0
354 asrl #8,%d0 | ...SHIFTED 16 BITS, BIASED EXPO. OF X
355 subil #0x3FFF,%d0 | ...THIS IS K
356 addl ADJK(%a6),%d0 | ...ADJUST K, ORIGINAL INPUT MAY BE DENORM.
357 lea LOGTBL,%a0 | ...BASE ADDRESS OF 1/F AND LOG(F)
358 fmovel %d0,%fp1 | ...CONVERT K TO FLOATING-POINT FORMAT
359
360|--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
361 movel #0x3FFF0000,X(%a6) | ...X IS NOW Y, I.E. 2^(-K)*X
362 movel XFRAC(%a6),FFRAC(%a6)
363 andil #0xFE000000,FFRAC(%a6) | ...FIRST 7 BITS OF Y
364 oril #0x01000000,FFRAC(%a6) | ...GET F: ATTACH A 1 AT THE EIGHTH BIT
365 movel FFRAC(%a6),%d0 | ...READY TO GET ADDRESS OF 1/F
366 andil #0x7E000000,%d0 | ...THE SIX BITS BELOW THE TOP BIT ARE THE TABLE INDEX
367 asrl #8,%d0
368 asrl #8,%d0
369 asrl #4,%d0 | ...SHIFTED 20, D0 IS THE DISPLACEMENT (INDEX * 32 BYTES)
370 addal %d0,%a0 | ...A0 IS THE ADDRESS FOR 1/F
371
372 fmovex X(%a6),%fp0 | ...FP0 IS Y
373 movel #0x3fff0000,F(%a6) | ...F GETS THE SAME EXPONENT WORD AS Y
374 clrl F+8(%a6) | ...LOW MANTISSA LONGWORD OF F IS ZERO
375 fsubx F(%a6),%fp0 | ...Y-F
376 fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2/FP3 WHILE FP0 IS NOT READY
377|--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
378|--REGISTERS SAVED: FPCR, FP1, FP2, FP3
379
380LP1CONT1:
381|--A RE-ENTRY POINT FOR LOGNP1
382 fmulx (%a0),%fp0 | ...FP0 IS U = (Y-F)/F
383 fmulx LOGOF2,%fp1 | ...GET K*LOG2 WHILE FP0 IS NOT READY
384 fmovex %fp0,%fp2
385 fmulx %fp2,%fp2 | ...FP2 IS V=U*U
386 fmovex %fp1,KLOG2(%a6) | ...PUT K*LOG2 IN MEMORY, FREE FP1
387
388|--LOG(1+U) IS APPROXIMATED BY
389|--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
390|--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))]
391
392 fmovex %fp2,%fp3 | ...FP3 KEEPS V FOR THE LATER MULTIPLIES
393 fmovex %fp2,%fp1 | ...FP1 STARTS THE EVEN-COEFFICIENT CHAIN WITH V
394
395 fmuld LOGA6,%fp1 | ...V*A6
396 fmuld LOGA5,%fp2 | ...V*A5
397
398 faddd LOGA4,%fp1 | ...A4+V*A6
399 faddd LOGA3,%fp2 | ...A3+V*A5
400
401 fmulx %fp3,%fp1 | ...V*(A4+V*A6)
402 fmulx %fp3,%fp2 | ...V*(A3+V*A5)
403
404 faddd LOGA2,%fp1 | ...A2+V*(A4+V*A6)
405 faddd LOGA1,%fp2 | ...A1+V*(A3+V*A5)
406
407 fmulx %fp3,%fp1 | ...V*(A2+V*(A4+V*A6))
408 addal #16,%a0 | ...ADDRESS OF LOG(F)
409 fmulx %fp3,%fp2 | ...V*(A1+V*(A3+V*A5)), FP3 RELEASED
410
411 fmulx %fp0,%fp1 | ...U*V*(A2+V*(A4+V*A6))
412 faddx %fp2,%fp0 | ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED
413
414 faddx (%a0),%fp1 | ...LOG(F)+U*V*(A2+V*(A4+V*A6))
415 fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...RESTORE FP2/FP3
416 faddx %fp1,%fp0 | ...FP0 IS LOG(F) + LOG(1+U)
417
418 fmovel %d1,%fpcr | ...FPCR := D1 BEFORE THE LAST OPERATION
419 faddx KLOG2(%a6),%fp0 | ...FINAL ADD
420 bra t_frcinx
421
422
423LOGNEAR1:
424|--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
425 fmovex %fp0,%fp1
426 fsubs one,%fp1 | ...FP1 IS X-1
427 fadds one,%fp0 | ...FP0 IS X+1
428 faddx %fp1,%fp1 | ...FP1 IS 2(X-1)
429|--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
430|--IN U, U = 2(X-1)/(X+1) = FP1/FP0
431
432LP1CONT2:
433|--THIS IS A RE-ENTRY POINT FOR LOGNP1
434 fdivx %fp0,%fp1 | ...FP1 IS U
435 fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2/FP3
436|--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
437|--LET V=U*U, W=V*V, CALCULATE
438|--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
439|--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] )
440 fmovex %fp1,%fp0
441 fmulx %fp0,%fp0 | ...FP0 IS V
442 fmovex %fp1,SAVEU(%a6) | ...STORE U IN MEMORY, FREE FP1
443 fmovex %fp0,%fp1
444 fmulx %fp1,%fp1 | ...FP1 IS W
445
446 fmoved LOGB5,%fp3 | ...FP3 IS B5
447 fmoved LOGB4,%fp2 | ...FP2 IS B4
448
449 fmulx %fp1,%fp3 | ...W*B5
450 fmulx %fp1,%fp2 | ...W*B4
451
452 faddd LOGB3,%fp3 | ...B3+W*B5
453 faddd LOGB2,%fp2 | ...B2+W*B4
454
455 fmulx %fp3,%fp1 | ...W*(B3+W*B5), FP3 RELEASED
456
457 fmulx %fp0,%fp2 | ...V*(B2+W*B4)
458
459 faddd LOGB1,%fp1 | ...B1+W*(B3+W*B5)
460 fmulx SAVEU(%a6),%fp0 | ...FP0 IS U*V
461
462 faddx %fp2,%fp1 | ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
463 fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...FP2/FP3 RESTORED
464
465 fmulx %fp1,%fp0 | ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
466
467 fmovel %d1,%fpcr | ...FPCR := D1 BEFORE THE LAST OPERATION
468 faddx SAVEU(%a6),%fp0 | ...ADD U: FP0 IS THE FINAL POLYNOMIAL VALUE
469 bra t_frcinx
470 rts | ...NOT REACHED: THE BRA ABOVE IS UNCONDITIONAL AND NO LABEL LANDS HERE
471
472LOGNEG:
473|--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
474 bra t_operr
475
476 .global slognp1d
477slognp1d:
478|--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
479| Simply return the denorm: no computation is done here, the work is left to t_extdnrm
480
481 bra t_extdnrm
482
483 .global slognp1
484slognp1:
485|--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
486
487 fmovex (%a0),%fp0 | ...LOAD INPUT
488 fabsx %fp0 |test magnitude
489 fcmpx LTHOLD,%fp0 |compare with min threshold (LTHOLD is 2^(-102))
490 fbgt LP1REAL |if greater, continue
491 fmovel #0,%fpsr |clr N flag from compare
492 fmovel %d1,%fpcr |FPCR := d1 before loading the result
493 fmovex (%a0),%fp0 |return signed argument
494 bra t_frcinx
495
496LP1REAL:
497 fmovex (%a0),%fp0 | ...LOAD INPUT
498 movel #0x00000000,ADJK(%a6) | ...NO EXPONENT ADJUSTMENT IF LOGMAIN IS USED
499 fmovex %fp0,%fp1 | ...FP1 IS INPUT Z
500 fadds one,%fp0 | ...X := ROUND(1+Z)
501 fmovex %fp0,X(%a6)
502 movew XFRAC(%a6),XDCARE(%a6) | ...TOP MANTISSA WORD INTO THE WORD AT X+2
503 movel X(%a6),%d0 | ...D0 IS X IN COMPACT FORM (EXPONENT WORD : TOP MANTISSA WORD)
504 cmpil #0,%d0
505 ble LP1NEG0 | ...LOG OF ZERO OR -VE
506 cmp2l BOUNDS2,%d0
507 bcs LOGMAIN | ...BOUNDS2 IS [1/2,3/2]; CARRY SET MEANS X IS OUTSIDE IT
508|--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
509|--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
510|--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
511
512LP1NEAR1:
513|--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
514 cmp2l BOUNDS1,%d0
515 bcss LP1CARE | ...OUTSIDE BOUNDS1: TAKE THE TABLE-DRIVEN PATH
516
517LP1ONE16:
518|--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
519|--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
520 faddx %fp1,%fp1 | ...FP1 IS 2Z
521 fadds one,%fp0 | ...FP0 IS 1+X
522|--U = FP1/FP0
523 bra LP1CONT2
524
525LP1CARE:
526|--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
527|--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
528|--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
529|--THERE ARE ONLY TWO CASES.
530|--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
531|--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z
532|--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
533|--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
534
535 movel XFRAC(%a6),FFRAC(%a6)
536 andil #0xFE000000,FFRAC(%a6) | ...FIRST 7 BITS OF X
537 oril #0x01000000,FFRAC(%a6) | ...F OBTAINED
538 cmpil #0x3FFF8000,%d0 | ...SEE IF 1+Z > 1
539 bges KISZERO
540
541KISNEG1:
542 fmoves TWO,%fp0 | ...FP0 IS 2 (FIRST LONGWORD OF TWO READ AS A SINGLE)
543 movel #0x3fff0000,F(%a6)
544 clrl F+8(%a6)
545 fsubx F(%a6),%fp0 | ...2-F
546 movel FFRAC(%a6),%d0
547 andil #0x7E000000,%d0 | ...THE SIX BITS BELOW THE TOP BIT ARE THE TABLE INDEX
548 asrl #8,%d0
549 asrl #8,%d0
550 asrl #4,%d0 | ...D0 CONTAINS DISPLACEMENT FOR 1/F
551 faddx %fp1,%fp1 | ...GET 2Z
552 fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2/FP3
553 faddx %fp1,%fp0 | ...FP0 IS Y-F = (2-F)+2Z
554 lea LOGTBL,%a0 | ...A0 IS ADDRESS OF 1/F
555 addal %d0,%a0
556 fmoves negone,%fp1 | ...FP1 IS K = -1
557 bra LP1CONT1
558
559KISZERO:
560 fmoves one,%fp0 | ...FP0 IS 1
561 movel #0x3fff0000,F(%a6)
562 clrl F+8(%a6)
563 fsubx F(%a6),%fp0 | ...1-F
564 movel FFRAC(%a6),%d0
565 andil #0x7E000000,%d0 | ...THE SIX BITS BELOW THE TOP BIT ARE THE TABLE INDEX
566 asrl #8,%d0
567 asrl #8,%d0
568 asrl #4,%d0 | ...D0 CONTAINS DISPLACEMENT FOR 1/F
569 faddx %fp1,%fp0 | ...FP0 IS Y-F
570 fmovemx %fp2-%fp2/%fp3,-(%sp) | ...FP2/FP3 SAVED
571 lea LOGTBL,%a0
572 addal %d0,%a0 | ...A0 IS ADDRESS OF 1/F
573 fmoves zero,%fp1 | ...FP1 IS K = 0
574 bra LP1CONT1
575
576LP1NEG0:
577|--FPCR SAVED. D0 IS X IN COMPACT FORM.
578 cmpil #0,%d0
579 blts LP1NEG | ...X = 1+Z IS NEGATIVE; OTHERWISE IT IS ZERO
580LP1ZERO:
581 fmoves negone,%fp0 | ...FP0 IS -1 ON THE WAY TO t_dz
582
583 fmovel %d1,%fpcr | ...FPCR := D1
584 bra t_dz
585
586LP1NEG:
587 fmoves zero,%fp0 | ...FP0 IS 0 ON THE WAY TO t_operr
588
589 fmovel %d1,%fpcr | ...FPCR := D1
590 bra t_operr
591
592 |end
diff --git a/arch/m68k/fpsp040/smovecr.S b/arch/m68k/fpsp040/smovecr.S
new file mode 100644
index 000000000000..a0127fa55e9c
--- /dev/null
+++ b/arch/m68k/fpsp040/smovecr.S
@@ -0,0 +1,162 @@
1|
2| smovecr.sa 3.1 12/10/90
3|
4| The entry point sMOVECR returns the constant at the
5| offset given in the instruction field.
6|
7| Input: An offset in the instruction word.
8|
9| Output: The constant rounded to the user's rounding
10| mode unchecked for overflow.
11|
12| Modified: fp0.
13|
14|
15| Copyright (C) Motorola, Inc. 1990
16| All Rights Reserved
17|
18| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
19| The copyright notice above does not evidence any
20| actual or intended publication of such source code.
21
22|SMOVECR idnt 2,1 | Motorola 040 Floating Point Software Package
23
24 |section 8
25
26#include "fpsp.h"
27
28 |xref nrm_set
29 |xref round
30 |xref PIRN
31 |xref PIRZRM
32 |xref PIRP
33 |xref SMALRN
34 |xref SMALRZRM
35 |xref SMALRP
36 |xref BIGRN
37 |xref BIGRZRM
38 |xref BIGRP
39
40FZERO: .long 00000000
41|
42| FMOVECR
43|
44 .global smovcr
45smovcr:
46 bfextu CMDREG1B(%a6){#9:#7},%d0 |get offset (7-bit field, so $00 - $7f)
47 bfextu USER_FPCR(%a6){#26:#2},%d1 |get rmode
48|
49| check range of offset
50|
51 tstb %d0 |if zero, offset is to pi
52 beqs PI_TBL |it is pi
53 cmpib #0x0a,%d0 |check range $01 - $0a
54 bles Z_VAL |if in this range, return zero
55 cmpib #0x0e,%d0 |check range $0b - $0e
56 bles SM_TBL |valid constants in this range
57 cmpib #0x2f,%d0 |check range $0f - $2f
58 bles Z_VAL |if in this range, return zero
59 cmpib #0x3f,%d0 |check range $30 - $3f
60 ble BG_TBL |valid constants in this range
61Z_VAL: |also reached by fall-through for offsets $40 - $7f
62 fmoves FZERO,%fp0
63 rts
64PI_TBL:
65 tstb %d1 |offset is zero, check for rmode
66 beqs PI_RN |if zero, rn mode
67 cmpib #0x3,%d1 |check for rp
68 beqs PI_RP |if 3, rp mode
69PI_RZRM:
70 leal PIRZRM,%a0 |rmode is rz or rm, load PIRZRM in a0
71 bra set_finx
72PI_RN:
73 leal PIRN,%a0 |rmode is rn, load PIRN in a0
74 bra set_finx
75PI_RP:
76 leal PIRP,%a0 |rmode is rp, load PIRP in a0
77 bra set_finx
78SM_TBL:
79 subil #0xb,%d0 |make offset in 0 - 3 range
80 tstb %d1 |check for rmode
81 beqs SM_RN |if zero, rn mode
82 cmpib #0x3,%d1 |check for rp
83 beqs SM_RP |if 3, rp mode
84SM_RZRM:
85 leal SMALRZRM,%a0 |rmode is rz or rm, load SMALRZRM in a0
86 cmpib #0x2,%d0 |check if result is inex
87 ble set_finx |if 0 - 2, it is inexact
88 bra no_finx |if 3, it is exact
89SM_RN:
90 leal SMALRN,%a0 |rmode is rn, load SMALRN in a0
91 cmpib #0x2,%d0 |check if result is inex
92 ble set_finx |if 0 - 2, it is inexact
93 bra no_finx |if 3, it is exact
94SM_RP:
95 leal SMALRP,%a0 |rmode is rp, load SMALRP in a0
96 cmpib #0x2,%d0 |check if result is inex
97 ble set_finx |if 0 - 2, it is inexact
98 bra no_finx |if 3, it is exact
99BG_TBL:
100 subil #0x30,%d0 |make offset in 0 - f range
101 tstb %d1 |check for rmode
102 beqs BG_RN |if zero, rn mode
103 cmpib #0x3,%d1 |check for rp
104 beqs BG_RP |if 3, rp mode
105BG_RZRM:
106 leal BIGRZRM,%a0 |rmode is rz or rm, load BIGRZRM in a0
107 cmpib #0x1,%d0 |check if result is inex
108 ble set_finx |if 0 - 1, it is inexact
109 cmpib #0x7,%d0 |second check
110 ble no_finx |if 2 - 7, it is exact
111 bra set_finx |if 8 - f, it is inexact
112BG_RN:
113 leal BIGRN,%a0 |rmode is rn, load BIGRN in a0
114 cmpib #0x1,%d0 |check if result is inex
115 ble set_finx |if 0 - 1, it is inexact
116 cmpib #0x7,%d0 |second check
117 ble no_finx |if 2 - 7, it is exact
118 bra set_finx |if 8 - f, it is inexact
119BG_RP:
120 leal BIGRP,%a0 |rmode is rp, load BIGRP in a0
121 cmpib #0x1,%d0 |check if result is inex
122 ble set_finx |if 0 - 1, it is inexact
123 cmpib #0x7,%d0 |second check
124 ble no_finx |if 2 - 7, it is exact
125| bra set_finx ;if 8 - f, it is inexact
126set_finx:
127 orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
128no_finx:
129 mulul #12,%d0 |use offset to point into tables (12 bytes per entry)
130 movel %d1,L_SCR1(%a6) |load mode for round call
131 bfextu USER_FPCR(%a6){#24:#2},%d1 |get precision
132 tstl %d1 |check if extended precision
133|
134| Precision is extended
135|
136 bnes not_ext |if not extended, go round the constant
137 fmovemx (%a0,%d0),%fp0-%fp0 |return result in fp0
138 rts
139|
140| Precision is single or double
141|
142not_ext:
143 swap %d1 |rnd prec in upper word of d1
144 addl L_SCR1(%a6),%d1 |merge rmode in low word of d1
145 movel (%a0,%d0),FP_SCR1(%a6) |load first longword to temp storage
146 movel 4(%a0,%d0),FP_SCR1+4(%a6) |load second longword
147 movel 8(%a0,%d0),FP_SCR1+8(%a6) |load third longword
148 clrl %d0 |clear g,r,s
149 lea FP_SCR1(%a6),%a0 |a0 points to the copy handed to round
150 btstb #sign_bit,LOCAL_EX(%a0)
151 sne LOCAL_SGN(%a0) |convert to internal ext. format
152
153 bsr round |go round the mantissa
154
155 bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format
156 beqs fin_fcr |sign byte was zero: leave the sign bit alone
157 bsetb #sign_bit,LOCAL_EX(%a0)
158fin_fcr:
159 fmovemx (%a0),%fp0-%fp0 |return the rounded constant in fp0
160 rts
161
162 |end
diff --git a/arch/m68k/fpsp040/srem_mod.S b/arch/m68k/fpsp040/srem_mod.S
new file mode 100644
index 000000000000..8c8d7f50cc68
--- /dev/null
+++ b/arch/m68k/fpsp040/srem_mod.S
@@ -0,0 +1,422 @@
1|
2| srem_mod.sa 3.1 12/10/90
3|
4| The entry point sMOD computes the floating point MOD of the
5| input values X and Y. The entry point sREM computes the floating
6| point (IEEE) REM of the input values X and Y.
7|
8| INPUT
9| -----
10| Double-extended value Y is pointed to by address in register
11| A0. Double-extended value X is located in -12(A0). The values
12| of X and Y are both nonzero and finite; although either or both
13| of them can be denormalized. The special cases of zeros, NaNs,
14| and infinities are handled elsewhere.
15|
16| OUTPUT
17| ------
18| FREM(X,Y) or FMOD(X,Y), depending on entry point.
19|
20| ALGORITHM
21| ---------
22|
23| Step 1. Save and strip signs of X and Y: signX := sign(X),
24| signY := sign(Y), X := |X|, Y := |Y|,
25| signQ := signX EOR signY. Record whether MOD or REM
26| is requested.
27|
28| Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0.
29| If (L < 0) then
30| R := X, go to Step 4.
31| else
32| R := 2^(-L)X, j := L.
33| endif
34|
35| Step 3. Perform MOD(X,Y)
36| 3.1 If R = Y, go to Step 9.
37| 3.2 If R > Y, then { R := R - Y, Q := Q + 1}
38| 3.3 If j = 0, go to Step 4.
39| 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to
40| Step 3.1.
41|
42| Step 4. At this point, R = X - QY = MOD(X,Y). Set
43| Last_Subtract := false (used in Step 7 below). If
44| MOD is requested, go to Step 6.
45|
46| Step 5. R = MOD(X,Y), but REM(X,Y) is requested.
47| 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to
48| Step 6.
49| 5.2 If R > Y/2, then { set Last_Subtract := true,
50| Q := Q + 1, Y := signY*Y }. Go to Step 6.
51| 5.3 This is the tricky case of R = Y/2. If Q is odd,
52| then { Q := Q + 1, signX := -signX }.
53|
54| Step 6. R := signX*R.
55|
56| Step 7. If Last_Subtract = true, R := R - Y.
57|
58| Step 8. Return signQ, last 7 bits of Q, and R as required.
59|
60| Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus,
61| X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1),
62| R := 0. Return signQ, last 7 bits of Q, and R.
63|
64|
65
66| Copyright (C) Motorola, Inc. 1990
67| All Rights Reserved
68|
69| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
70| The copyright notice above does not evidence any
71| actual or intended publication of such source code.
72
73SREM_MOD: |idnt 2,1 | Motorola 040 Floating Point Software Package
74
75 |section 8
76
77#include "fpsp.h"
78
79 .set Mod_Flag,L_SCR3
80 .set SignY,FP_SCR3+4
81 .set SignX,FP_SCR3+8
82 .set SignQ,FP_SCR3+12
83 .set Sc_Flag,FP_SCR4
84
85 .set Y,FP_SCR1
86 .set Y_Hi,Y+4
87 .set Y_Lo,Y+8
88
89 .set R,FP_SCR2
90 .set R_Hi,R+4
91 .set R_Lo,R+8
92
93
94Scale: .long 0x00010000,0x80000000,0x00000000,0x00000000
95
96 |xref t_avoid_unsupp
97
98 .global smod
99smod:
100
101 movel #0,Mod_Flag(%a6) | ...Mod_Flag = 0 selects MOD
102 bras Mod_Rem
103
104 .global srem
105srem:
106
107 movel #1,Mod_Flag(%a6) | ...Mod_Flag = 1 selects REM; falls into Mod_Rem
108
109Mod_Rem:
110|..Save sign of X and Y
111 moveml %d2-%d7,-(%a7) | ...save data registers
112 movew (%a0),%d3 | ...sign/exponent word of Y
113 movew %d3,SignY(%a6)
114 andil #0x00007FFF,%d3 | ...Y := |Y|
115
116|
117 movel 4(%a0),%d4
118 movel 8(%a0),%d5 | ...(D3,D4,D5) is |Y|
119
120 tstl %d3 | ...zero exponent field: Y needs normalizing
121 bnes Y_Normal
122
123 movel #0x00003FFE,%d3 | ...$3FFD + 1
124 tstl %d4 | ...is the high mantissa longword empty?
125 bnes HiY_not0
126
127HiY_0:
128 movel %d5,%d4 | ...move lo(Y) up a whole longword
129 clrl %d5
130 subil #32,%d3 | ...and charge 32 to the exponent
131 clrl %d6
132 bfffo %d4{#0:#32},%d6 | ...D6 = bit offset of the first 1 in D4
133 lsll %d6,%d4
134 subl %d6,%d3 | ...(D3,D4,D5) is normalized
135| ...with bias $7FFD
136 bras Chk_X
137
138HiY_not0:
139 clrl %d6
140 bfffo %d4{#0:#32},%d6 | ...D6 = bit offset of the first 1 in D4
141 subl %d6,%d3
142 lsll %d6,%d4
143 movel %d5,%d7 | ...a copy of D5
144 lsll %d6,%d5
145 negl %d6
146 addil #32,%d6 | ...D6 = 32 - shift count
147 lsrl %d6,%d7 | ...top bits of lo(Y), moved to the bottom of D7
148 orl %d7,%d4 | ...(D3,D4,D5) normalized
149| ...with bias $7FFD
150 bras Chk_X
151
152Y_Normal:
153 addil #0x00003FFE,%d3 | ...(D3,D4,D5) normalized
154| ...with bias $7FFD
155
156Chk_X:
157 movew -12(%a0),%d0 | ...sign/exponent word of X
158 movew %d0,SignX(%a6)
159 movew SignY(%a6),%d1
160 eorl %d0,%d1
161 andil #0x00008000,%d1 | ...keep only the sign bit of signX EOR signY
162 movew %d1,SignQ(%a6) | ...sign(Q) obtained
163 andil #0x00007FFF,%d0 | ...X := |X|
164 movel -8(%a0),%d1
165 movel -4(%a0),%d2 | ...(D0,D1,D2) is |X|
166 tstl %d0 | ...zero exponent field: X needs normalizing
167 bnes X_Normal
168 movel #0x00003FFE,%d0
169 tstl %d1 | ...is the high mantissa longword empty?
170 bnes HiX_not0
171
172HiX_0:
173 movel %d2,%d1 | ...move lo(X) up a whole longword
174 clrl %d2
175 subil #32,%d0 | ...and charge 32 to the exponent
176 clrl %d6
177 bfffo %d1{#0:#32},%d6 | ...D6 = bit offset of the first 1 in D1
178 lsll %d6,%d1
179 subl %d6,%d0 | ...(D0,D1,D2) is normalized
180| ...with bias $7FFD
181 bras Init
182
183HiX_not0:
184 clrl %d6
185 bfffo %d1{#0:#32},%d6 | ...D6 = bit offset of the first 1 in D1
186 subl %d6,%d0
187 lsll %d6,%d1
188 movel %d2,%d7 | ...a copy of D2
189 lsll %d6,%d2
190 negl %d6
191 addil #32,%d6 | ...D6 = 32 - shift count
192 lsrl %d6,%d7 | ...top bits of lo(X), moved to the bottom of D7
193 orl %d7,%d1 | ...(D0,D1,D2) normalized
194| ...with bias $7FFD
195 bras Init
196
197X_Normal:
198 addil #0x00003FFE,%d0 | ...(D0,D1,D2) normalized
199| ...with bias $7FFD
200
201Init:
202|
203 movel %d3,L_SCR1(%a6) | ...save biased expo(Y)
204 movel %d0,L_SCR2(%a6) |save d0 (biased expo(X))
205 subl %d3,%d0 | ...L := expo(X)-expo(Y)
206| Move.L D0,L ...D0 is j
207 clrl %d6 | ...D6 := carry <- 0
208 clrl %d3 | ...D3 is Q
209 moveal #0,%a1 | ...A1 is k; j+k=L, Q=0 (A1 is only counted, nothing in this file reads it)
210
211|..(Carry,D1,D2) is R
212 tstl %d0
213 bges Mod_Loop
214
215|..expo(X) < expo(Y). Thus X = mod(X,Y)
216|
217 movel L_SCR2(%a6),%d0 |restore d0
218 bra Get_Mod
219
220|..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L
221
222
223Mod_Loop:
224 tstl %d6 | ...test carry bit (scs below sets only the low byte, so a set carry reads as positive)
225 bgts R_GT_Y
226
227|..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
228 cmpl %d4,%d1 | ...compare hi(R) and hi(Y)
229 bnes R_NE_Y
230 cmpl %d5,%d2 | ...compare lo(R) and lo(Y)
231 bnes R_NE_Y
232
233|..At this point, R = Y
234 bra Rem_is_0
235
236R_NE_Y:
237|..use the borrow of the previous compare
238 bcss R_LT_Y | ...borrow is set iff R < Y
239
240R_GT_Y:
241|..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
242|..and Y < (D1,D2) < 2Y. Either way, perform R - Y
243 subl %d5,%d2 | ...lo(R) - lo(Y)
244 subxl %d4,%d1 | ...hi(R) - hi(Y)
245 clrl %d6 | ...clear carry
246 addql #1,%d3 | ...Q := Q + 1
247
248R_LT_Y:
249|..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
250 tstl %d0 | ...see if j = 0.
251 beqs PostLoop
252
253 addl %d3,%d3 | ...Q := 2Q
254 addl %d2,%d2 | ...lo(R) = 2lo(R)
255 roxll #1,%d1 | ...hi(R) = 2hi(R) + carry
256 scs %d6 | ...set Carry if 2(R) overflows
257 addql #1,%a1 | ...k := k+1
258 subql #1,%d0 | ...j := j - 1
259|..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
260
261 bras Mod_Loop
262
263PostLoop:
264|..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
265
266|..normalize R.
267 movel L_SCR1(%a6),%d0 | ...new biased expo of R
268 tstl %d1 | ...is hi(R) empty?
269 bnes HiR_not0
270
271HiR_0:
272 movel %d2,%d1 | ...move lo(R) up a whole longword
273 clrl %d2
274 subil #32,%d0 | ...and charge 32 to the exponent
275 clrl %d6
276 bfffo %d1{#0:#32},%d6 | ...D6 = bit offset of the first 1 in D1
277 lsll %d6,%d1
278 subl %d6,%d0 | ...(D0,D1,D2) is normalized
279| ...with bias $7FFD
280 bras Get_Mod
281
282HiR_not0:
283 clrl %d6
284 bfffo %d1{#0:#32},%d6 | ...D6 = bit offset of the first 1 in D1
285 bmis Get_Mod | ...already normalized
286 subl %d6,%d0
287 lsll %d6,%d1
288 movel %d2,%d7 | ...a copy of D2
289 lsll %d6,%d2
290 negl %d6
291 addil #32,%d6 | ...D6 = 32 - shift count
292 lsrl %d6,%d7 | ...top bits of lo(R), moved to the bottom of D7
293 orl %d7,%d1 | ...(D0,D1,D2) normalized
294
295|
296Get_Mod:
297 cmpil #0x000041FE,%d0 | ...choose how expo(R) is brought back to a storable value
298 bges No_Scale
299Do_Scale:
300 movew %d0,R(%a6) | ...store R with the $3FFE offset still in its exponent
301 clrw R+2(%a6)
302 movel %d1,R_Hi(%a6)
303 movel %d2,R_Lo(%a6)
304 movel L_SCR1(%a6),%d6
305 movew %d6,Y(%a6) | ...Y is stored with the same offset
306 clrw Y+2(%a6)
307 movel %d4,Y_Hi(%a6)
308 movel %d5,Y_Lo(%a6)
309 fmovex R(%a6),%fp0 | ...no exception
310 movel #1,Sc_Flag(%a6) | ...Restore will multiply the result by Scale
311 bras ModOrRem
312No_Scale:
313 movel %d1,R_Hi(%a6)
314 movel %d2,R_Lo(%a6)
315 subil #0x3FFE,%d0 | ...take the $3FFE offset out of expo(R)
316 movew %d0,R(%a6)
317 clrw R+2(%a6)
318 movel L_SCR1(%a6),%d6
319 subil #0x3FFE,%d6 | ...and out of expo(Y)
320 movel %d6,L_SCR1(%a6)
321 fmovex R(%a6),%fp0
322 movew %d6,Y(%a6) | ...NOTE(review): unlike Do_Scale, Y+2 is not cleared here - confirm that word is ignored
323 movel %d4,Y_Hi(%a6)
324 movel %d5,Y_Lo(%a6)
325 movel #0,Sc_Flag(%a6) | ...no scaling needed in Restore
326
327|
328
329
330ModOrRem:
331 movel Mod_Flag(%a6),%d6
332 beqs Fix_Sign | ...Mod_Flag = 0: MOD requested, R is the answer
333
334 movel L_SCR1(%a6),%d6 | ...new biased expo(Y)
335 subql #1,%d6 | ...biased expo(Y/2)
336 cmpl %d6,%d0
337 blts Fix_Sign | ...expo(R) < expo(Y/2): R < Y/2
338 bgts Last_Sub | ...expo(R) > expo(Y/2): R > Y/2
339
340 cmpl %d4,%d1 | ...same exponent: compare hi(R) and hi(Y)
341 bnes Not_EQ
342 cmpl %d5,%d2 | ...compare lo(R) and lo(Y)
343 bnes Not_EQ
344 bra Tie_Case | ...R = Y/2 exactly
345
346Not_EQ:
347 bcss Fix_Sign | ...borrow from the compare above: R < Y/2
348
349Last_Sub:
350|
351 fsubx Y(%a6),%fp0 | ...no exceptions
352 addql #1,%d3 | ...Q := Q + 1
353
354|
355
356Fix_Sign:
357|..Get sign of X
358 movew SignX(%a6),%d6 | ...the word move sets N from the saved sign bit
359 bges Get_Q | ...X was positive: keep the result as it is
360 fnegx %fp0
361
362|..Get Q
363|
364Get_Q:
365 clrl %d6
366 movew SignQ(%a6),%d6 | ...D6 is sign(Q)
367 movel #8,%d7
368 lsrl %d7,%d6 | ...move the sign from bit 15 down to bit 7
369 andil #0x0000007F,%d3 | ...7 bits of Q
370 orl %d6,%d3 | ...sign and bits of Q
371 swap %d3 | ...into bits 16-23
372 fmovel %fpsr,%d6
373 andil #0xFF00FFFF,%d6 | ...clear bits 16-23 of the fpsr image
374 orl %d3,%d6
375 fmovel %d6,%fpsr | ...put Q in fpsr
376
377|
378Restore:
379 moveml (%a7)+,%d2-%d7 | ...restore the data registers saved in Mod_Rem
380 fmovel USER_FPCR(%a6),%fpcr
381 movel Sc_Flag(%a6),%d0
382 beqs Finish | ...Sc_Flag = 0: no scaling pending
383 fmulx Scale(%pc),%fp0 | ...may cause underflow
384 bra t_avoid_unsupp |check for denorm as a
385| ;result of the scaling
386
387Finish:
388 fmovex %fp0,%fp0 |capture exceptions & round
389 rts
390
391Rem_is_0:
392|..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
393 addql #1,%d3 | ...Q := Q + 1
394 cmpil #8,%d0 | ...D0 is j
395 bges Q_Big | ...j >= 8: the 7 bits of Q that get reported are all zero
396
397 lsll %d0,%d3 | ...Q := 2^j * Q
398 bras Set_R_0
399
400Q_Big:
401 clrl %d3
402
403Set_R_0:
404 fmoves #0x00000000,%fp0 | ...R := 0
405 movel #0,Sc_Flag(%a6) | ...no scaling in Restore
406 bra Fix_Sign
407
408Tie_Case:
409|..Check parity of Q
410 movel %d3,%d6
411 andil #0x00000001,%d6 | ...isolate the low bit of Q
412 tstl %d6 | ...redundant: the andil already set Z
413 beq Fix_Sign | ...Q is even
414
415|..Q is odd, Q := Q + 1, signX := -signX
416 addql #1,%d3
417 movew SignX(%a6),%d6
418 eoril #0x00008000,%d6 | ...flip the saved sign bit of X
419 movew %d6,SignX(%a6)
420 bra Fix_Sign
421
422 |end
diff --git a/arch/m68k/fpsp040/ssin.S b/arch/m68k/fpsp040/ssin.S
new file mode 100644
index 000000000000..043c91cdd657
--- /dev/null
+++ b/arch/m68k/fpsp040/ssin.S
@@ -0,0 +1,746 @@
1|
2| ssin.sa 3.3 7/29/91
3|
4| The entry point sSIN computes the sine of an input argument,
5| sCOS computes the cosine, and sSINCOS computes both. The
6| corresponding entry points with a "d" suffix compute the same
7| function values for denormalized inputs.
8|
9| Input: Double-extended number X in location pointed to
10| by address register a0.
11|
12| Output: The function value sin(X) or cos(X) returned in Fp0 if SIN or
13| COS is requested. Otherwise, for SINCOS, sin(X) is returned
14| in Fp0, and cos(X) is returned in Fp1.
15|
16| Modifies: Fp0 for SIN or COS; both Fp0 and Fp1 for SINCOS.
17|
18| Accuracy and Monotonicity: The returned result is within 1 ulp in
19| 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
20| result is subsequently rounded to double precision. The
21| result is provably monotonic in double precision.
22|
23| Speed: The programs sSIN and sCOS take approximately 150 cycles for
24| input argument X such that |X| < 15Pi, which is the usual
25| situation. The speed for sSINCOS is approximately 190 cycles.
26|
27| Algorithm:
28|
29| SIN and COS:
30| 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.
31|
32| 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.
33|
34| 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
35| k = N mod 4, so in particular, k = 0,1,2,or 3. Overwrite
36| k by k := k + AdjN.
37|
38| 4. If k is even, go to 6.
39|
40| 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. Return sgn*cos(r)
41| where cos(r) is approximated by an even polynomial in r,
42| 1 + r*r*(B1+s*(B2+ ... + s*B8)), s = r*r.
43| Exit.
44|
45| 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)
46| where sin(r) is approximated by an odd polynomial in r
47| r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r.
48| Exit.
49|
50| 7. If |X| > 1, go to 9.
51|
52| 8. (|X|<2**(-40)) If SIN is invoked, return X; otherwise return 1.
53|
54| 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3.
55|
56| SINCOS:
57| 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
58|
59| 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
60| k = N mod 4, so in particular, k = 0,1,2,or 3.
61|
62| 3. If k is even, go to 5.
63|
64| 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e.
65| j1 exclusive or with the l.s.b. of k.
66| sgn1 := (-1)**j1, sgn2 := (-1)**j2.
67| SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where
68| sin(r) and cos(r) are computed as odd and even polynomials
69| in r, respectively. Exit
70|
71| 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.
72| SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where
73| sin(r) and cos(r) are computed as odd and even polynomials
74| in r, respectively. Exit
75|
76| 6. If |X| > 1, go to 8.
77|
78| 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.
79|
80| 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
81|
82
83| Copyright (C) Motorola, Inc. 1990
84| All Rights Reserved
85|
86| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
87| The copyright notice above does not evidence any
88| actual or intended publication of such source code.
89
90|SSIN idnt 2,1 | Motorola 040 Floating Point Software Package
91
92 |section 8
93
94#include "fpsp.h"
95
96BOUNDS1: .long 0x3FD78000,0x4004BC7E
97TWOBYPI: .long 0x3FE45F30,0x6DC9C883
98
99SINA7: .long 0xBD6AAA77,0xCCC994F5
100SINA6: .long 0x3DE61209,0x7AAE8DA1
101
102SINA5: .long 0xBE5AE645,0x2A118AE4
103SINA4: .long 0x3EC71DE3,0xA5341531
104
105SINA3: .long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
106
107SINA2: .long 0x3FF80000,0x88888888,0x888859AF,0x00000000
108
109SINA1: .long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000
110
111COSB8: .long 0x3D2AC4D0,0xD6011EE3
112COSB7: .long 0xBDA9396F,0x9F45AC19
113
114COSB6: .long 0x3E21EED9,0x0612C972
115COSB5: .long 0xBE927E4F,0xB79D9FCF
116
117COSB4: .long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
118
119COSB3: .long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
120
121COSB2: .long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
122COSB1: .long 0xBF000000
123
124INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A
125
126TWOPI1: .long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
127TWOPI2: .long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
128
129 |xref PITBL
130
131 .set INARG,FP_SCR4
132
133 .set X,FP_SCR5
134 .set XDCARE,X+2
135 .set XFRAC,X+4
136
137 .set RPRIME,FP_SCR1
138 .set SPRIME,FP_SCR2
139
140 .set POSNEG1,L_SCR1
141 .set TWOTO63,L_SCR1
142
143 .set ENDFLAG,L_SCR2
144 .set N,L_SCR2
145
146 .set ADJN,L_SCR3
147
148 | xref t_frcinx
149 |xref t_extdnrm
150 |xref sto_cos
151
152 .global ssind
153ssind:
154|--SIN(X) = X FOR DENORMALIZED X
155 bra t_extdnrm | ...tail-branch to t_extdnrm (xref'd above, defined outside this file)
156
157 .global scosd
158scosd:
159|--COS(X) = 1 FOR DENORMALIZED X
160
161 fmoves #0x3F800000,%fp0 | ...FP0 := 1.0 (single-precision immediate)
162|
163| 9D25B Fix: Sometimes the previous fmove.s sets fpsr bits
164|
165 fmovel #0,%fpsr | ...so clear the whole fpsr before leaving
166|
167 bra t_frcinx
168
169 .global ssin
170ssin:
171|--SET ADJN TO 0
172 movel #0,ADJN(%a6)
173 bras SINBGN
174
175 .global scos
176scos:
177|--SET ADJN TO 1
178 movel #1,ADJN(%a6)
179
180SINBGN:
181|--CHECK IF |X| IS TOO SMALL OR LARGE (NO FPCR/FP1 SAVE IS DONE HERE)
182
183 fmovex (%a0),%fp0 | ...LOAD INPUT
184
185 movel (%a0),%d0 | ...first longword of X: sign/exponent in the upper word
186 movew 4(%a0),%d0 | ...lower word := top 16 bits of the mantissa
187 fmovex %fp0,X(%a6) | ...keep a copy of the input in X
188 andil #0x7FFFFFFF,%d0 | ...COMPACTIFY X
189
190 cmpil #0x3FD78000,%d0 | ...|X| >= 2**(-40)?
191 bges SOK1
192 bra SINSM | ...no: tiny-argument path
193
194SOK1:
195 cmpil #0x4004BC7E,%d0 | ...|X| < 15 PI?
196 blts SINMAIN
197 bra REDUCEX | ...no: general argument reduction
198
199SINMAIN:
200|--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI.
201|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
202 fmovex %fp0,%fp1
203 fmuld TWOBYPI,%fp1 | ...X*2/PI
204
205|--HIDE THE NEXT THREE INSTRUCTIONS
206 lea PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32 (0x200 = 32*16 BYTES IN)
207
208
209|--FP1 IS NOW READY
210 fmovel %fp1,N(%a6) | ...CONVERT TO INTEGER
211
212 movel N(%a6),%d0
213 asll #4,%d0 | ...N*16: BYTE OFFSET OF ENTRY N
214 addal %d0,%a1 | ...A1 IS THE ADDRESS OF N*PIBY2
215| ...WHICH IS IN TWO PIECES Y1 & Y2
216
217 fsubx (%a1)+,%fp0 | ...X-Y1 (Y1 IS READ AS EXTENDED)
218|--HIDE THE NEXT ONE
219 fsubs (%a1),%fp0 | ...FP0 IS R = (X-Y1)-Y2 (Y2 IS READ AS SINGLE)
220
221SINCONT:
222|--reached by fall-through from SINMAIN and by branch from RESTORE (end of REDUCEX)
223
224|--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
225 movel N(%a6),%d0
226 addl ADJN(%a6),%d0 | ...SEE IF D0 IS ODD OR EVEN
227 rorl #1,%d0 | ...D0 WAS ODD IFF D0 IS NEGATIVE
228 cmpil #0,%d0
229 blt COSPOLY | ...ODD: COSINE POLYNOMIAL; EVEN: FALL THROUGH TO SINPOLY
230
231SINPOLY:
232|--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
233|--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
234|--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
235|--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
236|--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
237|--WHERE T=S*S.
238|--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
239|--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
240 fmovex %fp0,X(%a6) | ...X IS R
241 fmulx %fp0,%fp0 | ...FP0 IS S
242|---HIDE THE NEXT TWO WHILE WAITING FOR FP0
243 fmoved SINA7,%fp3 | ...A7
244 fmoved SINA6,%fp2 | ...A6
245|--FP0 IS NOW READY
246 fmovex %fp0,%fp1
247 fmulx %fp1,%fp1 | ...FP1 IS T
248|--HIDE THE NEXT TWO WHILE WAITING FOR FP1
249
250 rorl #1,%d0 | ...SECOND ROTATE (THE FIRST WAS IN SINCONT)
251 andil #0x80000000,%d0
252| ...LEAST SIG. BIT OF D0 IN SIGN POSITION
253 eorl %d0,X(%a6) | ...X IS NOW R'= SGN*R
254
255 fmulx %fp1,%fp3 | ...TA7
256 fmulx %fp1,%fp2 | ...TA6
257
258 faddd SINA5,%fp3 | ...A5+TA7
259 faddd SINA4,%fp2 | ...A4+TA6
260
261 fmulx %fp1,%fp3 | ...T(A5+TA7)
262 fmulx %fp1,%fp2 | ...T(A4+TA6)
263
264 faddd SINA3,%fp3 | ...A3+T(A5+TA7)
265 faddx SINA2,%fp2 | ...A2+T(A4+TA6)
266
267 fmulx %fp3,%fp1 | ...T(A3+T(A5+TA7))
268
269 fmulx %fp0,%fp2 | ...S(A2+T(A4+TA6))
270 faddx SINA1,%fp1 | ...A1+T(A3+T(A5+TA7))
271 fmulx X(%a6),%fp0 | ...R'*S
272
273 faddx %fp2,%fp1 | ...[A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
274|--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
275|--FP2 RELEASED, RESTORE NOW AND TAKE FULL ADVANTAGE OF HIDING
276
277
278 fmulx %fp1,%fp0 | ...SIN(R')-R'
279|--FP1 RELEASED.
280
281 fmovel %d1,%FPCR |restore users exceptions (D1 presumably holds the user FPCR image - confirm)
282 faddx X(%a6),%fp0 |last inst - possible exception set
283 bra t_frcinx
284
285
286COSPOLY:
287|--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
288|--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
289|--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
290|--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
291|--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
292|--WHERE T=S*S.
293|--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
294|--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
295|--AND IS THEREFORE STORED AS SINGLE PRECISION.
296
297 fmulx %fp0,%fp0 | ...FP0 IS S
298|---HIDE THE NEXT TWO WHILE WAITING FOR FP0
299 fmoved COSB8,%fp2 | ...B8
300 fmoved COSB7,%fp3 | ...B7
301|--FP0 IS NOW READY
302 fmovex %fp0,%fp1
303 fmulx %fp1,%fp1 | ...FP1 IS T
304|--HIDE THE NEXT TWO WHILE WAITING FOR FP1
305 fmovex %fp0,X(%a6) | ...X IS S
306 rorl #1,%d0 | ...SECOND ROTATE (THE FIRST WAS IN SINCONT)
307 andil #0x80000000,%d0
308| ...LEAST SIG. BIT OF D0 IN SIGN POSITION
309
310 fmulx %fp1,%fp2 | ...TB8
311|--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
312 eorl %d0,X(%a6) | ...X IS NOW S'= SGN*S
313 andil #0x80000000,%d0 | ...(D0 IS ALREADY MASKED; ITS VALUE DOES NOT CHANGE)
314
315 fmulx %fp1,%fp3 | ...TB7
316|--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
317 oril #0x3F800000,%d0 | ...D0 IS SGN IN SINGLE
318 movel %d0,POSNEG1(%a6) | ...POSNEG1 := SGN (+-1.0 IN SINGLE)
319
320 faddd COSB6,%fp2 | ...B6+TB8
321 faddd COSB5,%fp3 | ...B5+TB7
322
323 fmulx %fp1,%fp2 | ...T(B6+TB8)
324 fmulx %fp1,%fp3 | ...T(B5+TB7)
325
326 faddd COSB4,%fp2 | ...B4+T(B6+TB8)
327 faddx COSB3,%fp3 | ...B3+T(B5+TB7)
328
329 fmulx %fp1,%fp2 | ...T(B4+T(B6+TB8))
330 fmulx %fp3,%fp1 | ...T(B3+T(B5+TB7))
331
332 faddx COSB2,%fp2 | ...B2+T(B4+T(B6+TB8))
333 fadds COSB1,%fp1 | ...B1+T(B3+T(B5+TB7))
334
335 fmulx %fp2,%fp0 | ...S(B2+T(B4+T(B6+TB8)))
336|--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
337|--FP2 RELEASED.
338
339
340 faddx %fp1,%fp0 | ...[B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))]
341|--FP1 RELEASED
342
343 fmulx X(%a6),%fp0 | ...S' TIMES THE SUM ABOVE
344
345 fmovel %d1,%FPCR |restore users exceptions (D1 presumably holds the user FPCR image - confirm)
346 fadds POSNEG1(%a6),%fp0 |last inst - possible exception set
347 bra t_frcinx
348
349
350SINBORS:
351|--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
352|--IF |X| < 2**(-40), RETURN X OR 1.
353 cmpil #0x3FFF8000,%d0 | ...compact form of 1.0 (NOTE(review): no branch to SINBORS is visible in this file)
354 bgts REDUCEX
355
356
357SINSM:
358 movel ADJN(%a6),%d0
359 cmpil #0,%d0
360 bgts COSTINY | ...ADJN > 0 (set to 1 by scos): return the cosine value
361
362SINTINY:
363 movew #0x0000,XDCARE(%a6) | ...JUST IN CASE
364 fmovel %d1,%FPCR |restore users exceptions
365 fmovex X(%a6),%fp0 |last inst - possible exception set
366 bra t_frcinx
367
368
369COSTINY:
370 fmoves #0x3F800000,%fp0 | ...FP0 := 1.0 (single-precision immediate)
371
372 fmovel %d1,%FPCR |restore users exceptions
373 fsubs #0x00800000,%fp0 |last inst - possible exception set (subtracts 2**(-126), the smallest normalized single)
374 bra t_frcinx
375
376
377REDUCEX:
378|--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
379|--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
380|--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
381
382 fmovemx %fp2-%fp5,-(%a7) | ...save FP2 through FP5
383 movel %d2,-(%a7) | ...save D2 (both are popped again at RESTORE)
384 fmoves #0x00000000,%fp1 | ...FP1 := 0, the low part r of (R,r)
385|--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
386|--there is a danger of unwanted overflow in first LOOP iteration. In this
387|--case, reduce argument by one remainder step to make subsequent reduction
388|--safe.
389 cmpil #0x7ffeffff,%d0 |is argument dangerously large?
390 bnes LOOP
391 movel #0x7ffe0000,FP_SCR2(%a6) |yes
392| ;create 2**16383*PI/2
393 movel #0xc90fdaa2,FP_SCR2+4(%a6)
394 clrl FP_SCR2+8(%a6)
395 ftstx %fp0 |test sign of argument
396 movel #0x7fdc0000,FP_SCR3(%a6) |create low half of 2**16383*
397| ;PI/2 at FP_SCR3
398 movel #0x85a308d3,FP_SCR3+4(%a6)
399 clrl FP_SCR3+8(%a6)
400 fblt red_neg |negative arg: leave both constants positive
401 orw #0x8000,FP_SCR2(%a6) |positive arg
402 orw #0x8000,FP_SCR3(%a6) |set the sign bit of both halves so the adds below subtract
403red_neg:
404 faddx FP_SCR2(%a6),%fp0 |high part of reduction is exact
405 fmovex %fp0,%fp1 |save high result in fp1
406 faddx FP_SCR3(%a6),%fp0 |low part of reduction
407 fsubx %fp0,%fp1 |determine low component of result
408 faddx FP_SCR3(%a6),%fp1 |fp0/fp1 are reduced argument.
409
410|--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
411|--integer quotient will be stored in N
412|--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
413
414LOOP:
415 fmovex %fp0,INARG(%a6) | ...+-2**K * F, 1 <= F < 2
416 movew INARG(%a6),%d0 | ...sign/exponent word of FP0
417 movel %d0,%a1 | ...save a copy of D0
418 andil #0x00007FFF,%d0 | ...drop the sign bit and the stale upper word
419 subil #0x00003FFF,%d0 | ...D0 IS K
420 cmpil #28,%d0
421 bles LASTLOOP | ...K <= 28: this is the final pass
422CONTLOOP:
423 subil #27,%d0 | ...D0 IS L := K-27
424 movel #0,ENDFLAG(%a6) | ...more passes follow
425 bras WORK
426LASTLOOP:
427 clrl %d0 | ...D0 IS L := 0
428 movel #1,ENDFLAG(%a6) | ...mark the final pass
429
430WORK:
431|--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
432|--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
433
434|--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
435|--2**L * (PIby2_1), 2**L * (PIby2_2)
436
437 movel #0x00003FFE,%d2 | ...BIASED EXPO OF 2/PI
438 subl %d0,%d2 | ...BIASED EXPO OF 2**(-L)*(2/PI)
439
440 movel #0xA2F9836E,FP_SCR1+4(%a6)
441 movel #0x4E44152A,FP_SCR1+8(%a6)
442 movew %d2,FP_SCR1(%a6) | ...FP_SCR1 is 2**(-L)*(2/PI)
443
444 fmovex %fp0,%fp2
445 fmulx FP_SCR1(%a6),%fp2
446|--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
447|--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
448|--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
449|--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
450|--US THE DESIRED VALUE IN FLOATING POINT.
451
452|--HIDE SIX CYCLES OF INSTRUCTION
453 movel %a1,%d2 | ...saved sign/exponent word of INARG
454 swap %d2 | ...its sign bit (bit 15) moves to bit 31
455 andil #0x80000000,%d2
456 oril #0x5F000000,%d2 | ...D2 IS SIGN(INARG)*2**63 IN SGL
457 movel %d2,TWOTO63(%a6)
458
459 movel %d0,%d2
460 addil #0x00003FFF,%d2 | ...BIASED EXPO OF 2**L * (PI/2)
461
462|--FP2 IS READY
463 fadds TWOTO63(%a6),%fp2 | ...THE FRACTIONAL PART OF FP2 IS ROUNDED
464
465|--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2
466 movew %d2,FP_SCR2(%a6)
467 clrw FP_SCR2+2(%a6)
468 movel #0xC90FDAA2,FP_SCR2+4(%a6)
469 clrl FP_SCR2+8(%a6) | ...FP_SCR2 is 2**(L) * Piby2_1
470
471|--FP2 IS READY
472 fsubs TWOTO63(%a6),%fp2 | ...FP2 is N
473
474 addil #0x00003FDD,%d0 | ...BIASED EXPO OF 2**L * Piby2_2
475 movew %d0,FP_SCR3(%a6)
476 clrw FP_SCR3+2(%a6)
477 movel #0x85A308D3,FP_SCR3+4(%a6)
478 clrl FP_SCR3+8(%a6) | ...FP_SCR3 is 2**(L) * Piby2_2
479
480 movel ENDFLAG(%a6),%d0 | ...D0 := ENDFLAG (ENDFLAG and N are both aliases of L_SCR2)
481
482|--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
483|--P2 = 2**(L) * Piby2_2
484 fmovex %fp2,%fp4
485 fmulx FP_SCR2(%a6),%fp4 | ...W = N*P1
486 fmovex %fp2,%fp5
487 fmulx FP_SCR3(%a6),%fp5 | ...w = N*P2
488 fmovex %fp4,%fp3
489|--we want P+p = W+w but |p| <= half ulp of P
490|--Then, we need to compute A := R-P and a := r-p
491 faddx %fp5,%fp3 | ...FP3 is P
492 fsubx %fp3,%fp4 | ...W-P
493
494 fsubx %fp3,%fp0 | ...FP0 is A := R - P
495 faddx %fp5,%fp4 | ...FP4 is p = (W-P)+w
496
497 fmovex %fp0,%fp3 | ...FP3 A
498 fsubx %fp4,%fp1 | ...FP1 is a := r - p
499
500|--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
501|--|r| <= half ulp of R.
502 faddx %fp1,%fp0 | ...FP0 is R := A+a
503|--No need to calculate r if this is the last loop
504 cmpil #0,%d0
505 bgt RESTORE | ...ENDFLAG was 1: reduction finished
506
507|--Need to calculate r
508 fsubx %fp0,%fp3 | ...A-R
509 faddx %fp3,%fp1 | ...FP1 is r := (A-R)+a
510 bra LOOP
511
512RESTORE:
513 fmovel %fp2,N(%a6) | ...store the last quotient FP2 as the integer N
514 movel (%a7)+,%d2 | ...pop D2 pushed at REDUCEX
515 fmovemx (%a7)+,%fp2-%fp5 | ...pop FP2 through FP5 pushed at REDUCEX
516
517
518 movel ADJN(%a6),%d0
519 cmpil #4,%d0 | ...ADJN is 4 only when entered through ssincos
520
521 blt SINCONT | ...ADJN < 4: continue the ssin/scos path
522 bras SCCONT | ...otherwise continue the ssincos path
523
524 .global ssincosd
525ssincosd:
526|--SIN AND COS OF X FOR DENORMALIZED X
527
528 fmoves #0x3F800000,%fp1 | ...FP1 := 1.0, the cosine value handed to sto_cos
529 bsr sto_cos |store cosine result
530 bra t_extdnrm | ...same exit as ssind, which documents SIN(X) = X for denormalized X
531
532 .global ssincos
533ssincos:
534|--SET ADJN TO 4
535 movel #4,ADJN(%a6) | ...RESTORE tests ADJN against 4 to return to SCCONT
536
537 fmovex (%a0),%fp0 | ...LOAD INPUT
538
539 movel (%a0),%d0 | ...first longword of X: sign/exponent in the upper word
540 movew 4(%a0),%d0 | ...lower word := top 16 bits of the mantissa
541 fmovex %fp0,X(%a6) | ...keep a copy of the input in X
542 andil #0x7FFFFFFF,%d0 | ...COMPACTIFY X
543
544 cmpil #0x3FD78000,%d0 | ...|X| >= 2**(-40)?
545 bges SCOK1
546 bra SCSM | ...no: tiny-argument path
547
548SCOK1:
549 cmpil #0x4004BC7E,%d0 | ...|X| < 15 PI?
550 blts SCMAIN
551 bra REDUCEX | ...no: general argument reduction
552
553
554SCMAIN:
555|--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI.
556|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
557 fmovex %fp0,%fp1
558 fmuld TWOBYPI,%fp1 | ...X*2/PI
559
560|--HIDE THE NEXT THREE INSTRUCTIONS
561 lea PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32 (0x200 = 32*16 BYTES IN)
562
563
564|--FP1 IS NOW READY
565 fmovel %fp1,N(%a6) | ...CONVERT TO INTEGER
566
567 movel N(%a6),%d0
568 asll #4,%d0 | ...N*16: BYTE OFFSET OF ENTRY N
569 addal %d0,%a1 | ...ADDRESS OF N*PIBY2, IN Y1, Y2
570
571 fsubx (%a1)+,%fp0 | ...X-Y1
572 fsubs (%a1),%fp0 | ...FP0 IS R = (X-Y1)-Y2
573
574SCCONT:
575|--continuation point from REDUCEX (also reached by fall-through from SCMAIN)
576
577|--HIDE THE NEXT TWO
578 movel N(%a6),%d0
579 rorl #1,%d0 | ...bit 0 of N moves to bit 31
580
581 cmpil #0,%d0 | ...D0 < 0 IFF N IS ODD
582 bge NEVEN | ...N even: NEVEN; N odd: fall through to NODD
583
584NODD:
585|--REGISTERS SAVED SO FAR: D0, A0, FP2.
586
587 fmovex %fp0,RPRIME(%a6)
588 fmulx %fp0,%fp0 | ...FP0 IS S = R*R
589 fmoved SINA7,%fp1 | ...A7
590 fmoved COSB8,%fp2 | ...B8
591 fmulx %fp0,%fp1 | ...SA7
592 movel %d2,-(%a7) | ...save D2
593 movel %d0,%d2 | ...D2 := D0 (N rotated right once at SCCONT)
594 fmulx %fp0,%fp2 | ...SB8
595 rorl #1,%d2 | ...bit 1 of N moves to bit 31
596 andil #0x80000000,%d2
597
598 faddd SINA6,%fp1 | ...A6+SA7
599 eorl %d0,%d2 | ...XOR with bit 31 of D0, i.e. bit 0 of N
600 andil #0x80000000,%d2 | ...D2 is now the sign bit of sgn2 (j2 in the header)
601 faddd COSB7,%fp2 | ...B7+SB8
602
603 fmulx %fp0,%fp1 | ...S(A6+SA7)
604 eorl %d2,RPRIME(%a6) | ...RPRIME := sgn2*R
605 movel (%a7)+,%d2 | ...restore D2
606 fmulx %fp0,%fp2 | ...S(B7+SB8)
607 rorl #1,%d0 | ...bit 1 of N moves to bit 31
608 andil #0x80000000,%d0 | ...D0 is now the sign bit of sgn1 (j1 in the header)
609
610 faddd SINA5,%fp1 | ...A5+S(A6+SA7)
611 movel #0x3F800000,POSNEG1(%a6) | ...1.0 in single
612 eorl %d0,POSNEG1(%a6) | ...POSNEG1 := sgn1*1.0
613 faddd COSB6,%fp2 | ...B6+S(B7+SB8)
614
615 fmulx %fp0,%fp1 | ...S(A5+S(A6+SA7))
616 fmulx %fp0,%fp2 | ...S(B6+S(B7+SB8))
617 fmovex %fp0,SPRIME(%a6)
618
619 faddd SINA4,%fp1 | ...A4+S(A5+S(A6+SA7))
620 eorl %d0,SPRIME(%a6) | ...SPRIME := sgn1*S
621 faddd COSB5,%fp2 | ...B5+S(B6+S(B7+SB8))
622
623 fmulx %fp0,%fp1 | ...S(A4+...)
624 fmulx %fp0,%fp2 | ...S(B5+...)
625
626 faddd SINA3,%fp1 | ...A3+S(A4+...)
627 faddd COSB4,%fp2 | ...B4+S(B5+...)
628
629 fmulx %fp0,%fp1 | ...S(A3+...)
630 fmulx %fp0,%fp2 | ...S(B4+...)
631
632 faddx SINA2,%fp1 | ...A2+S(A3+...)
633 faddx COSB3,%fp2 | ...B3+S(B4+...)
634
635 fmulx %fp0,%fp1 | ...S(A2+...)
636 fmulx %fp0,%fp2 | ...S(B3+...)
637
638 faddx SINA1,%fp1 | ...A1+S(A2+...)
639 faddx COSB2,%fp2 | ...B2+S(B3+...)
640
641 fmulx %fp0,%fp1 | ...S(A1+...)
642 fmulx %fp2,%fp0 | ...S(B2+...)
643
644
645
646 fmulx RPRIME(%a6),%fp1 | ...R'S(A1+...)
647 fadds COSB1,%fp0 | ...B1+S(B2...)
648 fmulx SPRIME(%a6),%fp0 | ...S'(B1+S(B2+...))
649
650 movel %d1,-(%sp) |save users mode & precision
651 andil #0xff,%d1 |mask off all exceptions
652 fmovel %d1,%FPCR
653 faddx RPRIME(%a6),%fp1 | ...COS(X)
654 bsr sto_cos |store cosine result
655 fmovel (%sp)+,%FPCR |restore users exceptions
656 fadds POSNEG1(%a6),%fp0 | ...SIN(X)
657
658 bra t_frcinx
659
660
661NEVEN:
662|--REGISTERS SAVED SO FAR: FP2.
663
664 fmovex %fp0,RPRIME(%a6)
665 fmulx %fp0,%fp0 | ...FP0 IS S = R*R
666 fmoved COSB8,%fp1 | ...B8
667 fmoved SINA7,%fp2 | ...A7
668 fmulx %fp0,%fp1 | ...SB8
669 fmovex %fp0,SPRIME(%a6)
670 fmulx %fp0,%fp2 | ...SA7
671 rorl #1,%d0 | ...second rotate (first at SCCONT): bit 1 of N moves to bit 31
672 andil #0x80000000,%d0 | ...D0 is now the sign bit of sgn1 (j1 in the header)
673 faddd COSB7,%fp1 | ...B7+SB8
674 faddd SINA6,%fp2 | ...A6+SA7
675 eorl %d0,RPRIME(%a6) | ...RPRIME := sgn1*R
676 eorl %d0,SPRIME(%a6) | ...SPRIME := sgn1*S
677 fmulx %fp0,%fp1 | ...S(B7+SB8)
678 oril #0x3F800000,%d0 | ...D0 := sgn1*1.0 in single
679 movel %d0,POSNEG1(%a6)
680 fmulx %fp0,%fp2 | ...S(A6+SA7)
681
682 faddd COSB6,%fp1 | ...B6+S(B7+SB8)
683 faddd SINA5,%fp2 | ...A5+S(A6+SA7)
684
685 fmulx %fp0,%fp1 | ...S(B6+S(B7+SB8))
686 fmulx %fp0,%fp2 | ...S(A5+S(A6+SA7))
687
688 faddd COSB5,%fp1 | ...B5+S(B6+S(B7+SB8))
689 faddd SINA4,%fp2 | ...A4+S(A5+S(A6+SA7))
690
691 fmulx %fp0,%fp1 | ...S(B5+...)
692 fmulx %fp0,%fp2 | ...S(A4+...)
693
694 faddd COSB4,%fp1 | ...B4+S(B5+...)
695 faddd SINA3,%fp2 | ...A3+S(A4+...)
696
697 fmulx %fp0,%fp1 | ...S(B4+...)
698 fmulx %fp0,%fp2 | ...S(A3+...)
699
700 faddx COSB3,%fp1 | ...B3+S(B4+...)
701 faddx SINA2,%fp2 | ...A2+S(A3+...)
702
703 fmulx %fp0,%fp1 | ...S(B3+...)
704 fmulx %fp0,%fp2 | ...S(A2+...)
705
706 faddx COSB2,%fp1 | ...B2+S(B3+...)
707 faddx SINA1,%fp2 | ...A1+S(A2+...)
708
709 fmulx %fp0,%fp1 | ...S(B2+...)
710 fmulx %fp2,%fp0 | ...s(a1+...)
711
712
713
714 fadds COSB1,%fp1 | ...B1+S(B2...)
715 fmulx RPRIME(%a6),%fp0 | ...R'S(A1+...)
716 fmulx SPRIME(%a6),%fp1 | ...S'(B1+S(B2+...))
717
718 movel %d1,-(%sp) |save users mode & precision
719 andil #0xff,%d1 |mask off all exceptions
720 fmovel %d1,%FPCR
721 fadds POSNEG1(%a6),%fp1 | ...COS(X)
722 bsr sto_cos |store cosine result
723 fmovel (%sp)+,%FPCR |restore users exceptions
724 faddx RPRIME(%a6),%fp0 | ...SIN(X)
725
726 bra t_frcinx
727
728SCBORS:
729 cmpil #0x3FFF8000,%d0 | ...compact form of 1.0 (NOTE(review): no branch to SCBORS is visible in this file)
730 bgt REDUCEX
731
732
733SCSM:
734 movew #0x0000,XDCARE(%a6) | ...clear the word at X+2
735 fmoves #0x3F800000,%fp1 | ...FP1 := 1.0 (single-precision immediate)
736
737 movel %d1,-(%sp) |save users mode & precision
738 andil #0xff,%d1 |mask off all exceptions
739 fmovel %d1,%FPCR
740 fsubs #0x00800000,%fp1 | ...FP1 := 1.0 - 2**(-126), the cosine value
741 bsr sto_cos |store cosine result
742 fmovel (%sp)+,%FPCR |restore users exceptions
743 fmovex X(%a6),%fp0 | ...SIN(X) = X
744 bra t_frcinx
745
746 |end
diff --git a/arch/m68k/fpsp040/ssinh.S b/arch/m68k/fpsp040/ssinh.S
new file mode 100644
index 000000000000..c8b3308bb143
--- /dev/null
+++ b/arch/m68k/fpsp040/ssinh.S
@@ -0,0 +1,135 @@
1|
2| ssinh.sa 3.1 12/10/90
3|
4| The entry point sSinh computes the hyperbolic sine of
5| an input argument; sSinhd does the same except for denormalized
6| input.
7|
8| Input: Double-extended number X in location pointed to
9| by address register a0.
10|
11| Output: The value sinh(X) returned in floating-point register Fp0.
12|
13| Accuracy and Monotonicity: The returned result is within 3 ulps in
14| 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
15| result is subsequently rounded to double precision. The
16| result is provably monotonic in double precision.
17|
18| Speed: The program sSINH takes approximately 280 cycles.
19|
20| Algorithm:
21|
22| SINH
23| 1. If |X| > 16380 log2, go to 3.
24|
25| 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formulae
26| Y = |X|, sgn = sign(X), and z = expm1(Y),
27| sinh(X) = sgn*(1/2)*( z + z/(1+z) ).
28| Exit.
29|
30| 3. If |X| > 16480 log2, go to 5.
31|
32| 4. (16380 log2 < |X| <= 16480 log2)
33| sinh(X) = sign(X) * exp(|X|)/2.
34| However, invoking exp(|X|) may cause premature overflow.
35| Thus, we calculate sinh(X) as follows:
36| Y := |X|
37| sgn := sign(X)
38| sgnFact := sgn * 2**(16380)
39| Y' := Y - 16381 log2
40| sinh(X) := sgnFact * exp(Y').
41| Exit.
42|
43| 5. (|X| > 16480 log2) sinh(X) must overflow. Return
44| sign(X)*Huge*Huge to generate overflow and an infinity with
45| the appropriate sign. Huge is the largest finite number in
46| extended format. Exit.
47|
48
49| Copyright (C) Motorola, Inc. 1990
50| All Rights Reserved
51|
52| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
53| The copyright notice above does not evidence any
54| actual or intended publication of such source code.
55
56|SSINH idnt 2,1 | Motorola 040 Floating Point Software Package
57
58 |section 8
59
60T1: .long 0x40C62D38,0xD3D64634 | ... 16381 LOG2 LEAD
61T2: .long 0x3D6F90AE,0xB1E75CC7 | ... 16381 LOG2 TRAIL
62
63 |xref t_frcinx
64 |xref t_ovfl
65 |xref t_extdnrm
66 |xref setox
67 |xref setoxm1
68
69 .global ssinhd
70ssinhd:
71|--SINH(X) = X FOR DENORMALIZED X
72
73 bra t_extdnrm | ...tail-branch to t_extdnrm (xref'd above, defined outside this file)
74
75 .global ssinh
76ssinh:
77 fmovex (%a0),%fp0 | ...LOAD INPUT
78
79 movel (%a0),%d0 | ...first longword of X: sign/exponent in the upper word
80 movew 4(%a0),%d0 | ...lower word := top 16 bits of the mantissa
81 movel %d0,%a1 | save a copy of original (compacted) operand
82 andl #0x7FFFFFFF,%d0 | ...drop the sign: compact form of |X|
83 cmpl #0x400CB167,%d0 | ...threshold for the usual case (16380 LOG2 per the header)
84 bgts SINHBIG
85
86|--THIS IS THE USUAL CASE, |X| < 16380 LOG2
87|--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
88
89 fabsx %fp0 | ...Y = |X|
90
91 moveml %a1/%d1,-(%sp) | ...save A1 (compact X with sign) and D1 across the call
92 fmovemx %fp0-%fp0,(%a0) | ...store Y at (a0), where setoxm1 presumably reads its input - confirm
93 clrl %d1 | ...D1 := 0 for the call (presumably the FPCR image setoxm1 applies - confirm)
94 bsr setoxm1 | ...FP0 IS Z = EXPM1(Y)
95 fmovel #0,%fpcr | ...clear fpcr for the intermediate arithmetic below
96 moveml (%sp)+,%a1/%d1 | ...restore A1 and D1
97
98 fmovex %fp0,%fp1
99 fadds #0x3F800000,%fp1 | ...1+Z
100 fmovex %fp0,-(%sp) | ...push Z
101 fdivx %fp1,%fp0 | ...Z/(1+Z)
102 movel %a1,%d0
103 andl #0x80000000,%d0 | ...sign bit of X
104 orl #0x3F000000,%d0 | ...D0 := SIGN(X)*0.5 in single
105 faddx (%sp)+,%fp0 | ...Z + Z/(1+Z), popping Z
106 movel %d0,-(%sp) | ...push SIGN(X)*0.5
107
108 fmovel %d1,%fpcr | ...fpcr := D1 (presumably the user FPCR image - confirm)
109 fmuls (%sp)+,%fp0 |last fp inst - possible exceptions set
110
111 bra t_frcinx
112
113SINHBIG:
114 cmpl #0x400CB2B3,%d0 | ...overflow threshold (16480 LOG2 per the header)
115 bgt t_ovfl
116 fabsx %fp0 | ...Y = |X|
117 fsubd T1(%pc),%fp0 | ...(|X|-16381LOG2_LEAD)
118 movel #0,-(%sp) | ...low mantissa longword of the scale factor
119 movel #0x80000000,-(%sp) | ...high mantissa longword: 1.0
120 movel %a1,%d0
121 andl #0x80000000,%d0 | ...sign bit of X
122 orl #0x7FFB0000,%d0 | ...biased exponent 0x7FFB, i.e. 2**16380
123 movel %d0,-(%sp) | ...EXTENDED FMT
124 fsubd T2(%pc),%fp0 | ...|X| - 16381 LOG2, ACCURATE
125
126 movel %d1,-(%sp) | ...save D1 across the call
127 clrl %d1 | ...D1 := 0 for the call (presumably the FPCR image setox applies - confirm)
128 fmovemx %fp0-%fp0,(%a0) | ...store the reduced argument at (a0) for setox
129 bsr setox
130 fmovel (%sp)+,%fpcr | ...pop the saved D1 straight into fpcr
131
132 fmulx (%sp)+,%fp0 |possible exception
133 bra t_frcinx
134
135 |end
diff --git a/arch/m68k/fpsp040/stan.S b/arch/m68k/fpsp040/stan.S
new file mode 100644
index 000000000000..b5c2a196e617
--- /dev/null
+++ b/arch/m68k/fpsp040/stan.S
@@ -0,0 +1,455 @@
1|
2| stan.sa 3.3 7/29/91
3|
4| The entry point stan computes the tangent of
5| an input argument;
6| stand does the same except for denormalized input.
7|
8| Input: Double-extended number X in location pointed to
9| by address register a0.
10|
11| Output: The value tan(X) returned in floating-point register Fp0.
12|
13| Accuracy and Monotonicity: The returned result is within 3 ulp in
14| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
15| result is subsequently rounded to double precision. The
16| result is provably monotonic in double precision.
17|
18| Speed: The program sTAN takes approximately 170 cycles for
19| input argument X such that |X| < 15Pi, which is the usual
20| situation.
21|
22| Algorithm:
23|
24| 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
25|
26| 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
27| k = N mod 2, so in particular, k = 0 or 1.
28|
29| 3. If k is odd, go to 5.
30|
31| 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a
32| rational function U/V where
33| U = r + r*s*(P1 + s*(P2 + s*P3)), and
34| V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r.
35| Exit.
36|
37| 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by a
38| rational function U/V where
39| U = r + r*s*(P1 + s*(P2 + s*P3)), and
40| V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,
41| -Cot(r) = -V/U. Exit.
42|
43| 6. If |X| > 1, go to 8.
44|
45| 7. (|X|<2**(-40)) Tan(X) = X. Exit.
46|
47| 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
48|
49
50| Copyright (C) Motorola, Inc. 1990
51| All Rights Reserved
52|
53| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
54| The copyright notice above does not evidence any
55| actual or intended publication of such source code.
56
57|STAN idnt 2,1 | Motorola 040 Floating Point Software Package
58
59 |section 8
60
61#include "fpsp.h"
62
63BOUNDS1: .long 0x3FD78000,0x4004BC7E
64TWOBYPI: .long 0x3FE45F30,0x6DC9C883
65
66TANQ4: .long 0x3EA0B759,0xF50F8688
67TANP3: .long 0xBEF2BAA5,0xA8924F04
68
69TANQ3: .long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000
70
71TANP2: .long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
72
73TANQ2: .long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
74
75TANP1: .long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
76
77TANQ1: .long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
78
79INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
80
81TWOPI1: .long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
82TWOPI2: .long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
83
84|--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
85|--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
86|--MOST 69 BITS LONG.
87 .global PITBL
88PITBL:
89 .long 0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
90 .long 0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
91 .long 0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
92 .long 0xC0040000,0xB6365E22,0xEE46F000,0x21480000
93 .long 0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
94 .long 0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
95 .long 0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
96 .long 0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
97 .long 0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
98 .long 0xC0040000,0x90836524,0x88034B96,0x20B00000
99 .long 0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
100 .long 0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
101 .long 0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
102 .long 0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
103 .long 0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
104 .long 0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
105 .long 0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
106 .long 0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
107 .long 0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
108 .long 0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
109 .long 0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
110 .long 0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
111 .long 0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
112 .long 0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
113 .long 0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
114 .long 0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
115 .long 0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
116 .long 0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
117 .long 0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
118 .long 0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
119 .long 0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
120 .long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
121 .long 0x00000000,0x00000000,0x00000000,0x00000000
122 .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
123 .long 0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
124 .long 0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
125 .long 0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
126 .long 0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
127 .long 0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
128 .long 0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
129 .long 0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
130 .long 0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
131 .long 0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
132 .long 0x40030000,0x8A3AE64F,0x76F80584,0x21080000
133 .long 0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
134 .long 0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
135 .long 0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
136 .long 0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
137 .long 0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
138 .long 0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
139 .long 0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
140 .long 0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
141 .long 0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
142 .long 0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
143 .long 0x40040000,0x8A3AE64F,0x76F80584,0x21880000
144 .long 0x40040000,0x90836524,0x88034B96,0xA0B00000
145 .long 0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
146 .long 0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
147 .long 0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
148 .long 0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
149 .long 0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
150 .long 0x40040000,0xB6365E22,0xEE46F000,0xA1480000
151 .long 0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
152 .long 0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
153 .long 0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
154
155 .set INARG,FP_SCR4 | scratch copy of the running argument, written at LOOP
156
157 .set TWOTO63,L_SCR1 | single-precision SIGN(INARG)*2**63 used to round to an integer
158 .set ENDFLAG,L_SCR2 | 1 on the final reduction pass, 0 otherwise
159 .set N,L_SCR3 | integer quotient stored at RESTORE
160
161 | xref t_frcinx
162 |xref t_extdnrm
163
164 .global stand
165stand: | entry for a denormalized operand
166|--TAN(X) = X FOR DENORMALIZED X
167
168 bra t_extdnrm | hand off to t_extdnrm (defined outside this file); nothing is computed here
169
170 .global stan
171stan: | tan(X): operand is read from (a0), result is left in fp0
172 fmovex (%a0),%fp0 | ...LOAD INPUT
173
174 movel (%a0),%d0 | first longword of X (sign bit on top, exponent below it)
175 movew 4(%a0),%d0 | low word replaced by the word at offset 4 (compact form)
176 andil #0x7FFFFFFF,%d0 | clear the sign bit: d0 = compact |X|
177
178 cmpil #0x3FD78000,%d0 | ...|X| >= 2**(-40)?
179 bges TANOK1
180 bra TANSM | tiny argument: TANSM returns X itself
181TANOK1:
182 cmpil #0x4004BC7E,%d0 | ...|X| < 15 PI?
183 blts TANMAIN
184 bra REDUCEX | large argument: general reduction first
185
186
187TANMAIN: | in: fp0 = X; out: fp0 = R, bit 31 of d0 = parity of N; falls into TANCONT
188|--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI.
189|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
190 fmovex %fp0,%fp1
191 fmuld TWOBYPI,%fp1 | ...X*2/PI
192
193|--HIDE THE NEXT TWO INSTRUCTIONS
194 leal PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32; 0x200 = 32 entries * 16 bytes, i.e. the N = 0 row
195
196|--FP1 IS NOW READY
197 fmovel %fp1,%d0 | ...CONVERT TO INTEGER
198
199 asll #4,%d0 | d0 = 16*N, the byte offset of row N (signed)
200 addal %d0,%a1 | ...ADDRESS N*PIBY2 IN Y1, Y2
201
202 fsubx (%a1)+,%fp0 | ...X-Y1
203|--HIDE THE NEXT ONE
204
205 fsubs (%a1),%fp0 | ...FP0 IS R = (X-Y1)-Y2
206
207 rorl #5,%d0 | bit 4 of 16*N (= low bit of N) rotates into bit 31
208 andil #0x80000000,%d0 | ...D0 WAS ODD IFF D0 < 0
209
210TANCONT: | in: fp0 = R, d0 negative iff N is odd (entered from TANMAIN and RESTORE)
211
212 cmpil #0,%d0
213 blt NODD | odd N is handled at NODD
214
215 fmovex %fp0,%fp1 | even N: evaluate U/V with fp1 = S, fp2 = P chain, fp3 = Q chain
216 fmulx %fp1,%fp1 | ...S = R*R
217
218 fmoved TANQ4,%fp3
219 fmoved TANP3,%fp2
220
221 fmulx %fp1,%fp3 | ...SQ4
222 fmulx %fp1,%fp2 | ...SP3
223
224 faddd TANQ3,%fp3 | ...Q3+SQ4
225 faddx TANP2,%fp2 | ...P2+SP3
226
227 fmulx %fp1,%fp3 | ...S(Q3+SQ4)
228 fmulx %fp1,%fp2 | ...S(P2+SP3)
229
230 faddx TANQ2,%fp3 | ...Q2+S(Q3+SQ4)
231 faddx TANP1,%fp2 | ...P1+S(P2+SP3)
232
233 fmulx %fp1,%fp3 | ...S(Q2+S(Q3+SQ4))
234 fmulx %fp1,%fp2 | ...S(P1+S(P2+SP3))
235
236 faddx TANQ1,%fp3 | ...Q1+S(Q2+S(Q3+SQ4))
237 fmulx %fp0,%fp2 | ...RS(P1+S(P2+SP3))
238
239 fmulx %fp3,%fp1 | ...S(Q1+S(Q2+S(Q3+SQ4)))
240
241
242 faddx %fp2,%fp0 | ...R+RS(P1+S(P2+SP3)), i.e. U
243
244
245 fadds #0x3F800000,%fp1 | ...1+S(Q1+...), i.e. V
246
247 fmovel %d1,%fpcr |restore users exceptions
248 fdivx %fp1,%fp0 |last inst - possible exception set; fp0 = U/V
249
250 bra t_frcinx | common exit (defined outside this file)
251
252NODD: | odd N: result is -V/U; here fp0 holds S and fp1 holds R
253 fmovex %fp0,%fp1 | fp1 keeps R; fp0 is squared on the next line
254 fmulx %fp0,%fp0 | ...S = R*R
255
256 fmoved TANQ4,%fp3
257 fmoved TANP3,%fp2
258
259 fmulx %fp0,%fp3 | ...SQ4
260 fmulx %fp0,%fp2 | ...SP3
261
262 faddd TANQ3,%fp3 | ...Q3+SQ4
263 faddx TANP2,%fp2 | ...P2+SP3
264
265 fmulx %fp0,%fp3 | ...S(Q3+SQ4)
266 fmulx %fp0,%fp2 | ...S(P2+SP3)
267
268 faddx TANQ2,%fp3 | ...Q2+S(Q3+SQ4)
269 faddx TANP1,%fp2 | ...P1+S(P2+SP3)
270
271 fmulx %fp0,%fp3 | ...S(Q2+S(Q3+SQ4))
272 fmulx %fp0,%fp2 | ...S(P1+S(P2+SP3))
273
274 faddx TANQ1,%fp3 | ...Q1+S(Q2+S(Q3+SQ4))
275 fmulx %fp1,%fp2 | ...RS(P1+S(P2+SP3))
276
277 fmulx %fp3,%fp0 | ...S(Q1+S(Q2+S(Q3+SQ4)))
278
279
280 faddx %fp2,%fp1 | ...R+RS(P1+S(P2+SP3)), i.e. U
281 fadds #0x3F800000,%fp0 | ...1+S(Q1+...), i.e. V
282
283
284 fmovex %fp1,-(%sp) | push U in extended format
285 eoril #0x80000000,(%sp) | flip the sign bit in memory: the stack now holds -U
286
287 fmovel %d1,%fpcr |restore users exceptions
288 fdivx (%sp)+,%fp0 |last inst - possible exception set; fp0 = V/(-U)
289
290 bra t_frcinx | common exit (defined outside this file)
291
292TANBORS: | NOTE(review): no branch to this label is visible in this file; stan goes to TANSM/REDUCEX directly
293|--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
294|--IF |X| < 2**(-40), RETURN X.
295 cmpil #0x3FFF8000,%d0
296 bgts REDUCEX
297
298TANSM: | tiny argument: the result is X itself
299
300 fmovex %fp0,-(%sp) | round-trip X through the stack ...
301 fmovel %d1,%fpcr |restore users exceptions
302 fmovex (%sp)+,%fp0 |last inst - possible exception set; ... so the reload runs under the FPCR just set from d1
303
304 bra t_frcinx | common exit (defined outside this file)
305
306
307REDUCEX: | general reduction for large |X|; falls into LOOP
308|--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
309|--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
310|--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
311
312 fmovemx %fp2-%fp5,-(%a7) | ...save FP2 through FP5
313 movel %d2,-(%a7) | d2 is used as scratch below; both saves are undone at RESTORE
314 fmoves #0x00000000,%fp1 | low part r of the (R,r) pair starts at 0
315
316|--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
317|--there is a danger of unwanted overflow in first LOOP iteration. In this
318|--case, reduce argument by one remainder step to make subsequent reduction
319|--safe.
320 cmpil #0x7ffeffff,%d0 |is argument dangerously large?
321 bnes LOOP
322 movel #0x7ffe0000,FP_SCR2(%a6) |yes
323| ;create 2**16383*PI/2
324 movel #0xc90fdaa2,FP_SCR2+4(%a6)
325 clrl FP_SCR2+8(%a6)
326 ftstx %fp0 |test sign of argument
327 movel #0x7fdc0000,FP_SCR3(%a6) |create low half of 2**16383*
328| ;PI/2 at FP_SCR3
329 movel #0x85a308d3,FP_SCR3+4(%a6)
330 clrl FP_SCR3+8(%a6)
331 fblt red_neg | negative argument: leave both constants positive
332 orw #0x8000,FP_SCR2(%a6) |positive arg: set the sign bit so the adds below subtract
333 orw #0x8000,FP_SCR3(%a6)
334red_neg:
335 faddx FP_SCR2(%a6),%fp0 |high part of reduction is exact
336 fmovex %fp0,%fp1 |save high result in fp1
337 faddx FP_SCR3(%a6),%fp0 |low part of reduction
338 fsubx %fp0,%fp1 |determine low component of result
339 faddx FP_SCR3(%a6),%fp1 |fp0/fp1 are reduced argument.
340
341|--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
342|--integer quotient will be stored in N
343|--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
344
345LOOP: | one reduction pass over (R,r) = (fp0,fp1); repeats until the pass run with L = 0
346 fmovex %fp0,INARG(%a6) | ...+-2**K * F, 1 <= F < 2
347 movew INARG(%a6),%d0 | only the low word is loaded; the upper half of d0 is masked off below
348 movel %d0,%a1 | ...save a copy of D0
349 andil #0x00007FFF,%d0 | keep the biased exponent
350 subil #0x00003FFF,%d0 | ...D0 IS K
351 cmpil #28,%d0
352 bles LASTLOOP | K <= 28: this is the final pass
353CONTLOOP:
354 subil #27,%d0 | ...D0 IS L := K-27
355 movel #0,ENDFLAG(%a6)
356 bras WORK
357LASTLOOP:
358 clrl %d0 | ...D0 IS L := 0
359 movel #1,ENDFLAG(%a6)
360
361WORK:
362|--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
363|--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
364
365|--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
366|--2**L * (PIby2_1), 2**L * (PIby2_2)
367
368 movel #0x00003FFE,%d2 | ...BIASED EXPO OF 2/PI
369 subl %d0,%d2 | ...BIASED EXPO OF 2**(-L)*(2/PI)
370
371 movel #0xA2F9836E,FP_SCR1+4(%a6)
372 movel #0x4E44152A,FP_SCR1+8(%a6)
373 movew %d2,FP_SCR1(%a6) | ...FP_SCR1 is 2**(-L)*(2/PI)
374
375 fmovex %fp0,%fp2
376 fmulx FP_SCR1(%a6),%fp2 | fp2 = R * 2**(-L) * (2/PI)
377|--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
378|--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
379|--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
380|--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
381|--US THE DESIRED VALUE IN FLOATING POINT.
382
383|--HIDE SIX CYCLES OF INSTRUCTION
384 movel %a1,%d2
385 swap %d2 | sign/exponent word moves to the upper half
386 andil #0x80000000,%d2 | keep only the sign bit of INARG
387 oril #0x5F000000,%d2 | ...D2 IS SIGN(INARG)*2**63 IN SGL
388 movel %d2,TWOTO63(%a6)
389
390 movel %d0,%d2
391 addil #0x00003FFF,%d2 | ...BIASED EXPO OF 2**L * (PI/2)
392
393|--FP2 IS READY
394 fadds TWOTO63(%a6),%fp2 | ...THE FRACTIONAL PART OF FP2 IS ROUNDED
395
396|--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2
397 movew %d2,FP_SCR2(%a6)
398 clrw FP_SCR2+2(%a6)
399 movel #0xC90FDAA2,FP_SCR2+4(%a6)
400 clrl FP_SCR2+8(%a6) | ...FP_SCR2 is 2**(L) * Piby2_1
401
402|--FP2 IS READY
403 fsubs TWOTO63(%a6),%fp2 | ...FP2 is N
404
405 addil #0x00003FDD,%d0 | d0 = L + 0x3FDD, exponent word for the trailing piece
406 movew %d0,FP_SCR3(%a6)
407 clrw FP_SCR3+2(%a6)
408 movel #0x85A308D3,FP_SCR3+4(%a6)
409 clrl FP_SCR3+8(%a6) | ...FP_SCR3 is 2**(L) * Piby2_2
410
411 movel ENDFLAG(%a6),%d0 | d0 now carries the last-pass flag tested below
412
413|--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
414|--P2 = 2**(L) * Piby2_2
415 fmovex %fp2,%fp4
416 fmulx FP_SCR2(%a6),%fp4 | ...W = N*P1
417 fmovex %fp2,%fp5
418 fmulx FP_SCR3(%a6),%fp5 | ...w = N*P2
419 fmovex %fp4,%fp3
420|--we want P+p = W+w but |p| <= half ulp of P
421|--Then, we need to compute A := R-P and a := r-p
422 faddx %fp5,%fp3 | ...FP3 is P
423 fsubx %fp3,%fp4 | ...W-P
424
425 fsubx %fp3,%fp0 | ...FP0 is A := R - P
426 faddx %fp5,%fp4 | ...FP4 is p = (W-P)+w
427
428 fmovex %fp0,%fp3 | ...FP3 A
429 fsubx %fp4,%fp1 | ...FP1 is a := r - p
430
431|--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
432|--|r| <= half ulp of R.
433 faddx %fp1,%fp0 | ...FP0 is R := A+a
434|--No need to calculate r if this is the last loop
435 cmpil #0,%d0
436 bgt RESTORE | ENDFLAG was 1: reduction finished
437
438|--Need to calculate r
439 fsubx %fp0,%fp3 | ...A-R
440 faddx %fp3,%fp1 | ...FP1 is r := (A-R)+a
441 bra LOOP
442
443RESTORE: | reduction done: fp0 = R, fp2 = N from the final pass
444 fmovel %fp2,N(%a6) | store N as an integer before fp2 is restored
445 movel (%a7)+,%d2 | undo the d2 save made at REDUCEX
446 fmovemx (%a7)+,%fp2-%fp5 | undo the fp2-fp5 save made at REDUCEX
447
448
449 movel N(%a6),%d0
450 rorl #1,%d0 | low bit of N rotates into bit 31, the bit TANCONT branches on
451
452
453 bra TANCONT
454
455 |end
diff --git a/arch/m68k/fpsp040/stanh.S b/arch/m68k/fpsp040/stanh.S
new file mode 100644
index 000000000000..33b009802243
--- /dev/null
+++ b/arch/m68k/fpsp040/stanh.S
@@ -0,0 +1,185 @@
1|
2| stanh.sa 3.1 12/10/90
3|
4| The entry point sTanh computes the hyperbolic tangent of
5| an input argument; sTanhd does the same except for denormalized
6| input.
7|
8| Input: Double-extended number X in location pointed to
9| by address register a0.
10|
11| Output: The value tanh(X) returned in floating-point register Fp0.
12|
13| Accuracy and Monotonicity: The returned result is within 3 ulps in
14| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
15| result is subsequently rounded to double precision. The
16| result is provably monotonic in double precision.
17|
18| Speed: The program stanh takes approximately 270 cycles.
19|
20| Algorithm:
21|
22| TANH
23| 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3.
24|
25| 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by
26| sgn := sign(X), y := 2|X|, z := expm1(Y), and
27| tanh(X) = sgn*( z/(2+z) ).
28| Exit.
29|
30| 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1,
31| go to 7.
32|
33| 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6.
34|
35| 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by
36| sgn := sign(X), y := 2|X|, z := exp(Y),
37| tanh(X) = sgn - [ sgn*2/(1+z) ].
38| Exit.
39|
40| 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we
41| calculate Tanh(X) by
42| sgn := sign(X), Tiny := 2**(-126),
43| tanh(X) := sgn - sgn*Tiny.
44| Exit.
45|
46| 7. (|X| < 2**(-40)). Tanh(X) = X. Exit.
47|
48
49| Copyright (C) Motorola, Inc. 1990
50| All Rights Reserved
51|
52| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
53| The copyright notice above does not evidence any
54| actual or intended publication of such source code.
55
56|STANH idnt 2,1 | Motorola 040 Floating Point Software Package
57
58 |section 8
59
60#include "fpsp.h"
61
62 .set X,FP_SCR5 | scratch copy of the operand, later rebuilt as Y = 2|X|
63 .set XDCARE,X+2 | word at X+2; TANHSM clears it before reloading X
64 .set XFRAC,X+4 | X+4 (not referenced by the stanh code in this file)
65
66 .set SGN,L_SCR3 | holds the sign bit of X once masked with 0x80000000
67
68 .set V,FP_SCR6 | scratch for the divisor Z+2 with the sign of X applied
69
70BOUNDS1: .long 0x3FD78000,0x3FFFDDCE | ... 2^(-40), (5/2)LOG2
71
72 |xref t_frcinx
73 |xref t_extdnrm
74 |xref setox
75 |xref setoxm1
76
77 .global stanhd
78stanhd: | entry for a denormalized operand
79|--TANH(X) = X FOR DENORMALIZED X
80
81 bra t_extdnrm | hand off to t_extdnrm (defined outside this file); nothing is computed here
82
83 .global stanh
84stanh: | tanh(X): operand is read from (a0), result is left in fp0
85 fmovex (%a0),%fp0 | ...LOAD INPUT
86
87 fmovex %fp0,X(%a6) | keep a full copy of the operand in X
88 movel (%a0),%d0 | first longword of X (sign bit on top, exponent below it)
89 movew 4(%a0),%d0 | low word replaced by the word at offset 4 (compact form)
90 movel %d0,X(%a6) | first longword of X now holds the compact form
91 andl #0x7FFFFFFF,%d0 | clear the sign bit: d0 = compact |X|
92 cmp2l BOUNDS1(%pc),%d0 | ...2**(-40) < |X| < (5/2)LOG2 ?
93 bcss TANHBORS | carry set: outside the BOUNDS1 pair
94
95|--THIS IS THE USUAL CASE
96|--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
97
98 movel X(%a6),%d0 | reload the compact form (sign still present)
99 movel %d0,SGN(%a6)
100 andl #0x7FFF0000,%d0 | exponent field only: sign and low word dropped
101 addl #0x00010000,%d0 | ...EXPONENT OF 2|X|
102 movel %d0,X(%a6) | replace the first longword; the rest of X is unchanged
103 andl #0x80000000,SGN(%a6) | SGN keeps only the sign bit
104 fmovex X(%a6),%fp0 | ...FP0 IS Y = 2|X|
105
106 movel %d1,-(%a7) | save d1 across the call
107 clrl %d1 | call setoxm1 with d1 = 0
108 fmovemx %fp0-%fp0,(%a0) | pass Y to setoxm1 through (a0)
109 bsr setoxm1 | ...FP0 IS Z = EXPM1(Y)
110 movel (%a7)+,%d1
111
112 fmovex %fp0,%fp1
113 fadds #0x40000000,%fp1 | ...Z+2
114 movel SGN(%a6),%d0
115 fmovex %fp1,V(%a6) | V = Z+2
116 eorl %d0,V(%a6) | apply the sign of X to V in memory
117
118 fmovel %d1,%FPCR |restore users exceptions
119 fdivx V(%a6),%fp0 | fp0 = Z / (SIGN(X)*(Z+2))
120 bra t_frcinx | common exit (defined outside this file)
121
122TANHBORS: | compact |X| in d0 is outside the usual range
123 cmpl #0x3FFF8000,%d0 | below this the argument is on the tiny side
124 blt TANHSM
125
126 cmpl #0x40048AA1,%d0 | upper limit of the exp-based formula (50 LOG2 per the note below)
127 bgt TANHHUGE
128
129|-- (5/2) LOG2 < |X| < 50 LOG2,
130|--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
131|--TANH(X) = SGN - SGN*2/[EXP(Y)+1].
132
133 movel X(%a6),%d0 | compact form stored by stanh (sign still present)
134 movel %d0,SGN(%a6)
135 andl #0x7FFF0000,%d0 | exponent field only
136 addl #0x00010000,%d0 | ...EXPO OF 2|X|
137 movel %d0,X(%a6) | ...Y = 2|X|
138 andl #0x80000000,SGN(%a6) | SGN keeps only the sign bit
139 movel SGN(%a6),%d0 | NOTE(review): d0 is reloaded from SGN after the call; looks redundant unless setox reads d0
140 fmovex X(%a6),%fp0 | ...Y = 2|X|
141
142 movel %d1,-(%a7) | save d1 across the call
143 clrl %d1 | call setox with d1 = 0
144 fmovemx %fp0-%fp0,(%a0) | pass Y to setox through (a0)
145 bsr setox | ...FP0 IS EXP(Y)
146 movel (%a7)+,%d1
147 movel SGN(%a6),%d0
148 fadds #0x3F800000,%fp0 | ...EXP(Y)+1
149
150 eorl #0xC0000000,%d0 | ...-SIGN(X)*2
151 fmoves %d0,%fp1 | ...-SIGN(X)*2 IN SGL FMT
152 fdivx %fp0,%fp1 | ...-SIGN(X)2 / [EXP(Y)+1 ]
153
154 movel SGN(%a6),%d0
155 orl #0x3F800000,%d0 | ...SGN
156 fmoves %d0,%fp0 | ...SGN IN SGL FMT
157
158 fmovel %d1,%FPCR |restore users exceptions
159 faddx %fp1,%fp0 | fp0 = SGN - SGN*2/[EXP(Y)+1]
160
161 bra t_frcinx | common exit (defined outside this file)
162
163TANHSM: | tiny argument: the result is X itself
164 movew #0x0000,XDCARE(%a6) | clear the word stanh overwrote when it stored the compact form into X
165
166 fmovel %d1,%FPCR |restore users exceptions
167 fmovex X(%a6),%fp0 |last inst - possible exception set
168
169 bra t_frcinx | common exit (defined outside this file)
170
171TANHHUGE: | very large |X|: result is +-1 less a tiny amount
172|---RETURN SGN(X) - SGN(X)EPS
173 movel X(%a6),%d0
174 andl #0x80000000,%d0 | sign bit of X
175 orl #0x3F800000,%d0 | d0 = SGN(X)*1.0 as a single-precision pattern
176 fmoves %d0,%fp0
177 andl #0x80000000,%d0 | back to the bare sign bit
178 eorl #0x80800000,%d0 | ...-SIGN(X)*EPS
179
180 fmovel %d1,%FPCR |restore users exceptions
181 fadds %d0,%fp0 | fp0 = SGN(X) - SGN(X)*EPS
182
183 bra t_frcinx | common exit (defined outside this file)
184
185 |end
diff --git a/arch/m68k/fpsp040/sto_res.S b/arch/m68k/fpsp040/sto_res.S
new file mode 100644
index 000000000000..0cdca3b060ad
--- /dev/null
+++ b/arch/m68k/fpsp040/sto_res.S
@@ -0,0 +1,98 @@
1|
2| sto_res.sa 3.1 12/10/90
3|
4| Takes the result and puts it in where the user expects it.
5| Library functions return result in fp0. If fp0 is not the
6| users destination register then fp0 is moved to the
7| correct floating-point destination register. fp0 and fp1
8| are then restored to the original contents.
9|
10| Input: result in fp0,fp1
11|
12| d2 & a0 should be kept unmodified
13|
14| Output: moves the result to the true destination reg or mem
15|
16| Modifies: destination floating point register
17|
18
19| Copyright (C) Motorola, Inc. 1990
20| All Rights Reserved
21|
22| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
23| The copyright notice above does not evidence any
24| actual or intended publication of such source code.
25
26STO_RES: |idnt 2,1 | Motorola 040 Floating Point Software Package
27
28
29 |section 8
30
31#include "fpsp.h"
32
33 .global sto_cos
34sto_cos: | store fp1 into the destination selected by a 3-bit field of CMDREG1B; uses d0 and d1
35 bfextu CMDREG1B(%a6){#13:#3},%d0 |extract cos destination
36 cmpib #3,%d0 |destinations fp0-fp3 are handled at c_fp0123
37 bles c_fp0123
38 fmovemx %fp1-%fp1,-(%a7) | fp4-fp7: push fp1, then pop it through a dynamic register mask
39 moveql #7,%d1
40 subl %d0,%d1 |d1 = 7- (dest. reg. no.)
41 clrl %d0
42 bsetl %d1,%d0 |d0 is dynamic register mask
43 fmovemx (%a7)+,%d0 | pop into the register selected by the mask in d0
44 rts
45c_fp0123: | fp0-fp3: write to the matching USER_FPn slot off a6 instead of the live register
46 cmpib #0,%d0
47 beqs c_is_fp0
48 cmpib #1,%d0
49 beqs c_is_fp1
50 cmpib #2,%d0
51 beqs c_is_fp2
52c_is_fp3: | reached by fall-through when d0 = 3
53 fmovemx %fp1-%fp1,USER_FP3(%a6)
54 rts
55c_is_fp2:
56 fmovemx %fp1-%fp1,USER_FP2(%a6)
57 rts
58c_is_fp1:
59 fmovemx %fp1-%fp1,USER_FP1(%a6)
60 rts
61c_is_fp0:
62 fmovemx %fp1-%fp1,USER_FP0(%a6)
63 rts
64
65
66 .global sto_res
67sto_res: | store fp0 into the destination selected by a 3-bit field of CMDREG1B; uses d0 and d1
68 bfextu CMDREG1B(%a6){#6:#3},%d0 |extract destination register
69 cmpib #3,%d0 |destinations fp0-fp3 are handled at fp0123
70 bles fp0123
71 fmovemx %fp0-%fp0,-(%a7) | fp4-fp7: push fp0, then pop it through a dynamic register mask
72 moveql #7,%d1
73 subl %d0,%d1 |d1 = 7- (dest. reg. no.)
74 clrl %d0
75 bsetl %d1,%d0 |d0 is dynamic register mask
76 fmovemx (%a7)+,%d0 | pop into the register selected by the mask in d0
77 rts
78fp0123: | fp0-fp3: write to the matching USER_FPn slot off a6 instead of the live register
79 cmpib #0,%d0
80 beqs is_fp0
81 cmpib #1,%d0
82 beqs is_fp1
83 cmpib #2,%d0
84 beqs is_fp2
85is_fp3: | reached by fall-through when d0 = 3
86 fmovemx %fp0-%fp0,USER_FP3(%a6)
87 rts
88is_fp2:
89 fmovemx %fp0-%fp0,USER_FP2(%a6)
90 rts
91is_fp1:
92 fmovemx %fp0-%fp0,USER_FP1(%a6)
93 rts
94is_fp0:
95 fmovemx %fp0-%fp0,USER_FP0(%a6)
96 rts
97
98 |end
diff --git a/arch/m68k/fpsp040/stwotox.S b/arch/m68k/fpsp040/stwotox.S
new file mode 100644
index 000000000000..4e3c1407d3df
--- /dev/null
+++ b/arch/m68k/fpsp040/stwotox.S
@@ -0,0 +1,427 @@
1|
2| stwotox.sa 3.1 12/10/90
3|
4| stwotox --- 2**X
5| stwotoxd --- 2**X for denormalized X
6| stentox --- 10**X
7| stentoxd --- 10**X for denormalized X
8|
9| Input: Double-extended number X in location pointed to
10| by address register a0.
11|
12| Output: The function values are returned in Fp0.
13|
14| Accuracy and Monotonicity: The returned result is within 2 ulps in
15| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
16| result is subsequently rounded to double precision. The
17| result is provably monotonic in double precision.
18|
19| Speed: The program stwotox takes approximately 190 cycles and the
20| program stentox takes approximately 200 cycles.
21|
22| Algorithm:
23|
24| twotox
25| 1. If |X| > 16480, go to ExpBig.
26|
27| 2. If |X| < 2**(-70), go to ExpSm.
28|
29| 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore
30| decompose N as
31| N = 64(M + M') + j, j = 0,1,2,...,63.
32|
33| 4. Overwrite r := r * log2. Then
34| 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
35| Go to expr to compute that expression.
36|
37| tentox
38| 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.
39|
40| 2. If |X| < 2**(-70), go to ExpSm.
41|
42| 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set
43| N := round-to-int(y). Decompose N as
44| N = 64(M + M') + j, j = 0,1,2,...,63.
45|
46| 4. Define r as
47| r := ((X - N*L1)-N*L2) * L10
48| where L1, L2 are the leading and trailing parts of log_10(2)/64
49| and L10 is the natural log of 10. Then
50| 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
51| Go to expr to compute that expression.
52|
53| expr
54| 1. Fetch 2**(j/64) from table as Fact1 and Fact2.
55|
56| 2. Overwrite Fact1 and Fact2 by
57| Fact1 := 2**(M) * Fact1
58| Fact2 := 2**(M) * Fact2
59| Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).
60|
61| 3. Calculate P where 1 + P approximates exp(r):
62| P = r + r*r*(A1+r*(A2+...+r*A5)).
63|
64| 4. Let AdjFact := 2**(M'). Return
65| AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).
66| Exit.
67|
68| ExpBig
69| 1. Generate overflow by Huge * Huge if X > 0; otherwise, generate
70| underflow by Tiny * Tiny.
71|
72| ExpSm
73| 1. Return 1 + X.
74|
75
76| Copyright (C) Motorola, Inc. 1990
77| All Rights Reserved
78|
79| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
80| The copyright notice above does not evidence any
81| actual or intended publication of such source code.
82
83|STWOTOX idnt 2,1 | Motorola 040 Floating Point Software Package
84
85 |section 8
86
87#include "fpsp.h"
88
89BOUNDS1: .long 0x3FB98000,0x400D80C0 | ... 2^(-70),16480
90BOUNDS2: .long 0x3FB98000,0x400B9B07 | ... 2^(-70),16480 LOG2/LOG10
91
92L2TEN64: .long 0x406A934F,0x0979A371 | ... 64LOG10/LOG2
93L10TWO1: .long 0x3F734413,0x509F8000 | ... LOG2/64LOG10
94
95L10TWO2: .long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
96
97LOG10: .long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
98
99LOG2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
100
101EXPA5: .long 0x3F56C16D,0x6F7BD0B2
102EXPA4: .long 0x3F811112,0x302C712C
103EXPA3: .long 0x3FA55555,0x55554CC1
104EXPA2: .long 0x3FC55555,0x55554A54
105EXPA1: .long 0x3FE00000,0x00000000,0x00000000,0x00000000
106
107HUGE: .long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
108TINY: .long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
109
110EXPTBL:
111 .long 0x3FFF0000,0x80000000,0x00000000,0x3F738000
112 .long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
113 .long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
114 .long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
115 .long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
116 .long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
117 .long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
118 .long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
119 .long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
120 .long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
121 .long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
122 .long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
123 .long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
124 .long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
125 .long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
126 .long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
127 .long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
128 .long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
129 .long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
130 .long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
131 .long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
132 .long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
133 .long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
134 .long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
135 .long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
136 .long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
137 .long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
138 .long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
139 .long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
140 .long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
141 .long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
142 .long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
143 .long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
144 .long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
145 .long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
146 .long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
147 .long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
148 .long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
149 .long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
150 .long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
151 .long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
152 .long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
153 .long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
154 .long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
155 .long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
156 .long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
157 .long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
158 .long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
159 .long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
160 .long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
161 .long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
162 .long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
163 .long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
164 .long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
165 .long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
166 .long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
167 .long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
168 .long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
169 .long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
170 .long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
171 .long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
172 .long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
173 .long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
174 .long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
175
176 .set N,L_SCR1 | integer N produced by the round-to-int step
177
178 .set X,FP_SCR1 | scratch copy of the operand (its sign is tested at EXPBIG)
179 .set XDCARE,X+2 | X+2 (not referenced by the code in this file)
180 .set XFRAC,X+4 | X+4 (not referenced by the code in this file)
181
182 .set ADJFACT,FP_SCR2 | extended 2**(M'), completed in expr
183
184 .set FACT1,FP_SCR3 | leading part of 2**(J/64), scaled by 2**M
185 .set FACT1HI,FACT1+4
186 .set FACT1LOW,FACT1+8
187
188 .set FACT2,FP_SCR4 | trailing part of 2**(J/64), scaled by 2**M
189 .set FACT2HI,FACT2+4
190 .set FACT2LOW,FACT2+8
191
192 | xref t_unfl
193 |xref t_ovfl
194 |xref t_frcinx
195
196 .global stwotoxd
197stwotoxd:
198|--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
199
200 fmovel %d1,%fpcr | ...set user's rounding mode/precision
201 fmoves #0x3F800000,%fp0 | ...RETURN 1 + X
202 movel (%a0),%d0 | first longword of X (carries its sign bit)
203 orl #0x00800001,%d0 | single-precision stand-in for X: tiny magnitude, sign of X (assumes the exponent field of a denormalized X is zero)
204 fadds %d0,%fp0 | fp0 = 1 + stand-in, evaluated under the user's FPCR
205 bra t_frcinx | common exit (defined outside this file)
206
207 .global stwotox
208stwotox:
209|--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
210 fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's
211
212 movel (%a0),%d0 | first longword of X (sign bit on top, exponent below it)
213 movew 4(%a0),%d0 | low word replaced by the word at offset 4 (compact form)
214 fmovex %fp0,X(%a6) | keep a copy of X; EXPBIG tests its sign
215 andil #0x7FFFFFFF,%d0 | clear the sign bit: d0 = compact |X|
216
217 cmpil #0x3FB98000,%d0 | ...|X| >= 2**(-70)?
218 bges TWOOK1
219 bra EXPBORS
220
221TWOOK1:
222 cmpil #0x400D80C0,%d0 | ...|X| > 16480?
223 bles TWOMAIN
224 bra EXPBORS
225
226
227TWOMAIN:
228|--USUAL CASE, 2^(-70) <= |X| <= 16480
229
230 fmovex %fp0,%fp1
231 fmuls #0x42800000,%fp1 | ...64 * X
232
233 fmovel %fp1,N(%a6) | ...N = ROUND-TO-INT(64 X)
234 movel %d2,-(%sp) | d2 is scratch here; restored before leaving this section
235 lea EXPTBL,%a1 | ...LOAD ADDRESS OF TABLE OF 2^(J/64)
236 fmovel N(%a6),%fp1 | ...N --> FLOATING FMT
237 movel N(%a6),%d0
238 movel %d0,%d2
239 andil #0x3F,%d0 | ...D0 IS J
240 asll #4,%d0 | ...DISPLACEMENT FOR 2^(J/64)
241 addal %d0,%a1 | ...ADDRESS FOR 2^(J/64)
242 asrl #6,%d2 | ...d2 IS L, N = 64L + J
243 movel %d2,%d0
244 asrl #1,%d0 | ...D0 IS M
245 subl %d0,%d2 | ...d2 IS M', N = 64(M+M') + J
246 addil #0x3FFF,%d2 | add the exponent bias
247 movew %d2,ADJFACT(%a6) | ...ADJFACT IS 2^(M')
248 movel (%sp)+,%d2
249|--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
250|--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
251|--ADJFACT = 2^(M').
252|--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2. NOTE(review): only the d2 push/pop is visible in this routine; this list looks stale.
253
254 fmuls #0x3C800000,%fp1 | ...(1/64)*N
255 movel (%a1)+,FACT1(%a6) | copy the 12-byte leading term of the table row into FACT1
256 movel (%a1)+,FACT1HI(%a6)
257 movel (%a1)+,FACT1LOW(%a6)
258 movew (%a1)+,FACT2(%a6) | upper word of the row's 4th longword becomes FACT2's first word
259 clrw FACT2+2(%a6)
260
261 fsubx %fp1,%fp0 | ...X - (1/64)*INT(64 X)
262
263 movew (%a1)+,FACT2HI(%a6) | lower word of the 4th longword becomes the top word of FACT2's mantissa
264 clrw FACT2HI+2(%a6)
265 clrl FACT2LOW(%a6)
266 addw %d0,FACT1(%a6) | add M to FACT1's first word: FACT1 *= 2**M
267
268 fmulx LOG2,%fp0 | ...FP0 IS R
269 addw %d0,FACT2(%a6) | likewise FACT2 *= 2**M
270
271 bra expr | shared reconstruction code
272
273EXPBORS: | compact |X| in d0 is outside the usual range (shared by stwotox and stentox)
274|--FPCR, D0 SAVED
275 cmpil #0x3FFF8000,%d0 | above this goes to EXPBIG, otherwise to the tiny case
276 bgts EXPBIG
277
278EXPSM:
279|--|X| IS SMALL, RETURN 1 + X
280
281 fmovel %d1,%FPCR |restore users exceptions
282 fadds #0x3F800000,%fp0 | ...RETURN 1 + X
283
284 bra t_frcinx | common exit (defined outside this file)
285
286EXPBIG:
287|--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
288|--REGISTERS SAVE SO FAR ARE FPCR AND D0
289 movel X(%a6),%d0 | first longword of the saved operand; its top bit is the sign
290 cmpil #0,%d0
291 blts EXPNEG | negative X: underflow side
292
293 bclrb #7,(%a0) |t_ovfl expects positive value
294 bra t_ovfl
295
296EXPNEG:
297 bclrb #7,(%a0) |t_unfl expects positive value
298 bra t_unfl
299
300 .global stentoxd
301stentoxd:
302|--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
303
304 fmovel %d1,%fpcr | ...set user's rounding mode/precision
305 fmoves #0x3F800000,%fp0 | ...RETURN 1 + X
306 movel (%a0),%d0 | first longword of X (carries its sign bit)
307 orl #0x00800001,%d0 | single-precision stand-in for X: tiny magnitude, sign of X (assumes the exponent field of a denormalized X is zero)
308 fadds %d0,%fp0 | fp0 = 1 + stand-in, evaluated under the user's FPCR
309 bra t_frcinx | common exit (defined outside this file)
310
311 .global stentox
312stentox:
313|--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
314 fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's
315
316 movel (%a0),%d0 | first longword of X (sign bit on top, exponent below it)
317 movew 4(%a0),%d0 | low word replaced by the word at offset 4 (compact form)
318 fmovex %fp0,X(%a6) | keep a copy of X; EXPBIG tests its sign
319 andil #0x7FFFFFFF,%d0 | clear the sign bit: d0 = compact |X|
320
321 cmpil #0x3FB98000,%d0 | ...|X| >= 2**(-70)?
322 bges TENOK1
323 bra EXPBORS
324
325TENOK1:
326 cmpil #0x400B9B07,%d0 | ...|X| <= 16480*log2/log10 ?
327 bles TENMAIN
328 bra EXPBORS
329
330TENMAIN:
331|--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
332
333 fmovex %fp0,%fp1
334 fmuld L2TEN64,%fp1 | ...X*64*LOG10/LOG2
335
336 fmovel %fp1,N(%a6) | ...N=INT(X*64*LOG10/LOG2)
337 movel %d2,-(%sp) | d2 is scratch here; restored before leaving this section
338 lea EXPTBL,%a1 | ...LOAD ADDRESS OF TABLE OF 2^(J/64)
339 fmovel N(%a6),%fp1 | ...N --> FLOATING FMT
340 movel N(%a6),%d0
341 movel %d0,%d2
342 andil #0x3F,%d0 | ...D0 IS J
343 asll #4,%d0 | ...DISPLACEMENT FOR 2^(J/64)
344 addal %d0,%a1 | ...ADDRESS FOR 2^(J/64)
345 asrl #6,%d2 | ...d2 IS L, N = 64L + J
346 movel %d2,%d0
347 asrl #1,%d0 | ...D0 IS M
348 subl %d0,%d2 | ...d2 IS M', N = 64(M+M') + J
349 addil #0x3FFF,%d2 | add the exponent bias
350 movew %d2,ADJFACT(%a6) | ...ADJFACT IS 2^(M')
351 movel (%sp)+,%d2
352
353|--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
354|--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
355|--ADJFACT = 2^(M').
356|--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2. NOTE(review): only the d2 push/pop is visible in this routine; this list looks stale.
357
358 fmovex %fp1,%fp2 | second copy of N for the trailing product
359
360 fmuld L10TWO1,%fp1 | ...N*(LOG2/64LOG10)_LEAD
361 movel (%a1)+,FACT1(%a6) | start copying the 12-byte leading term of the table row into FACT1
362
363 fmulx L10TWO2,%fp2 | ...N*(LOG2/64LOG10)_TRAIL
364
365 movel (%a1)+,FACT1HI(%a6)
366 movel (%a1)+,FACT1LOW(%a6)
367 fsubx %fp1,%fp0 | ...X - N L_LEAD
368 movew (%a1)+,FACT2(%a6) | upper word of the row's 4th longword becomes FACT2's first word
369
370 fsubx %fp2,%fp0 | ...X - N L_TRAIL
371
372 clrw FACT2+2(%a6)
373 movew (%a1)+,FACT2HI(%a6) | lower word of the 4th longword becomes the top word of FACT2's mantissa
374 clrw FACT2HI+2(%a6)
375 clrl FACT2LOW(%a6)
376
377 fmulx LOG10,%fp0 | ...FP0 IS R
378
379 addw %d0,FACT1(%a6) | add M to FACT1's first word: FACT1 *= 2**M
380 addw %d0,FACT2(%a6) | likewise FACT2 *= 2**M; execution then falls into expr
381
382expr: | shared tail of stwotox and stentox; in: fp0 = R, FACT1/FACT2 filled, ADJFACT exponent word set
383|--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
384|--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
385|--FP0 IS R. THE FOLLOWING CODE COMPUTES
386|-- 2**(M'+M) * 2**(J/64) * EXP(R)
387
388 fmovex %fp0,%fp1
389 fmulx %fp1,%fp1 | ...FP1 IS S = R*R
390
391 fmoved EXPA5,%fp2 | ...FP2 IS A5
392 fmoved EXPA4,%fp3 | ...FP3 IS A4
393
394 fmulx %fp1,%fp2 | ...FP2 IS S*A5
395 fmulx %fp1,%fp3 | ...FP3 IS S*A4
396
397 faddd EXPA3,%fp2 | ...FP2 IS A3+S*A5
398 faddd EXPA2,%fp3 | ...FP3 IS A2+S*A4
399
400 fmulx %fp1,%fp2 | ...FP2 IS S*(A3+S*A5)
401 fmulx %fp1,%fp3 | ...FP3 IS S*(A2+S*A4)
402
403 faddd EXPA1,%fp2 | ...FP2 IS A1+S*(A3+S*A5)
404 fmulx %fp0,%fp3 | ...FP3 IS R*S*(A2+S*A4)
405
406 fmulx %fp1,%fp2 | ...FP2 IS S*(A1+S*(A3+S*A5))
407 faddx %fp3,%fp0 | ...FP0 IS R+R*S*(A2+S*A4)
408
409 faddx %fp2,%fp0 | ...FP0 IS EXP(R) - 1
410
411
412|--FINAL RECONSTRUCTION PROCESS
413|--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0)
414
415 fmulx FACT1(%a6),%fp0 | FACT1*(EXP(R)-1)
416 faddx FACT2(%a6),%fp0 | ... + FACT2
417 faddx FACT1(%a6),%fp0 | ... + FACT1
418
419 fmovel %d1,%FPCR |restore users exceptions
420 clrw ADJFACT+2(%a6) | finish building ADJFACT: clear the word after the exponent word
421 movel #0x80000000,ADJFACT+4(%a6) | high mantissa longword
422 clrl ADJFACT+8(%a6) | low mantissa longword = 0
423 fmulx ADJFACT(%a6),%fp0 | ...FINAL ADJUSTMENT
424
425 bra t_frcinx | common exit (defined outside this file)
426
427 |end
diff --git a/arch/m68k/fpsp040/tbldo.S b/arch/m68k/fpsp040/tbldo.S
new file mode 100644
index 000000000000..fe60cf4d20d7
--- /dev/null
+++ b/arch/m68k/fpsp040/tbldo.S
@@ -0,0 +1,554 @@
1|
2| tbldo.sa 3.1 12/10/90
3|
4| Modified:
5| 8/16/90 chinds The table was constructed to use only one level
6| of indirection in do_func for monadic
7| functions. Dyadic functions require two
8| levels, and the tables are still contained
9| in do_func. The table is arranged for
10| index with a 10-bit index, with the first
11| 7 bits the opcode, and the remaining 3
12| the stag. For dyadic functions, all
13| valid addresses are to the generic entry
14| point.
15|
16
17| Copyright (C) Motorola, Inc. 1990
18| All Rights Reserved
19|
20| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
21| The copyright notice above does not evidence any
22| actual or intended publication of such source code.
23
24|TBLDO idnt 2,1 | Motorola 040 Floating Point Software Package
25
26 |section 8
27
28 |xref ld_pinf,ld_pone,ld_ppi2
29 |xref t_dz2,t_operr
30 |xref serror,sone,szero,sinf,snzrinx
31 |xref sopr_inf,spi_2,src_nan,szr_inf
32
33 |xref smovcr
34 |xref pmod,prem,pscale
35 |xref satanh,satanhd
36 |xref sacos,sacosd,sasin,sasind,satan,satand
37 |xref setox,setoxd,setoxm1,setoxm1d,setoxm1i
38 |xref sgetexp,sgetexpd,sgetman,sgetmand
39 |xref sint,sintd,sintrz
40 |xref ssincos,ssincosd,ssincosi,ssincosnan,ssincosz
41 |xref scos,scosd,ssin,ssind,stan,stand
42 |xref scosh,scoshd,ssinh,ssinhd,stanh,stanhd
43 |xref sslog10,sslog2,sslogn,sslognp1
44 |xref sslog10d,sslog2d,sslognd,slognp1d
45 |xref stentox,stentoxd,stwotox,stwotoxd
46
47| instruction ;opcode-stag Notes
48 .global tblpre
49tblpre:
50 .long smovcr |$00-0 fmovecr all
51 .long smovcr |$00-1 fmovecr all
52 .long smovcr |$00-2 fmovecr all
53 .long smovcr |$00-3 fmovecr all
54 .long smovcr |$00-4 fmovecr all
55 .long smovcr |$00-5 fmovecr all
56 .long smovcr |$00-6 fmovecr all
57 .long smovcr |$00-7 fmovecr all
58
59 .long sint |$01-0 fint norm
60 .long szero |$01-1 fint zero
61 .long sinf |$01-2 fint inf
62 .long src_nan |$01-3 fint nan
63 .long sintd |$01-4 fint denorm inx
64 .long serror |$01-5 fint ERROR
65 .long serror |$01-6 fint ERROR
66 .long serror |$01-7 fint ERROR
67
68 .long ssinh |$02-0 fsinh norm
69 .long szero |$02-1 fsinh zero
70 .long sinf |$02-2 fsinh inf
71 .long src_nan |$02-3 fsinh nan
72 .long ssinhd |$02-4 fsinh denorm
73 .long serror |$02-5 fsinh ERROR
74 .long serror |$02-6 fsinh ERROR
75 .long serror |$02-7 fsinh ERROR
76
77 .long sintrz |$03-0 fintrz norm
78 .long szero |$03-1 fintrz zero
79 .long sinf |$03-2 fintrz inf
80 .long src_nan |$03-3 fintrz nan
81 .long snzrinx |$03-4 fintrz denorm inx
82 .long serror |$03-5 fintrz ERROR
83 .long serror |$03-6 fintrz ERROR
84 .long serror |$03-7 fintrz ERROR
85
86 .long serror |$04-0 ERROR - illegal extension
87 .long serror |$04-1 ERROR - illegal extension
88 .long serror |$04-2 ERROR - illegal extension
89 .long serror |$04-3 ERROR - illegal extension
90 .long serror |$04-4 ERROR - illegal extension
91 .long serror |$04-5 ERROR - illegal extension
92 .long serror |$04-6 ERROR - illegal extension
93 .long serror |$04-7 ERROR - illegal extension
94
95 .long serror |$05-0 ERROR - illegal extension
96 .long serror |$05-1 ERROR - illegal extension
97 .long serror |$05-2 ERROR - illegal extension
98 .long serror |$05-3 ERROR - illegal extension
99 .long serror |$05-4 ERROR - illegal extension
100 .long serror |$05-5 ERROR - illegal extension
101 .long serror |$05-6 ERROR - illegal extension
102 .long serror |$05-7 ERROR - illegal extension
103
104 .long sslognp1 |$06-0 flognp1 norm
105 .long szero |$06-1 flognp1 zero
106 .long sopr_inf |$06-2 flognp1 inf
107 .long src_nan |$06-3 flognp1 nan
108 .long slognp1d |$06-4 flognp1 denorm
109 .long serror |$06-5 flognp1 ERROR
110 .long serror |$06-6 flognp1 ERROR
111 .long serror |$06-7 flognp1 ERROR
112
113 .long serror |$07-0 ERROR - illegal extension
114 .long serror |$07-1 ERROR - illegal extension
115 .long serror |$07-2 ERROR - illegal extension
116 .long serror |$07-3 ERROR - illegal extension
117 .long serror |$07-4 ERROR - illegal extension
118 .long serror |$07-5 ERROR - illegal extension
119 .long serror |$07-6 ERROR - illegal extension
120 .long serror |$07-7 ERROR - illegal extension
121
122 .long setoxm1 |$08-0 fetoxm1 norm
123 .long szero |$08-1 fetoxm1 zero
124 .long setoxm1i |$08-2 fetoxm1 inf
125 .long src_nan |$08-3 fetoxm1 nan
126 .long setoxm1d |$08-4 fetoxm1 denorm
127 .long serror |$08-5 fetoxm1 ERROR
128 .long serror |$08-6 fetoxm1 ERROR
129 .long serror |$08-7 fetoxm1 ERROR
130
131 .long stanh |$09-0 ftanh norm
132 .long szero |$09-1 ftanh zero
133 .long sone |$09-2 ftanh inf
134 .long src_nan |$09-3 ftanh nan
135 .long stanhd |$09-4 ftanh denorm
136 .long serror |$09-5 ftanh ERROR
137 .long serror |$09-6 ftanh ERROR
138 .long serror |$09-7 ftanh ERROR
139
140 .long satan |$0a-0 fatan norm
141 .long szero |$0a-1 fatan zero
142 .long spi_2 |$0a-2 fatan inf
143 .long src_nan |$0a-3 fatan nan
144 .long satand |$0a-4 fatan denorm
145 .long serror |$0a-5 fatan ERROR
146 .long serror |$0a-6 fatan ERROR
147 .long serror |$0a-7 fatan ERROR
148
149 .long serror |$0b-0 ERROR - illegal extension
150 .long serror |$0b-1 ERROR - illegal extension
151 .long serror |$0b-2 ERROR - illegal extension
152 .long serror |$0b-3 ERROR - illegal extension
153 .long serror |$0b-4 ERROR - illegal extension
154 .long serror |$0b-5 ERROR - illegal extension
155 .long serror |$0b-6 ERROR - illegal extension
156 .long serror |$0b-7 ERROR - illegal extension
157
158 .long sasin |$0c-0 fasin norm
159 .long szero |$0c-1 fasin zero
160 .long t_operr |$0c-2 fasin inf
161 .long src_nan |$0c-3 fasin nan
162 .long sasind |$0c-4 fasin denorm
163 .long serror |$0c-5 fasin ERROR
164 .long serror |$0c-6 fasin ERROR
165 .long serror |$0c-7 fasin ERROR
166
167 .long satanh |$0d-0 fatanh norm
168 .long szero |$0d-1 fatanh zero
169 .long t_operr |$0d-2 fatanh inf
170 .long src_nan |$0d-3 fatanh nan
171 .long satanhd |$0d-4 fatanh denorm
172 .long serror |$0d-5 fatanh ERROR
173 .long serror |$0d-6 fatanh ERROR
174 .long serror |$0d-7 fatanh ERROR
175
176 .long ssin |$0e-0 fsin norm
177 .long szero |$0e-1 fsin zero
178 .long t_operr |$0e-2 fsin inf
179 .long src_nan |$0e-3 fsin nan
180 .long ssind |$0e-4 fsin denorm
181 .long serror |$0e-5 fsin ERROR
182 .long serror |$0e-6 fsin ERROR
183 .long serror |$0e-7 fsin ERROR
184
185 .long stan |$0f-0 ftan norm
186 .long szero |$0f-1 ftan zero
187 .long t_operr |$0f-2 ftan inf
188 .long src_nan |$0f-3 ftan nan
189 .long stand |$0f-4 ftan denorm
190 .long serror |$0f-5 ftan ERROR
191 .long serror |$0f-6 ftan ERROR
192 .long serror |$0f-7 ftan ERROR
193
194 .long setox |$10-0 fetox norm
195 .long ld_pone |$10-1 fetox zero
196 .long szr_inf |$10-2 fetox inf
197 .long src_nan |$10-3 fetox nan
198 .long setoxd |$10-4 fetox denorm
199 .long serror |$10-5 fetox ERROR
200 .long serror |$10-6 fetox ERROR
201 .long serror |$10-7 fetox ERROR
202
203 .long stwotox |$11-0 ftwotox norm
204 .long ld_pone |$11-1 ftwotox zero
205 .long szr_inf |$11-2 ftwotox inf
206 .long src_nan |$11-3 ftwotox nan
207 .long stwotoxd |$11-4 ftwotox denorm
208 .long serror |$11-5 ftwotox ERROR
209 .long serror |$11-6 ftwotox ERROR
210 .long serror |$11-7 ftwotox ERROR
211
212 .long stentox |$12-0 ftentox norm
213 .long ld_pone |$12-1 ftentox zero
214 .long szr_inf |$12-2 ftentox inf
215 .long src_nan |$12-3 ftentox nan
216 .long stentoxd |$12-4 ftentox denorm
217 .long serror |$12-5 ftentox ERROR
218 .long serror |$12-6 ftentox ERROR
219 .long serror |$12-7 ftentox ERROR
220
221 .long serror |$13-0 ERROR - illegal extension
222 .long serror |$13-1 ERROR - illegal extension
223 .long serror |$13-2 ERROR - illegal extension
224 .long serror |$13-3 ERROR - illegal extension
225 .long serror |$13-4 ERROR - illegal extension
226 .long serror |$13-5 ERROR - illegal extension
227 .long serror |$13-6 ERROR - illegal extension
228 .long serror |$13-7 ERROR - illegal extension
229
230 .long sslogn |$14-0 flogn norm
231 .long t_dz2 |$14-1 flogn zero
232 .long sopr_inf |$14-2 flogn inf
233 .long src_nan |$14-3 flogn nan
234 .long sslognd |$14-4 flogn denorm
235 .long serror |$14-5 flogn ERROR
236 .long serror |$14-6 flogn ERROR
237 .long serror |$14-7 flogn ERROR
238
239 .long sslog10 |$15-0 flog10 norm
240 .long t_dz2 |$15-1 flog10 zero
241 .long sopr_inf |$15-2 flog10 inf
242 .long src_nan |$15-3 flog10 nan
243 .long sslog10d |$15-4 flog10 denorm
244 .long serror |$15-5 flog10 ERROR
245 .long serror |$15-6 flog10 ERROR
246 .long serror |$15-7 flog10 ERROR
247
248 .long sslog2 |$16-0 flog2 norm
249 .long t_dz2 |$16-1 flog2 zero
250 .long sopr_inf |$16-2 flog2 inf
251 .long src_nan |$16-3 flog2 nan
252 .long sslog2d |$16-4 flog2 denorm
253 .long serror |$16-5 flog2 ERROR
254 .long serror |$16-6 flog2 ERROR
255 .long serror |$16-7 flog2 ERROR
256
257 .long serror |$17-0 ERROR - illegal extension
258 .long serror |$17-1 ERROR - illegal extension
259 .long serror |$17-2 ERROR - illegal extension
260 .long serror |$17-3 ERROR - illegal extension
261 .long serror |$17-4 ERROR - illegal extension
262 .long serror |$17-5 ERROR - illegal extension
263 .long serror |$17-6 ERROR - illegal extension
264 .long serror |$17-7 ERROR - illegal extension
265
266 .long serror |$18-0 ERROR - illegal extension
267 .long serror |$18-1 ERROR - illegal extension
268 .long serror |$18-2 ERROR - illegal extension
269 .long serror |$18-3 ERROR - illegal extension
270 .long serror |$18-4 ERROR - illegal extension
271 .long serror |$18-5 ERROR - illegal extension
272 .long serror |$18-6 ERROR - illegal extension
273 .long serror |$18-7 ERROR - illegal extension
274
275 .long scosh |$19-0 fcosh norm
276 .long ld_pone |$19-1 fcosh zero
277 .long ld_pinf |$19-2 fcosh inf
278 .long src_nan |$19-3 fcosh nan
279 .long scoshd |$19-4 fcosh denorm
280 .long serror |$19-5 fcosh ERROR
281 .long serror |$19-6 fcosh ERROR
282 .long serror |$19-7 fcosh ERROR
283
284 .long serror |$1a-0 ERROR - illegal extension
285 .long serror |$1a-1 ERROR - illegal extension
286 .long serror |$1a-2 ERROR - illegal extension
287 .long serror |$1a-3 ERROR - illegal extension
288 .long serror |$1a-4 ERROR - illegal extension
289 .long serror |$1a-5 ERROR - illegal extension
290 .long serror |$1a-6 ERROR - illegal extension
291 .long serror |$1a-7 ERROR - illegal extension
292
293 .long serror |$1b-0 ERROR - illegal extension
294 .long serror |$1b-1 ERROR - illegal extension
295 .long serror |$1b-2 ERROR - illegal extension
296 .long serror |$1b-3 ERROR - illegal extension
297 .long serror |$1b-4 ERROR - illegal extension
298 .long serror |$1b-5 ERROR - illegal extension
299 .long serror |$1b-6 ERROR - illegal extension
300 .long serror |$1b-7 ERROR - illegal extension
301
302 .long sacos |$1c-0 facos norm
303 .long ld_ppi2 |$1c-1 facos zero
304 .long t_operr |$1c-2 facos inf
305 .long src_nan |$1c-3 facos nan
306 .long sacosd |$1c-4 facos denorm
307 .long serror |$1c-5 facos ERROR
308 .long serror |$1c-6 facos ERROR
309 .long serror |$1c-7 facos ERROR
310
311 .long scos |$1d-0 fcos norm
312 .long ld_pone |$1d-1 fcos zero
313 .long t_operr |$1d-2 fcos inf
314 .long src_nan |$1d-3 fcos nan
315 .long scosd |$1d-4 fcos denorm
316 .long serror |$1d-5 fcos ERROR
317 .long serror |$1d-6 fcos ERROR
318 .long serror |$1d-7 fcos ERROR
319
320 .long sgetexp |$1e-0 fgetexp norm
321 .long szero |$1e-1 fgetexp zero
322 .long t_operr |$1e-2 fgetexp inf
323 .long src_nan |$1e-3 fgetexp nan
324 .long sgetexpd |$1e-4 fgetexp denorm
325 .long serror |$1e-5 fgetexp ERROR
326 .long serror |$1e-6 fgetexp ERROR
327 .long serror |$1e-7 fgetexp ERROR
328
329 .long sgetman |$1f-0 fgetman norm
330 .long szero |$1f-1 fgetman zero
331 .long t_operr |$1f-2 fgetman inf
332 .long src_nan |$1f-3 fgetman nan
333 .long sgetmand |$1f-4 fgetman denorm
334 .long serror |$1f-5 fgetman ERROR
335 .long serror |$1f-6 fgetman ERROR
336 .long serror |$1f-7 fgetman ERROR
337
338 .long serror |$20-0 ERROR - illegal extension
339 .long serror |$20-1 ERROR - illegal extension
340 .long serror |$20-2 ERROR - illegal extension
341 .long serror |$20-3 ERROR - illegal extension
342 .long serror |$20-4 ERROR - illegal extension
343 .long serror |$20-5 ERROR - illegal extension
344 .long serror |$20-6 ERROR - illegal extension
345 .long serror |$20-7 ERROR - illegal extension
346
347 .long pmod |$21-0 fmod all
348 .long pmod |$21-1 fmod all
349 .long pmod |$21-2 fmod all
350 .long pmod |$21-3 fmod all
351 .long pmod |$21-4 fmod all
352 .long serror |$21-5 fmod ERROR
353 .long serror |$21-6 fmod ERROR
354 .long serror |$21-7 fmod ERROR
355
356 .long serror |$22-0 ERROR - illegal extension
357 .long serror |$22-1 ERROR - illegal extension
358 .long serror |$22-2 ERROR - illegal extension
359 .long serror |$22-3 ERROR - illegal extension
360 .long serror |$22-4 ERROR - illegal extension
361 .long serror |$22-5 ERROR - illegal extension
362 .long serror |$22-6 ERROR - illegal extension
363 .long serror |$22-7 ERROR - illegal extension
364
365 .long serror |$23-0 ERROR - illegal extension
366 .long serror |$23-1 ERROR - illegal extension
367 .long serror |$23-2 ERROR - illegal extension
368 .long serror |$23-3 ERROR - illegal extension
369 .long serror |$23-4 ERROR - illegal extension
370 .long serror |$23-5 ERROR - illegal extension
371 .long serror |$23-6 ERROR - illegal extension
372 .long serror |$23-7 ERROR - illegal extension
373
374 .long serror |$24-0 ERROR - illegal extension
375 .long serror |$24-1 ERROR - illegal extension
376 .long serror |$24-2 ERROR - illegal extension
377 .long serror |$24-3 ERROR - illegal extension
378 .long serror |$24-4 ERROR - illegal extension
379 .long serror |$24-5 ERROR - illegal extension
380 .long serror |$24-6 ERROR - illegal extension
381 .long serror |$24-7 ERROR - illegal extension
382
383 .long prem |$25-0 frem all
384 .long prem |$25-1 frem all
385 .long prem |$25-2 frem all
386 .long prem |$25-3 frem all
387 .long prem |$25-4 frem all
388 .long serror |$25-5 frem ERROR
389 .long serror |$25-6 frem ERROR
390 .long serror |$25-7 frem ERROR
391
392 .long pscale |$26-0 fscale all
393 .long pscale |$26-1 fscale all
394 .long pscale |$26-2 fscale all
395 .long pscale |$26-3 fscale all
396 .long pscale |$26-4 fscale all
397 .long serror |$26-5 fscale ERROR
398 .long serror |$26-6 fscale ERROR
399 .long serror |$26-7 fscale ERROR
400
401 .long serror |$27-0 ERROR - illegal extension
402 .long serror |$27-1 ERROR - illegal extension
403 .long serror |$27-2 ERROR - illegal extension
404 .long serror |$27-3 ERROR - illegal extension
405 .long serror |$27-4 ERROR - illegal extension
406 .long serror |$27-5 ERROR - illegal extension
407 .long serror |$27-6 ERROR - illegal extension
408 .long serror |$27-7 ERROR - illegal extension
409
410 .long serror |$28-0 ERROR - illegal extension
411 .long serror |$28-1 ERROR - illegal extension
412 .long serror |$28-2 ERROR - illegal extension
413 .long serror |$28-3 ERROR - illegal extension
414 .long serror |$28-4 ERROR - illegal extension
415 .long serror |$28-5 ERROR - illegal extension
416 .long serror |$28-6 ERROR - illegal extension
417 .long serror |$28-7 ERROR - illegal extension
418
419 .long serror |$29-0 ERROR - illegal extension
420 .long serror |$29-1 ERROR - illegal extension
421 .long serror |$29-2 ERROR - illegal extension
422 .long serror |$29-3 ERROR - illegal extension
423 .long serror |$29-4 ERROR - illegal extension
424 .long serror |$29-5 ERROR - illegal extension
425 .long serror |$29-6 ERROR - illegal extension
426 .long serror |$29-7 ERROR - illegal extension
427
428 .long serror |$2a-0 ERROR - illegal extension
429 .long serror |$2a-1 ERROR - illegal extension
430 .long serror |$2a-2 ERROR - illegal extension
431 .long serror |$2a-3 ERROR - illegal extension
432 .long serror |$2a-4 ERROR - illegal extension
433 .long serror |$2a-5 ERROR - illegal extension
434 .long serror |$2a-6 ERROR - illegal extension
435 .long serror |$2a-7 ERROR - illegal extension
436
437 .long serror |$2b-0 ERROR - illegal extension
438 .long serror |$2b-1 ERROR - illegal extension
439 .long serror |$2b-2 ERROR - illegal extension
440 .long serror |$2b-3 ERROR - illegal extension
441 .long serror |$2b-4 ERROR - illegal extension
442 .long serror |$2b-5 ERROR - illegal extension
443 .long serror |$2b-6 ERROR - illegal extension
444 .long serror |$2b-7 ERROR - illegal extension
445
446 .long serror |$2c-0 ERROR - illegal extension
447 .long serror |$2c-1 ERROR - illegal extension
448 .long serror |$2c-2 ERROR - illegal extension
449 .long serror |$2c-3 ERROR - illegal extension
450 .long serror |$2c-4 ERROR - illegal extension
451 .long serror |$2c-5 ERROR - illegal extension
452 .long serror |$2c-6 ERROR - illegal extension
453 .long serror |$2c-7 ERROR - illegal extension
454
455 .long serror |$2d-0 ERROR - illegal extension
456 .long serror |$2d-1 ERROR - illegal extension
457 .long serror |$2d-2 ERROR - illegal extension
458 .long serror |$2d-3 ERROR - illegal extension
459 .long serror |$2d-4 ERROR - illegal extension
460 .long serror |$2d-5 ERROR - illegal extension
461 .long serror |$2d-6 ERROR - illegal extension
462 .long serror |$2d-7 ERROR - illegal extension
463
464 .long serror |$2e-0 ERROR - illegal extension
465 .long serror |$2e-1 ERROR - illegal extension
466 .long serror |$2e-2 ERROR - illegal extension
467 .long serror |$2e-3 ERROR - illegal extension
468 .long serror |$2e-4 ERROR - illegal extension
469 .long serror |$2e-5 ERROR - illegal extension
470 .long serror |$2e-6 ERROR - illegal extension
471 .long serror |$2e-7 ERROR - illegal extension
472
473 .long serror |$2f-0 ERROR - illegal extension
474 .long serror |$2f-1 ERROR - illegal extension
475 .long serror |$2f-2 ERROR - illegal extension
476 .long serror |$2f-3 ERROR - illegal extension
477 .long serror |$2f-4 ERROR - illegal extension
478 .long serror |$2f-5 ERROR - illegal extension
479 .long serror |$2f-6 ERROR - illegal extension
480 .long serror |$2f-7 ERROR - illegal extension
481
482 .long ssincos |$30-0 fsincos norm
483 .long ssincosz |$30-1 fsincos zero
484 .long ssincosi |$30-2 fsincos inf
485 .long ssincosnan |$30-3 fsincos nan
486 .long ssincosd |$30-4 fsincos denorm
487 .long serror |$30-5 fsincos ERROR
488 .long serror |$30-6 fsincos ERROR
489 .long serror |$30-7 fsincos ERROR
490
491 .long ssincos |$31-0 fsincos norm
492 .long ssincosz |$31-1 fsincos zero
493 .long ssincosi |$31-2 fsincos inf
494 .long ssincosnan |$31-3 fsincos nan
495 .long ssincosd |$31-4 fsincos denorm
496 .long serror |$31-5 fsincos ERROR
497 .long serror |$31-6 fsincos ERROR
498 .long serror |$31-7 fsincos ERROR
499
500 .long ssincos |$32-0 fsincos norm
501 .long ssincosz |$32-1 fsincos zero
502 .long ssincosi |$32-2 fsincos inf
503 .long ssincosnan |$32-3 fsincos nan
504 .long ssincosd |$32-4 fsincos denorm
505 .long serror |$32-5 fsincos ERROR
506 .long serror |$32-6 fsincos ERROR
507 .long serror |$32-7 fsincos ERROR
508
509 .long ssincos |$33-0 fsincos norm
510 .long ssincosz |$33-1 fsincos zero
511 .long ssincosi |$33-2 fsincos inf
512 .long ssincosnan |$33-3 fsincos nan
513 .long ssincosd |$33-4 fsincos denorm
514 .long serror |$33-5 fsincos ERROR
515 .long serror |$33-6 fsincos ERROR
516 .long serror |$33-7 fsincos ERROR
517
518 .long ssincos |$34-0 fsincos norm
519 .long ssincosz |$34-1 fsincos zero
520 .long ssincosi |$34-2 fsincos inf
521 .long ssincosnan |$34-3 fsincos nan
522 .long ssincosd |$34-4 fsincos denorm
523 .long serror |$34-5 fsincos ERROR
524 .long serror |$34-6 fsincos ERROR
525 .long serror |$34-7 fsincos ERROR
526
527 .long ssincos |$35-0 fsincos norm
528 .long ssincosz |$35-1 fsincos zero
529 .long ssincosi |$35-2 fsincos inf
530 .long ssincosnan |$35-3 fsincos nan
531 .long ssincosd |$35-4 fsincos denorm
532 .long serror |$35-5 fsincos ERROR
533 .long serror |$35-6 fsincos ERROR
534 .long serror |$35-7 fsincos ERROR
535
536 .long ssincos |$36-0 fsincos norm
537 .long ssincosz |$36-1 fsincos zero
538 .long ssincosi |$36-2 fsincos inf
539 .long ssincosnan |$36-3 fsincos nan
540 .long ssincosd |$36-4 fsincos denorm
541 .long serror |$36-5 fsincos ERROR
542 .long serror |$36-6 fsincos ERROR
543 .long serror |$36-7 fsincos ERROR
544
545 .long ssincos |$37-0 fsincos norm
546 .long ssincosz |$37-1 fsincos zero
547 .long ssincosi |$37-2 fsincos inf
548 .long ssincosnan |$37-3 fsincos nan
549 .long ssincosd |$37-4 fsincos denorm
550 .long serror |$37-5 fsincos ERROR
551 .long serror |$37-6 fsincos ERROR
552 .long serror |$37-7 fsincos ERROR
553
554 |end
diff --git a/arch/m68k/fpsp040/util.S b/arch/m68k/fpsp040/util.S
new file mode 100644
index 000000000000..452f3d65857b
--- /dev/null
+++ b/arch/m68k/fpsp040/util.S
@@ -0,0 +1,748 @@
1|
2| util.sa 3.7 7/29/91
3|
4| This file contains routines used by other programs.
5|
6| ovf_res: used by overflow to force the correct
7| result. ovf_r_k, ovf_r_x2, ovf_r_x3 are
8| derivatives of this routine.
9| get_fline: get user's opcode word
10| g_dfmtou: returns the destination format.
11| g_opcls: returns the opclass of the float instruction.
12| g_rndpr: returns the rounding precision.
13| reg_dest: write byte, word, or long data to Dn
14|
15|
16| Copyright (C) Motorola, Inc. 1990
17| All Rights Reserved
18|
19| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
20| The copyright notice above does not evidence any
21| actual or intended publication of such source code.
22
23|UTIL idnt 2,1 | Motorola 040 Floating Point Software Package
24
25 |section 8
26
27#include "fpsp.h"
28
29 |xref mem_read
30
31 .global g_dfmtou
32 .global g_opcls
33 .global g_rndpr
34 .global get_fline
35 .global reg_dest
36
37|
38| Final result table for ovf_res. Note that the negative counterparts
39| are unnecessary as ovf_res always returns the sign separately from
40| the exponent.
41| ;+inf
42EXT_PINF: .long 0x7fff0000,0x00000000,0x00000000,0x00000000
43| ;largest +ext
44EXT_PLRG: .long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000
45| ;largest magnitude +sgl in ext
46SGL_PLRG: .long 0x407e0000,0xffffff00,0x00000000,0x00000000
47| ;largest magnitude +dbl in ext
48DBL_PLRG: .long 0x43fe0000,0xffffffff,0xfffff800,0x00000000
49| ;largest -ext
50
51tblovfl:
52 .long EXT_RN
53 .long EXT_RZ
54 .long EXT_RM
55 .long EXT_RP
56 .long SGL_RN
57 .long SGL_RZ
58 .long SGL_RM
59 .long SGL_RP
60 .long DBL_RN
61 .long DBL_RZ
62 .long DBL_RM
63 .long DBL_RP
64 .long error
65 .long error
66 .long error
67 .long error
68
69
70|
71| ovf_r_k --- overflow result calculation
72|
73| This entry point is used by kernel_ex.
74|
75| This forces the destination precision to be extended
76|
77| Input: operand in ETEMP
78| Output: a result is in ETEMP (internal extended format)
79|
80 .global ovf_r_k
81ovf_r_k:
82 lea ETEMP(%a6),%a0 |a0 points to source operand
83 bclrb #sign_bit,ETEMP_EX(%a6) |test, then clear, the sign bit of the exponent word
84 sne ETEMP_SGN(%a6) |sign byte = all ones if sign was set (internal format); falls through to ovf_r_x2
85
86|
87| ovf_r_x2 --- overflow result calculation
88|
89| This entry point used by x_ovfl. (opclass 0 and 2)
90|
91| Input a0 points to an operand in the internal extended format
92| Output a0 points to the result in the internal extended format
93|
94| This sets the round precision according to the user's FPCR unless the
95| instruction is fsgldiv or fsglmul or fsadd, fdadd, fssub, fdsub, fsmul,
96| fdmul, fsdiv, fddiv, fssqrt, fsmove, fdmove, fsabs, fdabs, fsneg, fdneg.
97| If the instruction is fsgldiv or fsglmul, the rounding precision must be
98| extended. If the instruction is not fsgldiv or fsglmul but a force-
99| precision instruction, the rounding precision is then set to the force
100| precision.
101
102 .global ovf_r_x2
103ovf_r_x2:
104 btstb #E3,E_BYTE(%a6) |check for nu exception
105 beql ovf_e1_exc |E3 clear: it is cu exception
106ovf_e3_exc:
107 movew CMDREG3B(%a6),%d0 |get the command word (only the low word of d0 is written)
108 andil #0x00000060,%d0 |keep bits 6 and 5; long op also clears the stale upper word of d0 for the cmpil tests
109 cmpil #0x00000040,%d0
110 beql ovff_sgl |force precision is single
111 cmpil #0x00000060,%d0
112 beql ovff_dbl |force precision is double
113 movew CMDREG3B(%a6),%d0 |get the command word again
114 andil #0x7f,%d0 |clear all except operation
115 cmpil #0x33,%d0
116 beql ovf_fsgl |fsglmul or fsgldiv
117 cmpil #0x30,%d0
118 beql ovf_fsgl
119 bra ovf_fpcr |instruction is none of the above
120| ;use FPCR
121ovf_e1_exc:
122 movew CMDREG1B(%a6),%d0 |get command word
123 andil #0x00000044,%d0 |clear all bits except 6 and 2
124 cmpil #0x00000040,%d0
125 beql ovff_sgl |the instruction is force single
126 cmpil #0x00000044,%d0
127 beql ovff_dbl |the instruction is force double
128 movew CMDREG1B(%a6),%d0 |again get the command word
129 andil #0x0000007f,%d0 |clear all except the op code
130 cmpil #0x00000027,%d0
131 beql ovf_fsgl |fsglmul
132 cmpil #0x00000024,%d0
133 beql ovf_fsgl |fsgldiv
134 bra ovf_fpcr |none of the above, use FPCR
135|
136|
137| Inst is either fsgldiv or fsglmul. Force extended precision.
138|
139ovf_fsgl:
140 clrl %d0
141 bra ovf_res
142
143ovff_sgl:
144 movel #0x00000001,%d0 |set single
145 bra ovf_res
146ovff_dbl:
147 movel #0x00000002,%d0 |set double
148 bra ovf_res
149|
150| The precision is in the fpcr.
151|
152ovf_fpcr:
153 bfextu FPCR_MODE(%a6){#0:#2},%d0 |set round precision
154 bra ovf_res
155
156|
157|
158| ovf_r_x3 --- overflow result calculation
159|
160| This entry point used by x_ovfl. (opclass 3 only)
161|
162| Input a0 points to an operand in the internal extended format
163| Output a0 points to the result in the internal extended format
164|
165| This sets the round precision according to the destination size.
166|
167 .global ovf_r_x3
168ovf_r_x3:
169 bsr g_dfmtou |get dest fmt in d0{1:0}
170| ;for fmovout, the destination format
171| ;is the rounding precision (falls through to ovf_res)
172
173|
174| ovf_res --- overflow result calculation
175|
176| Input:
177| a0 points to operand in internal extended format
178| Output:
179| a0 points to result in internal extended format
180|
181 .global ovf_res
182ovf_res:
183 lsll #2,%d0 |move round precision to d0{3:2}
184 bfextu FPCR_MODE(%a6){#2:#2},%d1 |d1 = round mode, the 2-bit field at offset 2 of FPCR_MODE
185 orl %d1,%d0 |index is fmt:mode in d0{3:0}
186 leal tblovfl,%a1 |load a1 with table address
187 movel %a1@(%d0:l:4),%a1 |fetch longword entry d0 of tblovfl (index scaled by 4)
188 jmp (%a1) |dispatch to the handler for this precision and mode
189|
190|case DEST_FMT = EXT
191|
192EXT_RN:
193 leal EXT_PINF,%a1 |answer is +/- infinity
194 bsetb #inf_bit,FPSR_CC(%a6)
195 bra set_sign |now go set the sign
196EXT_RZ:
197 leal EXT_PLRG,%a1 |answer is +/- large number
198 bra set_sign |now go set the sign
199EXT_RM:
200 tstb LOCAL_SGN(%a0) |if negative overflow
201 beqs e_rm_pos
202e_rm_neg:
203 leal EXT_PINF,%a1 |answer is negative infinity
204 orl #neginf_mask,USER_FPSR(%a6)
205 bra end_ovfr
206e_rm_pos:
207 leal EXT_PLRG,%a1 |answer is large positive number
208 bra end_ovfr
209EXT_RP:
210 tstb LOCAL_SGN(%a0) |if negative overflow
211 beqs e_rp_pos
212e_rp_neg:
213 leal EXT_PLRG,%a1 |answer is large negative number
214 bsetb #neg_bit,FPSR_CC(%a6)
215 bra end_ovfr
216e_rp_pos:
217 leal EXT_PINF,%a1 |answer is positive infinity
218 bsetb #inf_bit,FPSR_CC(%a6)
219 bra end_ovfr
220|
221|case DEST_FMT = DBL
222|
223DBL_RN:
224 leal EXT_PINF,%a1 |answer is +/- infinity
225 bsetb #inf_bit,FPSR_CC(%a6)
226 bra set_sign
227DBL_RZ:
228 leal DBL_PLRG,%a1 |answer is +/- large number
229 bra set_sign |now go set the sign
230DBL_RM:
231 tstb LOCAL_SGN(%a0) |if negative overflow
232 beqs d_rm_pos
233d_rm_neg:
234 leal EXT_PINF,%a1 |answer is negative infinity
235 orl #neginf_mask,USER_FPSR(%a6)
236 bra end_ovfr |inf is same for all precisions (ext,dbl,sgl)
237d_rm_pos:
238 leal DBL_PLRG,%a1 |answer is large positive number
239 bra end_ovfr
240DBL_RP:
241 tstb LOCAL_SGN(%a0) |if negative overflow
242 beqs d_rp_pos
243d_rp_neg:
244 leal DBL_PLRG,%a1 |answer is large negative number
245 bsetb #neg_bit,FPSR_CC(%a6)
246 bra end_ovfr
247d_rp_pos:
248 leal EXT_PINF,%a1 |answer is positive infinity
249 bsetb #inf_bit,FPSR_CC(%a6)
250 bra end_ovfr
251|
252|case DEST_FMT = SGL
253|
254SGL_RN:
255 leal EXT_PINF,%a1 |answer is +/- infinity
256 bsetb #inf_bit,FPSR_CC(%a6)
257 bras set_sign
258SGL_RZ:
259 leal SGL_PLRG,%a1 |answer is +/- large number
260 bras set_sign
261SGL_RM:
262 tstb LOCAL_SGN(%a0) |if negative overflow
263 beqs s_rm_pos
264s_rm_neg:
265 leal EXT_PINF,%a1 |answer is negative infinity
266 orl #neginf_mask,USER_FPSR(%a6)
267 bras end_ovfr
268s_rm_pos:
269 leal SGL_PLRG,%a1 |answer is large positive number
270 bras end_ovfr
271SGL_RP:
272 tstb LOCAL_SGN(%a0) |if negative overflow
273 beqs s_rp_pos
274s_rp_neg:
275 leal SGL_PLRG,%a1 |answer is large negative number
276 bsetb #neg_bit,FPSR_CC(%a6)
277 bras end_ovfr
278s_rp_pos:
279 leal EXT_PINF,%a1 |answer is positive infinity
280 bsetb #inf_bit,FPSR_CC(%a6)
281 bras end_ovfr
282
283set_sign:
284 tstb LOCAL_SGN(%a0) |if negative overflow
285 beqs end_ovfr
286neg_sign:
287 bsetb #neg_bit,FPSR_CC(%a6)
288
289end_ovfr:
290 movew LOCAL_EX(%a1),LOCAL_EX(%a0) |do not overwrite sign
291 movel LOCAL_HI(%a1),LOCAL_HI(%a0)
292 movel LOCAL_LO(%a1),LOCAL_LO(%a0)
293 rts
294
295
296|
297| ERROR
298|
299error:
300 rts
301|
302| get_fline --- get f-line opcode of interrupted instruction
303|
304| Returns opcode in the low word of d0.
305|
306get_fline:
307 movel USER_FPIAR(%a6),%a0 |opcode address
308 movel #0,-(%a7) |push a zeroed longword as a temporary
309 leal 2(%a7),%a1 |point to low word of temporary
310 movel #2,%d0 |count
311 bsrl mem_read |defined elsewhere; presumably copies d0 bytes from (a0) to (a1) - confirm
312 movel (%a7)+,%d0 |pop the temporary into d0; the opcode is in its low word
313 rts
314|
315| g_rndpr --- put rounding precision in d0{1:0}
316|
317| valid return codes are:
318| 00 - extended
319| 01 - single
320| 10 - double
321|
322| begin
323| get rounding precision (cmdreg3b{6:5})
324| begin
325| case opclass = 011 (move out)
326| get destination format - this is also the rounding precision
327|
328| case opclass = 0x0
329| if E3
330| *case RndPr(from cmdreg3b{6:5} = 11 then RND_PREC = DBL
331| *case RndPr(from cmdreg3b{6:5} = 10 then RND_PREC = SGL
332| case RndPr(from cmdreg3b{6:5} = 00 | 01
333| use precision from FPCR{7:6}
334| case 00 then RND_PREC = EXT
335| case 01 then RND_PREC = SGL
336| case 10 then RND_PREC = DBL
337| else E1
338| use precision in FPCR{7:6}
339| case 00 then RND_PREC = EXT
340| case 01 then RND_PREC = SGL
341| case 10 then RND_PREC = DBL
342| end
343|
344g_rndpr:
345 bsr g_opcls |get opclass in d0{2:0}
346 cmpw #0x0003,%d0 |check for opclass 011
347 bnes op_0x0 |not a move out: decode the command word below
348
349|
350| For move out instructions (opclass 011) the destination format
351| is the same as the rounding precision. Pass results from g_dfmtou.
352|
353 bsr g_dfmtou
354 rts
355op_0x0:
356 btstb #E3,E_BYTE(%a6)
357 beql unf_e1_exc |E3 clear: decode cmdreg1b (E1 path)
358unf_e3_exc:
359 movel CMDREG3B(%a6),%d0 |rounding precision in d0{10:9}
360 bfextu %d0{#9:#2},%d0 |move the rounding prec bits to d0{1:0}
361 cmpil #0x2,%d0 |10 = forced single (see table below)
362 beql unff_sgl |force precision is single
363 cmpil #0x3,%d0 |force precision is double
364 beql unff_dbl
365 movew CMDREG3B(%a6),%d0 |get the command word again
366 andil #0x7f,%d0 |clear all except operation
367 cmpil #0x33,%d0
368 beql unf_fsgl |fsglmul or fsgldiv
369 cmpil #0x30,%d0
370 beql unf_fsgl |fsgldiv or fsglmul
371 bra unf_fpcr |no forced precision: use the FPCR setting
372unf_e1_exc:
373 movel CMDREG1B(%a6),%d0 |get 32 bits off the stack, 1st 16 bits
374| ;are the command word
375 andil #0x00440000,%d0 |clear all bits except bits 6 and 2
376 cmpil #0x00400000,%d0 |bit 6 set, bit 2 clear
377 beql unff_sgl |force single
378 cmpil #0x00440000,%d0 |force double
379 beql unff_dbl
380 movel CMDREG1B(%a6),%d0 |get the command word again
381 andil #0x007f0000,%d0 |clear all bits except the operation
382 cmpil #0x00270000,%d0
383 beql unf_fsgl |fsglmul
384 cmpil #0x00240000,%d0
385 beql unf_fsgl |fsgldiv
386 bra unf_fpcr |no forced precision: use the FPCR setting
387
388|
389| Convert to return format. The values from cmdreg3b and the return
390| values are:
391| cmdreg3b return precision
392| -------- ------ ---------
393| 00,01 0 ext
394| 10 1 sgl
395| 11 2 dbl
396| Force single
397|
398unff_sgl:
399 movel #1,%d0 |return 1
400 rts
401|
402| Force double
403|
404unff_dbl:
405 movel #2,%d0 |return 2
406 rts
407|
408| Force extended
409|
410unf_fsgl:
411 movel #0,%d0 |return 0 (ext) for fsglmul/fsgldiv
412 rts
413|
414| Get rounding precision set in FPCR{7:6}.
415|
416unf_fpcr:
417 movel USER_FPCR(%a6),%d0 |rounding precision bits in d0{7:6}
418 bfextu %d0{#24:#2},%d0 |move the rounding prec bits to d0{1:0}
419 rts |the 2-bit FPCR field is returned unchanged
420|
421| g_opcls --- put opclass in d0{2:0}
422|
423g_opcls:
424 btstb #E3,E_BYTE(%a6)
425 beqs opc_1b |if E3 is clear, go to cmdreg1b
426opc_3b:
427 clrl %d0 |if E3, only opclass 0x0 is possible
428 rts
429opc_1b:
430 movel CMDREG1B(%a6),%d0
431 bfextu %d0{#0:#3},%d0 |shift opclass bits d0{31:29} to d0{2:0}
432 rts
433|
434| g_dfmtou --- put destination format in d0{1:0}
435|
436| If E1, the format is from cmdreg1b{12:10}
437| If E3, the format is extended.
438|
439| Dest. Fmt.
440| extended 010 -> 00
441| single 001 -> 01
442| double 101 -> 10
443|
444g_dfmtou:
445 btstb #E3,E_BYTE(%a6)
446 beqs op011 |E3 clear: decode the format from cmdreg1b
447 clrl %d0 |if E3, size is always ext
448 rts
449op011:
450 movel CMDREG1B(%a6),%d0
451 bfextu %d0{#3:#3},%d0 |dest fmt from cmdreg1b{12:10}
452 cmpb #1,%d0 |check for single
453 bnes not_sgl
454 movel #1,%d0 |return 1 = single
455 rts
456not_sgl:
457 cmpb #5,%d0 |check for double
458 bnes not_dbl
459 movel #2,%d0 |return 2 = double
460 rts
461not_dbl:
462 clrl %d0 |must be extended
463 rts
464
465|
466|
467| Final result table for unf_sub. Note that the negative counterparts
468| are unnecessary as unf_sub always returns the sign separately from
469| the exponent.
470| ;+zero, ext result
471EXT_PZRO: .long 0x00000000,0x00000000,0x00000000,0x00000000
472| ;+zero, sgl result (exponent word 0x3f81)
473SGL_PZRO: .long 0x3f810000,0x00000000,0x00000000,0x00000000
474| ;+zero, dbl result (exponent word 0x3c01)
475DBL_PZRO: .long 0x3c010000,0x00000000,0x00000000,0x00000000
476| ;smallest +ext denorm
477EXT_PSML: .long 0x00000000,0x00000000,0x00000001,0x00000000
478| ;smallest +sgl denorm
479SGL_PSML: .long 0x3f810000,0x00000100,0x00000000,0x00000000
480| ;smallest +dbl denorm
481DBL_PSML: .long 0x3c010000,0x00000000,0x00000800,0x00000000
482|
483| UNF_SUB --- underflow result calculation
484|
485| Input:
486| d0 contains round precision
487| a0 points to input operand in the internal extended format
488|
489| Output:
490| a0 points to correct internal extended precision result.
491|
492
| Jump table for unf_sub, indexed by (round precision << 2) | round mode.
| Entries 12-15 (precision value 3) repeat the DBL routines.
493tblunf:
494 .long uEXT_RN
495 .long uEXT_RZ
496 .long uEXT_RM
497 .long uEXT_RP
498 .long uSGL_RN
499 .long uSGL_RZ
500 .long uSGL_RM
501 .long uSGL_RP
502 .long uDBL_RN
503 .long uDBL_RZ
504 .long uDBL_RM
505 .long uDBL_RP
506 .long uDBL_RN
507 .long uDBL_RZ
508 .long uDBL_RM
509 .long uDBL_RP
510
511 .global unf_sub
| unf_sub uses d0, d1 and a1 as scratch and updates the FPSR condition
| codes kept in the frame addressed by a6.
512unf_sub:
513 lsll #2,%d0 |move round precision to d0{3:2}
514 bfextu FPCR_MODE(%a6){#2:#2},%d1 |set round mode
515 orl %d1,%d0 |index is fmt:mode in d0{3:0}
516 leal tblunf,%a1 |load a1 with table address
517 movel %a1@(%d0:l:4),%a1 |use d0 as index to the table
518 jmp (%a1) |go to the correct routine
519|
520|case DEST_FMT = EXT
521|
522uEXT_RN:
523 leal EXT_PZRO,%a1 |answer is +/- zero
524 bsetb #z_bit,FPSR_CC(%a6)
525 bra uset_sign |now go set the sign
526uEXT_RZ:
527 leal EXT_PZRO,%a1 |answer is +/- zero
528 bsetb #z_bit,FPSR_CC(%a6)
529 bra uset_sign |now go set the sign
530uEXT_RM:
531 tstb LOCAL_SGN(%a0) |if negative underflow
532 beqs ue_rm_pos
533ue_rm_neg:
534 leal EXT_PSML,%a1 |answer is negative smallest denorm
535 bsetb #neg_bit,FPSR_CC(%a6)
536 bra end_unfr
537ue_rm_pos:
538 leal EXT_PZRO,%a1 |answer is positive zero
539 bsetb #z_bit,FPSR_CC(%a6)
540 bra end_unfr
541uEXT_RP:
542 tstb LOCAL_SGN(%a0) |if negative underflow
543 beqs ue_rp_pos
544ue_rp_neg:
545 leal EXT_PZRO,%a1 |answer is negative zero
546 oril #negz_mask,USER_FPSR(%a6)
547 bra end_unfr
548ue_rp_pos:
549 leal EXT_PSML,%a1 |answer is positive smallest denorm
550 bra end_unfr
551|
552|case DEST_FMT = DBL
553|
554uDBL_RN:
555 leal DBL_PZRO,%a1 |answer is +/- zero
556 bsetb #z_bit,FPSR_CC(%a6)
557 bra uset_sign |now go set the sign
558uDBL_RZ:
559 leal DBL_PZRO,%a1 |answer is +/- zero
560 bsetb #z_bit,FPSR_CC(%a6)
561 bra uset_sign |now go set the sign
562uDBL_RM:
563 tstb LOCAL_SGN(%a0) |if negative underflow
564 beqs ud_rm_pos
565ud_rm_neg:
566 leal DBL_PSML,%a1 |answer is smallest denormalized negative
567 bsetb #neg_bit,FPSR_CC(%a6)
568 bra end_unfr
569ud_rm_pos:
570 leal DBL_PZRO,%a1 |answer is positive zero
571 bsetb #z_bit,FPSR_CC(%a6)
572 bra end_unfr
573uDBL_RP:
574 tstb LOCAL_SGN(%a0) |if negative underflow
575 beqs ud_rp_pos
576ud_rp_neg:
577 leal DBL_PZRO,%a1 |answer is negative zero
578 oril #negz_mask,USER_FPSR(%a6)
579 bra end_unfr
580ud_rp_pos:
581 leal DBL_PSML,%a1 |answer is smallest denormalized positive
582 bra end_unfr
583|
584|case DEST_FMT = SGL
585|
586uSGL_RN:
587 leal SGL_PZRO,%a1 |answer is +/- zero
588 bsetb #z_bit,FPSR_CC(%a6)
589 bras uset_sign |now go set the sign
590uSGL_RZ:
591 leal SGL_PZRO,%a1 |answer is +/- zero
592 bsetb #z_bit,FPSR_CC(%a6)
593 bras uset_sign |now go set the sign
594uSGL_RM:
595 tstb LOCAL_SGN(%a0) |if negative underflow
596 beqs us_rm_pos
597us_rm_neg:
598 leal SGL_PSML,%a1 |answer is smallest denormalized negative
599 bsetb #neg_bit,FPSR_CC(%a6)
600 bras end_unfr
601us_rm_pos:
602 leal SGL_PZRO,%a1 |answer is positive zero
603 bsetb #z_bit,FPSR_CC(%a6)
604 bras end_unfr
605uSGL_RP:
606 tstb LOCAL_SGN(%a0) |if negative underflow
607 beqs us_rp_pos
608us_rp_neg:
609 leal SGL_PZRO,%a1 |answer is negative zero
610 oril #negz_mask,USER_FPSR(%a6)
611 bras end_unfr
612us_rp_pos:
613 leal SGL_PSML,%a1 |answer is smallest denormalized positive
614 bras end_unfr
615
616uset_sign:
617 tstb LOCAL_SGN(%a0) |if negative underflow
618 beqs end_unfr |positive: leave the N bit alone
619uneg_sign:
620 bsetb #neg_bit,FPSR_CC(%a6) |negative zero: also set N
621
| Copy the selected table entry (a1) over the operand (a0). Only the
| exponent word and the two mantissa longs are written.
622end_unfr:
623 movew LOCAL_EX(%a1),LOCAL_EX(%a0) |be careful not to overwrite sign
624 movel LOCAL_HI(%a1),LOCAL_HI(%a0)
625 movel LOCAL_LO(%a1),LOCAL_LO(%a0)
626 rts
627|
628| reg_dest --- write byte, word, or long data to Dn
629|
630|
631| Input:
632| L_SCR1: Data
633| d1: data size and dest register number formatted as:
634|
635| 32 5 4 3 2 1 0
636| -----------------------------------------------
637| | 0 | Size | Dest Reg # |
638| -----------------------------------------------
639|
640| Size is:
641| 0 - Byte
642| 1 - Word
643| 2 - Long/Single
644|
| Jump table for reg_dest: index = (size << 3) | register number,
| giving 8 byte entries, then 8 word entries, then 8 long entries.
645pregdst:
646 .long byte_d0
647 .long byte_d1
648 .long byte_d2
649 .long byte_d3
650 .long byte_d4
651 .long byte_d5
652 .long byte_d6
653 .long byte_d7
654 .long word_d0
655 .long word_d1
656 .long word_d2
657 .long word_d3
658 .long word_d4
659 .long word_d5
660 .long word_d6
661 .long word_d7
662 .long long_d0
663 .long long_d1
664 .long long_d2
665 .long long_d3
666 .long long_d4
667 .long long_d5
668 .long long_d6
669 .long long_d7
670
| a0 is used as scratch. Each target routine below ends in rts, so
| control returns to whoever called (or branched to) reg_dest.
671reg_dest:
672 leal pregdst,%a0
673 movel %a0@(%d1:l:4),%a0 |fetch handler address for size:reg
674 jmp (%a0)
675
| d0 and d1 are written to the USER_D0/USER_D1 slots of the frame at a6
| rather than to the live registers; d2-d7 are written directly.
| NOTE(review): presumably USER_D0/USER_D1 are the saved copies that the
| handlers reload on exit -- confirm against fpsp.h.
676byte_d0:
677 moveb L_SCR1(%a6),USER_D0+3(%a6) |+3 = low byte of the saved long
678 rts
679byte_d1:
680 moveb L_SCR1(%a6),USER_D1+3(%a6)
681 rts
682byte_d2:
683 moveb L_SCR1(%a6),%d2
684 rts
685byte_d3:
686 moveb L_SCR1(%a6),%d3
687 rts
688byte_d4:
689 moveb L_SCR1(%a6),%d4
690 rts
691byte_d5:
692 moveb L_SCR1(%a6),%d5
693 rts
694byte_d6:
695 moveb L_SCR1(%a6),%d6
696 rts
697byte_d7:
698 moveb L_SCR1(%a6),%d7
699 rts
700word_d0:
701 movew L_SCR1(%a6),USER_D0+2(%a6) |+2 = low word of the saved long
702 rts
703word_d1:
704 movew L_SCR1(%a6),USER_D1+2(%a6)
705 rts
706word_d2:
707 movew L_SCR1(%a6),%d2
708 rts
709word_d3:
710 movew L_SCR1(%a6),%d3
711 rts
712word_d4:
713 movew L_SCR1(%a6),%d4
714 rts
715word_d5:
716 movew L_SCR1(%a6),%d5
717 rts
718word_d6:
719 movew L_SCR1(%a6),%d6
720 rts
721word_d7:
722 movew L_SCR1(%a6),%d7
723 rts
724long_d0:
725 movel L_SCR1(%a6),USER_D0(%a6)
726 rts
727long_d1:
728 movel L_SCR1(%a6),USER_D1(%a6)
729 rts
730long_d2:
731 movel L_SCR1(%a6),%d2
732 rts
733long_d3:
734 movel L_SCR1(%a6),%d3
735 rts
736long_d4:
737 movel L_SCR1(%a6),%d4
738 rts
739long_d5:
740 movel L_SCR1(%a6),%d5
741 rts
742long_d6:
743 movel L_SCR1(%a6),%d6
744 rts
745long_d7:
746 movel L_SCR1(%a6),%d7
747 rts
748 |end
diff --git a/arch/m68k/fpsp040/x_bsun.S b/arch/m68k/fpsp040/x_bsun.S
new file mode 100644
index 000000000000..039247b09c8b
--- /dev/null
+++ b/arch/m68k/fpsp040/x_bsun.S
@@ -0,0 +1,47 @@
1|
2| x_bsun.sa 3.3 7/1/91
3|
4| fpsp_bsun --- FPSP handler for branch/set on unordered exception
5|
6| Copy the PC to FPIAR to maintain 881/882 compatibility
7|
8| The real_bsun handler will need to perform further corrective
9| measures as outlined in the 040 User's Manual on pages
10| 9-41f, section 9.8.3.
11|
12
13| Copyright (C) Motorola, Inc. 1990
14| All Rights Reserved
15|
16| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
17| The copyright notice above does not evidence any
18| actual or intended publication of such source code.
19
20X_BSUN: |idnt 2,1 | Motorola 040 Floating Point Software Package
21
22 |section 8
23
24#include "fpsp.h"
25
26 |xref real_bsun
27
28 .global fpsp_bsun
29fpsp_bsun:
30|
| Build the local frame and save the FPU state and scratch registers.
31 link %a6,#-LOCAL_SIZE
32 fsave -(%a7)
33 moveml %d0-%d1/%a0-%a1,USER_DA(%a6)
34 fmovemx %fp0-%fp3,USER_FP0(%a6)
35 fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
36
37|
38 movel EXC_PC(%a6),USER_FPIAR(%a6) |saved FPIAR := exception PC
39|
| Restore everything; the fmoveml below loads the updated FPIAR value.
40 moveml USER_DA(%a6),%d0-%d1/%a0-%a1
41 fmovemx USER_FP0(%a6),%fp0-%fp3
42 fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
43 frestore (%a7)+
44 unlk %a6
45 bral real_bsun |hand off to the real_bsun handler
46|
47 |end
diff --git a/arch/m68k/fpsp040/x_fline.S b/arch/m68k/fpsp040/x_fline.S
new file mode 100644
index 000000000000..3917710b0fde
--- /dev/null
+++ b/arch/m68k/fpsp040/x_fline.S
@@ -0,0 +1,104 @@
1|
2| x_fline.sa 3.3 1/10/91
3|
4| fpsp_fline --- FPSP handler for fline exception
5|
6| First determine if the exception is one of the unimplemented
7| floating point instructions. If so, let fpsp_unimp handle it.
8| Next, determine if the instruction is an fmovecr with a non-zero
9| <ea> field. If so, handle here and return. Otherwise, it
10| must be a real F-line exception.
11|
12
13| Copyright (C) Motorola, Inc. 1990
14| All Rights Reserved
15|
16| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
17| The copyright notice above does not evidence any
18| actual or intended publication of such source code.
19
20X_FLINE: |idnt 2,1 | Motorola 040 Floating Point Software Package
21
22 |section 8
23
24#include "fpsp.h"
25
26 |xref real_fline
27 |xref fpsp_unimp
28 |xref uni_2
29 |xref mem_read
30 |xref fpsp_fmt_error
31
32 .global fpsp_fline
33fpsp_fline:
34|
35| check for unimplemented vector first. Use EXC_VEC-4 because
36| the equate is valid only after a 'link a6' has pushed one more
37| long onto the stack.
38|
39 cmpw #UNIMP_VEC,EXC_VEC-4(%a7)
40 beql fpsp_unimp
41
42|
43| fmovecr with non-zero <ea> handling here
44|
45 subl #4,%a7 |4 accounts for 2-word difference
46| ;between six word frame (unimp) and
47| ;four word frame
48 link %a6,#-LOCAL_SIZE
49 fsave -(%a7)
50 moveml %d0-%d1/%a0-%a1,USER_DA(%a6)
51 moveal EXC_PC+4(%a6),%a0 |get address of fline instruction
52 leal L_SCR1(%a6),%a1 |use L_SCR1 as scratch
53 movel #4,%d0 |read 4 bytes: fline word + command word
54 addl #4,%a6 |to offset the sub.l #4,a7 above so that
55| ;a6 can point correctly to the stack frame
56| ;before branching to mem_read
57 bsrl mem_read
58 subl #4,%a6 |back to the shifted frame pointer
59 movel L_SCR1(%a6),%d0 |d0 contains the fline and command word
60 bfextu %d0{#4:#3},%d1 |extract coprocessor id
61 cmpib #1,%d1 |check if cpid=1
62 bne not_mvcr |exit if not
63 bfextu %d0{#16:#6},%d1 |top 6 bits of the command word
64 cmpib #0x17,%d1 |check if it is an FMOVECR encoding
65 bne not_mvcr
66| ;if an FMOVECR instruction, fix stack
67| ;and go to FPSP_UNIMP
68fix_stack:
69 cmpib #VER_40,(%a7) |test for orig unimp frame
70 bnes ck_rev
71 subl #UNIMP_40_SIZE-4,%a7 |emulate an orig fsave
72 moveb #VER_40,(%a7) |frame header: version byte
73 moveb #UNIMP_40_SIZE-4,1(%a7) |frame header: size byte
74 clrw 2(%a7)
75 bras fix_con
76ck_rev:
77 cmpib #VER_41,(%a7) |test for rev unimp frame
78 bnel fpsp_fmt_error |if not $40 or $41, exit with error
79 subl #UNIMP_41_SIZE-4,%a7 |emulate a rev fsave
80 moveb #VER_41,(%a7) |frame header: version byte
81 moveb #UNIMP_41_SIZE-4,1(%a7) |frame header: size byte
82 clrw 2(%a7)
83fix_con:
84 movew EXC_SR+4(%a6),EXC_SR(%a6) |move stacked sr to new position
85 movel EXC_PC+4(%a6),EXC_PC(%a6) |move stacked pc to new position
86 fmovel EXC_PC(%a6),%FPIAR |point FPIAR to fline inst
87 movel #4,%d1
88 addl %d1,EXC_PC(%a6) |increment stacked pc value to next inst
89 movew #0x202c,EXC_VEC(%a6) |reformat vector to unimp
90 clrl EXC_EA(%a6) |clear the EXC_EA field
91 movew %d0,CMDREG1B(%a6) |move the lower word into CMDREG1B
92 clrl E_BYTE(%a6)
93 bsetb #UFLAG,T_BYTE(%a6)
94 moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore data registers
95 bral uni_2 |continue in the unimp handler at uni_2
96
| Not an fmovecr: undo the frame and the 4-byte stack adjustment made
| above, then hand off to real_fline.
97not_mvcr:
98 moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore data registers
99 frestore (%a7)+
100 unlk %a6
101 addl #4,%a7 |undo the subl #4,%a7 done on entry
102 bral real_fline
103
104 |end
diff --git a/arch/m68k/fpsp040/x_operr.S b/arch/m68k/fpsp040/x_operr.S
new file mode 100644
index 000000000000..b0f54bcb49a7
--- /dev/null
+++ b/arch/m68k/fpsp040/x_operr.S
@@ -0,0 +1,356 @@
1|
2| x_operr.sa 3.5 7/1/91
3|
4| fpsp_operr --- FPSP handler for operand error exception
5|
6| See 68040 User's Manual pp. 9-44f
7|
8| Note 1: For trap disabled 040 does the following:
9| If the dest is a fp reg, then an extended precision non_signaling
10| NAN is stored in the dest reg. If the dest format is b, w, or l and
11| the source op is a NAN, then garbage is stored as the result (actually
12| the upper 32 bits of the mantissa are sent to the integer unit). If
13| the dest format is integer (b, w, l) and the operr is caused by
14| integer overflow, or the source op is inf, then the result stored is
15| garbage.
16| There are three cases in which operr is incorrectly signaled on the
17| 040. This occurs for move_out of format b, w, or l for the largest
18| negative integer (-2^7 for b, -2^15 for w, -2^31 for l).
19|
20| On opclass = 011 fmove.(b,w,l) that causes a conversion
21| overflow -> OPERR, the exponent in wbte (and fpte) is:
22| byte 56 - (62 - exp)
23| word 48 - (62 - exp)
24| long 32 - (62 - exp)
25|
26| where exp = (true exp) - 1
27|
28| So, wbtemp and fptemp will contain the following on erroneously
29| signalled operr:
30| fpts = 1
31| fpte = $4000 (15 bit externally)
32| byte fptm = $ffffffff ffffff80
33| word fptm = $ffffffff ffff8000
34| long fptm = $ffffffff 80000000
35|
36| Note 2: For trap enabled 040 does the following:
37| If the inst is move_out, then same as Note 1.
38| If the inst is not move_out, the dest is not modified.
39| The exceptional operand is not defined for integer overflow
40| during a move_out.
41|
42
43| Copyright (C) Motorola, Inc. 1990
44| All Rights Reserved
45|
46| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
47| The copyright notice above does not evidence any
48| actual or intended publication of such source code.
49
50X_OPERR: |idnt 2,1 | Motorola 040 Floating Point Software Package
51
52 |section 8
53
54#include "fpsp.h"
55
56 |xref mem_write
57 |xref real_operr
58 |xref real_inex
59 |xref get_fline
60 |xref fpsp_done
61 |xref reg_dest
62
63 .global fpsp_operr
64fpsp_operr:
65|
| Build the local frame and save the FPU state and scratch registers.
66 link %a6,#-LOCAL_SIZE
67 fsave -(%a7)
68 moveml %d0-%d1/%a0-%a1,USER_DA(%a6)
69 fmovemx %fp0-%fp3,USER_FP0(%a6)
70 fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
71
72|
73| Check if this is an opclass 3 instruction.
74| If so, fall through, else branch to operr_end
75|
76 btstb #TFLAG,T_BYTE(%a6)
77 beqs operr_end
78
79|
80| If the destination size is B,W,or L, the operr must be
81| handled here.
82|
83 movel CMDREG1B(%a6),%d0
84 bfextu %d0{#3:#3},%d0 |0=long, 4=word, 6=byte
85 cmpib #0,%d0 |determine size; check long
86 beq operr_long
87 cmpib #4,%d0 |check word
88 beq operr_word
89 cmpib #6,%d0 |check byte
90 beq operr_byte
| any other format falls through into operr_end
91
92|
93| The size is not B,W,or L, so the operr is handled by the
94| kernel handler. Set the operr bits and clean up, leaving
95| only the integer exception frame on the stack, and the
96| fpu in the original exceptional state.
97|
98operr_end:
99 bsetb #operr_bit,FPSR_EXCEPT(%a6)
100 bsetb #aiop_bit,FPSR_AEXCEPT(%a6)
101
102 moveml USER_DA(%a6),%d0-%d1/%a0-%a1
103 fmovemx USER_FP0(%a6),%fp0-%fp3
104 fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
105 frestore (%a7)+
106 unlk %a6
107 bral real_operr
108
| operr on a move out to a long destination. d1 carries the write size
| (4) for operr_store.
109operr_long:
110 moveql #4,%d1 |write size to d1
111 moveb STAG(%a6),%d0 |test stag for nan
112 andib #0xe0,%d0 |clr all but tag
113 cmpib #0x60,%d0 |check for nan
114 beq operr_nan
115 cmpil #0x80000000,FPTEMP_LO(%a6) |test if ls lword is special
116 bnes chklerr |if not equal, check for incorrect operr
117 bsr check_upper |check if exp and ms mant are special
118 tstl %d0
119 bnes chklerr |if d0 is true, check for incorrect operr
120 movel #0x80000000,%d0 |store special case result
121 bsr operr_store
122 bra not_enabled |clean and exit
123|
124| CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE
125|
126chklerr:
127 movew FPTEMP_EX(%a6),%d0
128 andw #0x7FFF,%d0 |ignore sign bit
129 cmpw #0x3FFE,%d0 |this is the only possible exponent value
130 bnes chklerr2
131fixlong:
132 movel FPTEMP_LO(%a6),%d0 |result is the ls lword of FPTEMP
133 bsr operr_store
134 bra not_enabled
135chklerr2:
136 movew FPTEMP_EX(%a6),%d0
137 andw #0x7FFF,%d0 |ignore sign bit
138 cmpw #0x4000,%d0
139 bcc store_max |exponent out of range
140
141 movel FPTEMP_LO(%a6),%d0
142 andl #0x7FFF0000,%d0 |look for all 1's on bits 30-16
143 cmpl #0x7FFF0000,%d0
144 beqs fixlong
145
146 tstl FPTEMP_LO(%a6) |branch on bit 31 of the ls lword
147 bpls chklepos
148 cmpl #0xFFFFFFFF,FPTEMP_HI(%a6) |bit 31 set: ms lword must be all 1's
149 beqs fixlong
150 bra store_max
151chklepos:
152 tstl FPTEMP_HI(%a6) |bit 31 clear: ms lword must be zero
153 beqs fixlong
154 bra store_max
155
| operr on a move out to a word destination. d1 carries the write size
| (2) for operr_store. The word to be written must sit in the upper half
| of d0: operr_store saves d0 as a long in L_SCR1, and word-sized
| consumers such as word_dN read the first two bytes at that address.
156operr_word:
157 moveql #2,%d1 |write size to d1
158 moveb STAG(%a6),%d0 |test stag for nan
159 andib #0xe0,%d0 |clr all but tag
160 cmpib #0x60,%d0 |check for nan
161 beq operr_nan
162 cmpil #0xffff8000,FPTEMP_LO(%a6) |test if ls lword is special
163 bnes chkwerr |if not equal, check for incorrect operr
164 bsr check_upper |check if exp and ms mant are special
165 tstl %d0
166 bnes chkwerr |if d0 is true, check for incorrect operr
167 movel #0x80000000,%d0 |store special case result ($8000 in upper word)
168 bsr operr_store
169 bra not_enabled |clean and exit
170|
171| CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE
172|
173chkwerr:
174 movew FPTEMP_EX(%a6),%d0
175 andw #0x7FFF,%d0 |ignore sign bit
176 cmpw #0x3FFE,%d0 |this is the only possible exponent value
177 bnes store_max
178 movel FPTEMP_LO(%a6),%d0
179 swap %d0 |move the low word into the upper half
180 bsr operr_store
181 bra not_enabled
182
| operr on a move out to a byte destination. d1 carries the write size
| (1) for operr_store. The byte to be written must sit in the top byte
| of d0, since operr_store saves d0 as a long in L_SCR1 and byte-sized
| consumers such as byte_dN read the first byte at that address.
183operr_byte:
184 moveql #1,%d1 |write size to d1
185 moveb STAG(%a6),%d0 |test stag for nan
186 andib #0xe0,%d0 |clr all but tag
187 cmpib #0x60,%d0 |check for nan
188 beqs operr_nan
189 cmpil #0xffffff80,FPTEMP_LO(%a6) |test if ls lword is special
190 bnes chkberr |if not equal, check for incorrect operr
191 bsr check_upper |check if exp and ms mant are special
192 tstl %d0
193 bnes chkberr |if d0 is true, check for incorrect operr
194 movel #0x80000000,%d0 |store special case result ($80 in top byte)
195 bsr operr_store
196 bra not_enabled |clean and exit
197|
198| CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE
199|
200chkberr:
201 movew FPTEMP_EX(%a6),%d0
202 andw #0x7FFF,%d0 |ignore sign bit
203 cmpw #0x3FFE,%d0 |this is the only possible exponent value
204 bnes store_max
205 movel FPTEMP_LO(%a6),%d0
206 asll #8,%d0 |low byte -> bits 15:8
207 swap %d0 |bits 15:8 -> top byte
208 bsr operr_store
209 bra not_enabled
210
211|
212| This operr condition is not of the special case. Set operr
213| and aiop and write the portion of the nan to memory for the
214| given size.
215|
| Source operand was tagged as a NAN. d1 still holds the write size set
| by the operr_long/word/byte code that branched here.
216operr_nan:
217 orl #opaop_mask,USER_FPSR(%a6) |set operr & aiop
218
219 movel ETEMP_HI(%a6),%d0 |output will be from upper 32 bits
220 bsr operr_store
221 bra end_operr |then check whether operr traps are enabled
222|
223| Store_max loads the max pos or negative for the size, sets
224| the operr and aiop bits, and clears inex and ainex, incorrectly
225| set by the 040.
226|
| d1 still holds the write size set by the operr_long/word/byte code.
227store_max:
228 orl #opaop_mask,USER_FPSR(%a6) |set operr & aiop
229 bclrb #inex2_bit,FPSR_EXCEPT(%a6)
230 bclrb #ainex_bit,FPSR_AEXCEPT(%a6)
231 fmovel #0,%FPSR |clear the live FPSR as well
232
233 tstw FPTEMP_EX(%a6) |check sign
234 blts load_neg
235 movel #0x7fffffff,%d0 |positive: largest positive value
236 bsr operr_store
237 bra end_operr
238load_neg:
239 movel #0x80000000,%d0 |negative: largest negative value
240 bsr operr_store
241 bra end_operr
242
243|
244| This routine stores the data in d0, for the given size in d1,
245| to memory or data register as required. A read of the fline
246| is required to determine the destination.
247|
248operr_store:
249 movel %d0,L_SCR1(%a6) |move write data to L_SCR1
250 movel %d1,-(%a7) |save register size
251 bsrl get_fline |fline returned in d0
252 movel (%a7)+,%d1 |recover the size saved above
253 bftst %d0{#26:#3} |if mode is zero, dest is Dn
254 bnes dest_mem
255|
256| Destination is Dn. Get register number from d0. Data is in
257| L_SCR1. D1 has size: 1=byte,2=word,4=long/single
258|
259 andil #7,%d0 |isolate register number
260 cmpil #4,%d1
261 beqs op_long |the most frequent case
262 cmpil #2,%d1
263 bnes op_con |byte: size field stays 0
264 orl #8,%d0 |word: size field = 1
265 bras op_con
266op_long:
267 orl #0x10,%d0 |long: size field = 2
268op_con:
269 movel %d0,%d1 |format size:reg for reg_dest
270 bral reg_dest |call to reg_dest returns to caller
271| ;of operr_store
272|
273| Destination is memory. Get <ea> from integer exception frame
274| and call mem_write.
275|
276dest_mem:
277 leal L_SCR1(%a6),%a0 |put ptr to write data in a0
278 movel EXC_EA(%a6),%a1 |put user destination address in a1
279 movel %d1,%d0 |put size in d0
280 bsrl mem_write
281 rts
282|
283| Check the upper 32 bits of the mantissa for $ffffffff and the
284| exponent word for $c000 or $bfff. If both match, the operr was
285| incorrectly signalled and d0 is returned clear. Otherwise it is
286| a real operr and d0 is returned as 1.
287|
288check_upper:
289 cmpil #0xffffffff,FPTEMP_HI(%a6) |check if upper mantissa lword is all 1's
290 bnes true_operr |if not all 1's then was true operr
291 cmpiw #0xc000,FPTEMP_EX(%a6) |check if incorrectly signalled
292 beqs not_true_operr |branch if not true operr
293 cmpiw #0xbfff,FPTEMP_EX(%a6) |check if incorrectly signalled
294 beqs not_true_operr |branch if not true operr
295true_operr:
296 movel #1,%d0 |signal real operr
297 rts
298not_true_operr:
299 clrl %d0 |signal no real operr
300 rts
301
302|
303| End_operr tests for operr enabled. If not, it cleans up the stack
304| and does an rte. If enabled, it cleans up the stack and branches
305| to the kernel operr handler with only the integer exception
306| frame on the stack and the fpu in the original exceptional state
307| with correct data written to the destination.
308|
309end_operr:
310 btstb #operr_bit,FPCR_ENABLE(%a6)
311 beqs not_enabled |operr trap disabled: go check inexact
312enabled:
313 moveml USER_DA(%a6),%d0-%d1/%a0-%a1
314 fmovemx USER_FP0(%a6),%fp0-%fp3
315 fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
316 frestore (%a7)+
317 unlk %a6
318 bral real_operr
319
320not_enabled:
321|
322| It is possible to have either inex2 or inex1 exceptions with the
323| operr. If the inex enable bit is set in the FPCR, and either
324| inex2 or inex1 occurred, we must clean up and branch to the
325| real inex handler.
326|
327ck_inex:
328 moveb FPCR_ENABLE(%a6),%d0
329 andb FPSR_EXCEPT(%a6),%d0 |keep exceptions both enabled and reported
330 andib #0x3,%d0 |keep only the two low (inexact) bits
331 beq operr_exit
332|
333| Inexact enabled and reported, and we must take an inexact exception.
334|
335take_inex:
336 moveb #INEX_VEC,EXC_VEC+1(%a6) |rewrite the stacked vector to inexact
337 movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
338 orl #sx_mask,E_BYTE(%a6)
339 moveml USER_DA(%a6),%d0-%d1/%a0-%a1
340 fmovemx USER_FP0(%a6),%fp0-%fp3
341 fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
342 frestore (%a7)+
343 unlk %a6
344 bral real_inex
345|
346| Since operr is only an E1 exception, there is no need to frestore
347| any state back to the fpu.
348|
349operr_exit:
350 moveml USER_DA(%a6),%d0-%d1/%a0-%a1
351 fmovemx USER_FP0(%a6),%fp0-%fp3
352 fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
353 unlk %a6 |the fsave frame is dropped, not restored
354 bral fpsp_done
355
356 |end
diff --git a/arch/m68k/fpsp040/x_ovfl.S b/arch/m68k/fpsp040/x_ovfl.S
new file mode 100644
index 000000000000..22cb8b42c7b6
--- /dev/null
+++ b/arch/m68k/fpsp040/x_ovfl.S
@@ -0,0 +1,186 @@
1|
2| x_ovfl.sa 3.5 7/1/91
3|
4| fpsp_ovfl --- FPSP handler for overflow exception
5|
6| Overflow occurs when a floating-point intermediate result is
7| too large to be represented in a floating-point data register,
8| or when storing to memory, the contents of a floating-point
9| data register are too large to be represented in the
10| destination format.
11|
12| Trap disabled results
13|
14| If the instruction is move_out, then garbage is stored in the
15| destination. If the instruction is not move_out, then the
16| destination is not affected. For 68881 compatibility, the
17| following values should be stored at the destination, based
18| on the current rounding mode:
19|
20| RN Infinity with the sign of the intermediate result.
21| RZ Largest magnitude number, with the sign of the
22| intermediate result.
23| RM For pos overflow, the largest pos number. For neg overflow,
24| -infinity
25| RP For pos overflow, +infinity. For neg overflow, the largest
26| neg number
27|
28| Trap enabled results
29| All trap disabled code applies. In addition the exceptional
30| operand needs to be made available to the users exception handler
31| with a bias of $6000 subtracted from the exponent.
32|
33|
34
35| Copyright (C) Motorola, Inc. 1990
36| All Rights Reserved
37|
38| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
39| The copyright notice above does not evidence any
40| actual or intended publication of such source code.
41
42X_OVFL: |idnt 2,1 | Motorola 040 Floating Point Software Package
43
44 |section 8
45
46#include "fpsp.h"
47
48 |xref ovf_r_x2
49 |xref ovf_r_x3
50 |xref store
51 |xref real_ovfl
52 |xref real_inex
53 |xref fpsp_done
54 |xref g_opcls
55 |xref b1238_fix
56
57 .global fpsp_ovfl
58fpsp_ovfl:
| Build the local frame and save the FPU state and scratch registers.
59 link %a6,#-LOCAL_SIZE
60 fsave -(%a7)
61 moveml %d0-%d1/%a0-%a1,USER_DA(%a6)
62 fmovemx %fp0-%fp3,USER_FP0(%a6)
63 fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
64
65|
66| The 040 doesn't set the AINEX bit in the FPSR, the following
67| line temporarily rectifies this error.
68|
69 bsetb #ainex_bit,FPSR_AEXCEPT(%a6)
70|
71 bsrl ovf_adj |denormalize, round & store interm op
72|
73| if overflow traps not enabled check for inexact exception
74|
75 btstb #ovfl_bit,FPCR_ENABLE(%a6)
76 beqs ck_inex
77|
| Overflow trap enabled: for an E3 exception, fix up the frame first,
| then hand off to real_ovfl.
78 btstb #E3,E_BYTE(%a6)
79 beqs no_e3_1
80 bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no
81 bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit
82 bsrl b1238_fix
83 movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
84 orl #sx_mask,E_BYTE(%a6)
85no_e3_1:
86 moveml USER_DA(%a6),%d0-%d1/%a0-%a1
87 fmovemx USER_FP0(%a6),%fp0-%fp3
88 fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
89 frestore (%a7)+
90 unlk %a6
91 bral real_ovfl
92|
93| It is possible to have either inex2 or inex1 exceptions with the
94| ovfl. If the inex enable bit is set in the FPCR, and either
95| inex2 or inex1 occurred, we must clean up and branch to the
96| real inex handler.
97|
| The three commented-out instructions below are the enabled-and-reported
| test; the live code only tests the inex2 enable bit in the FPCR.
98ck_inex:
99| move.b FPCR_ENABLE(%a6),%d0
100| and.b FPSR_EXCEPT(%a6),%d0
101| andi.b #$3,%d0
102 btstb #inex2_bit,FPCR_ENABLE(%a6)
103 beqs ovfl_exit
104|
105| Inexact enabled and reported, and we must take an inexact exception.
106|
107take_inex:
108 btstb #E3,E_BYTE(%a6)
109 beqs no_e3_2
110 bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no
111 bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit
112 bsrl b1238_fix
113 movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
114 orl #sx_mask,E_BYTE(%a6)
115no_e3_2:
116 moveb #INEX_VEC,EXC_VEC+1(%a6) |rewrite the stacked vector to inexact
117 moveml USER_DA(%a6),%d0-%d1/%a0-%a1
118 fmovemx USER_FP0(%a6),%fp0-%fp3
119 fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
120 frestore (%a7)+
121 unlk %a6
122 bral real_inex
123
124ovfl_exit:
125 bclrb #E3,E_BYTE(%a6) |test and clear E3 bit
126 beqs e1_set |E3 was clear: E1 exit path
127|
128| Clear dirty bit on dest register in the frame before branching
129| to b1238_fix.
130|
131 bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no
132 bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit
133 bsrl b1238_fix |test for bug1238 case
134
135 movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
136 orl #sx_mask,E_BYTE(%a6)
137 moveml USER_DA(%a6),%d0-%d1/%a0-%a1
138 fmovemx USER_FP0(%a6),%fp0-%fp3
139 fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
140 frestore (%a7)+
141 unlk %a6
142 bral fpsp_done
| E1 exit: no frestore here; unlk drops the fsave frame.
143e1_set:
144 moveml USER_DA(%a6),%d0-%d1/%a0-%a1
145 fmovemx USER_FP0(%a6),%fp0-%fp3
146 fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
147 unlk %a6
148 bral fpsp_done
149
150|
151| ovf_adj
152|
| Select the overflowed operand (WBTEMP for E3, ETEMP otherwise), split
| its sign into LOCAL_SGN, produce the overflow result with ovf_r_x3
| (opclass 3) or ovf_r_x2 (anything else), and tail-branch to store.
153ovf_adj:
154|
155| Have a0 point to the correct operand.
156|
157 btstb #E3,E_BYTE(%a6) |test E3 bit
158 beqs ovf_e1
159
160 lea WBTEMP(%a6),%a0
161 bras ovf_com
162ovf_e1:
163 lea ETEMP(%a6),%a0
164
165ovf_com:
166 bclrb #sign_bit,LOCAL_EX(%a0) |clear the sign bit in the exponent word
167 sne LOCAL_SGN(%a0) |LOCAL_SGN = $ff if it was set, else 0
168
169 bsrl g_opcls |returns opclass in d0
170 cmpiw #3,%d0 |check for opclass3
171 bnes not_opc011
172
173|
174| FPSR_CC is saved and restored because ovf_r_x3 affects it. The
175| CCs are defined to be 'not affected' for the opclass3 instruction.
176|
177 moveb FPSR_CC(%a6),L_SCR1(%a6)
178 bsrl ovf_r_x3 |returns a0 pointing to result
179 moveb L_SCR1(%a6),FPSR_CC(%a6)
180 bral store |stores to memory or register
181
182not_opc011:
183 bsrl ovf_r_x2 |returns a0 pointing to result
184 bral store |stores to memory or register
185
186 |end
diff --git a/arch/m68k/fpsp040/x_snan.S b/arch/m68k/fpsp040/x_snan.S
new file mode 100644
index 000000000000..039af573312e
--- /dev/null
+++ b/arch/m68k/fpsp040/x_snan.S
@@ -0,0 +1,277 @@
1|
2| x_snan.sa 3.3 7/1/91
3|
4| fpsp_snan --- FPSP handler for signalling NAN exception
5|
6| SNAN for float -> integer conversions (integer conversion of
7| an SNAN) is a non-maskable run-time exception.
8|
9| For trap disabled the 040 does the following:
10| If the dest data format is s, d, or x, then the SNAN bit in the NAN
11| is set to one and the resulting non-signaling NAN (truncated if
12| necessary) is transferred to the dest. If the dest format is b, w,
13| or l, then garbage is written to the dest (actually the upper 32 bits
14| of the mantissa are sent to the integer unit).
15|
16| For trap enabled the 040 does the following:
17| If the inst is move_out, then the results are the same as for trap
18| disabled with the exception posted. If the instruction is not move_
19| out, the dest. is not modified, and the exception is posted.
20|
21
22| Copyright (C) Motorola, Inc. 1990
23| All Rights Reserved
24|
25| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
26| The copyright notice above does not evidence any
27| actual or intended publication of such source code.
28
29X_SNAN: |idnt 2,1 | Motorola 040 Floating Point Software Package
30
31 |section 8
32
33#include "fpsp.h"
34
35 |xref get_fline
36 |xref mem_write
37 |xref real_snan
38 |xref real_inex
39 |xref fpsp_done
40 |xref reg_dest
41
42 .global fpsp_snan
43fpsp_snan:
44 link %a6,#-LOCAL_SIZE |allocate the local work area
45 fsave -(%a7) |push the FPU state frame
46 moveml %d0-%d1/%a0-%a1,USER_DA(%a6) |save d0-d1/a0-a1
47 fmovemx %fp0-%fp3,USER_FP0(%a6) |save fp0-fp3
48 fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) |save fpcr/fpsr/fpiar
49
50|
51| Check if trap enabled
52|
53 btstb #snan_bit,FPCR_ENABLE(%a6)
54 bnes ena |If enabled, then branch
55
56 bsrl move_out |else SNAN disabled
57|
58| It is possible to have an inex1 exception with the
59| snan. If the inex enable bit is set in the FPCR, and either
60| inex2 or inex1 occurred, we must clean up and branch to the
61| real inex handler.
62|
63ck_inex:
64 moveb FPCR_ENABLE(%a6),%d0 |d0 = enabled exceptions
65 andb FPSR_EXCEPT(%a6),%d0 |keep only those that were also reported
66 andib #0x3,%d0 |keep the two low bits (inex2/inex1, see above)
67 beq end_snan |none: finish without an inexact trap
68|
69| Inexact enabled and reported, and we must take an inexact exception.
70|
71take_inex:
72 moveb #INEX_VEC,EXC_VEC+1(%a6) |rewrite the vector byte to inexact
73 moveml USER_DA(%a6),%d0-%d1/%a0-%a1
74 fmovemx USER_FP0(%a6),%fp0-%fp3
75 fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
76 frestore (%a7)+
77 unlk %a6
78 bral real_inex
79|
80| SNAN is enabled. Check if inst is move_out.
81| Make any corrections to the 040 output as necessary.
82|
83ena:
84 btstb #5,CMDREG1B(%a6) |if set, inst is move out
85 beq not_out
86
87 bsrl move_out
88
89report_snan:
90 moveb (%a7),VER_TMP(%a6) |save first byte of the fsave frame
91 cmpib #VER_40,(%a7) |test for orig unimp frame
92 bnes ck_rev
93 moveql #13,%d0 |need to zero 14 lwords
94 bras rep_con
95ck_rev:
96 moveql #11,%d0 |need to zero 12 lwords
97rep_con:
98 clrl (%a7) |clear the lword at the current top of stack
99loop1:
100 clrl -(%a7) |dec a7, then clear that lword
101 dbra %d0,loop1 |runs d0+1 times
102 moveb VER_TMP(%a6),(%a7) |format a busy frame
103 moveb #BUSY_SIZE-4,1(%a7) |write busy size
104 movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
105 orl #sx_mask,E_BYTE(%a6)
106 moveml USER_DA(%a6),%d0-%d1/%a0-%a1
107 fmovemx USER_FP0(%a6),%fp0-%fp3
108 fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
109 frestore (%a7)+
110 unlk %a6
111 bral real_snan
112|
113| Exit snan handler by expanding the unimp frame into a busy frame
114|
115end_snan:
116 bclrb #E1,E_BYTE(%a6) |clear the E1 bit
117
118 moveb (%a7),VER_TMP(%a6) |save first byte of the fsave frame
119 cmpib #VER_40,(%a7) |test for orig unimp frame
120 bnes ck_rev2
121 moveql #13,%d0 |need to zero 14 lwords
122 bras rep_con2
123ck_rev2:
124 moveql #11,%d0 |need to zero 12 lwords
125rep_con2:
126 clrl (%a7) |clear the lword at the current top of stack
127loop2:
128 clrl -(%a7) |dec a7, then clear that lword
129 dbra %d0,loop2 |runs d0+1 times
130 moveb VER_TMP(%a6),(%a7) |format a busy frame
131 moveb #BUSY_SIZE-4,1(%a7) |write busy size
132 movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
133 orl #sx_mask,E_BYTE(%a6)
134 moveml USER_DA(%a6),%d0-%d1/%a0-%a1
135 fmovemx USER_FP0(%a6),%fp0-%fp3
136 fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
137 frestore (%a7)+
138 unlk %a6
139 bral fpsp_done
140
141|
142| Move_out
143|
144move_out:
145 movel EXC_EA(%a6),%a0 |get <ea> from exc frame
146
147 bfextu CMDREG1B(%a6){#3:#3},%d0 |d0{2:0} = 3-bit field at bit offset 3
148 cmpil #0,%d0 |check for long
149 beqs sto_long |branch if move_out long
150
151 cmpil #4,%d0 |check for word
152 beqs sto_word |branch if move_out word
153
154 cmpil #6,%d0 |check for byte
155 beqs sto_byte |branch if move_out byte
156
157|
158| Not byte, word or long: return without storing anything here
159|
160 rts
161|
162| Get the 32 most significant bits of etemp mantissa
163|
164sto_long:
165 movel ETEMP_HI(%a6),%d1 |d1 = upper lword of etemp mantissa
166 movel #4,%d0 |load byte count
167|
168| Set signalling nan bit (bit 30 of the upper mantissa lword);
169| per the file header the result is then a non-signaling NAN
170 bsetl #30,%d1
171|
172| Store to the users destination address
173|
174 tstl %a0 |check if <ea> is 0
175 beqs wrt_dn |destination is a data register
176
177 movel %d1,-(%a7) |move the snan onto the stack
178 movel %a0,%a1 |load dest addr into a1
179 movel %a7,%a0 |load src addr of snan into a0
180 bsrl mem_write |write snan to user memory
181 movel (%a7)+,%d1 |clear off stack
182 rts
183|
184| Get the 16 most significant bits of etemp mantissa
185|
186sto_word:
187 movel ETEMP_HI(%a6),%d1 |d1 = upper lword of etemp mantissa
188 movel #2,%d0 |load byte count
189|
190| Set signalling nan bit (bit 30 of the upper mantissa lword);
191| per the file header the result is then a non-signaling NAN
192 bsetl #30,%d1
193|
194| Store to the users destination address
195|
196 tstl %a0 |check if <ea> is 0
197 beqs wrt_dn |destination is a data register
198
199 movel %d1,-(%a7) |move the snan onto the stack
200 movel %a0,%a1 |load dest addr into a1
201 movel %a7,%a0 |point to lowest-addressed (most significant) word
202 bsrl mem_write |write snan to user memory
203 movel (%a7)+,%d1 |clear off stack
204 rts
205|
206| Get the 8 most significant bits of etemp mantissa
207|
208sto_byte:
209 movel ETEMP_HI(%a6),%d1 |d1 = upper lword of etemp mantissa
210 movel #1,%d0 |load byte count
211|
212| Set signalling nan bit (bit 30 of the upper mantissa lword);
213| per the file header the result is then a non-signaling NAN
214 bsetl #30,%d1
215|
216| Store to the users destination address
217|
218 tstl %a0 |check if <ea> is 0
219 beqs wrt_dn |destination is a data register
220 movel %d1,-(%a7) |move the snan onto the stack
221 movel %a0,%a1 |load dest addr into a1
222 movel %a7,%a0 |point to source byte (lowest address = most significant)
223 bsrl mem_write |write snan to user memory
224 movel (%a7)+,%d1 |clear off stack
225 rts
226
227|
228| wrt_dn --- write to a data register
229|
230| We get here with D1 containing the data to write and D0 the
231| number of bytes to write: 1=byte,2=word,4=long.
232|
233wrt_dn:
234 movel %d1,L_SCR1(%a6) |data (presumably read from L_SCR1 by reg_dest - confirm)
235 movel %d0,-(%a7) |size
236 bsrl get_fline |returns fline word in d0
237 movel %d0,%d1
238 andil #0x7,%d1 |d1 now holds register number
239 movel (%sp)+,%d0 |get original size
240 cmpil #4,%d0 |long?
241 beqs wrt_long
242 cmpil #2,%d0 |word? anything else is handled as byte
243 bnes wrt_byte
244wrt_word:
245 orl #0x8,%d1 |add word size code to register number
246 bral reg_dest |tail call: reg_dest's rts returns to our caller
247wrt_long:
248 orl #0x10,%d1 |add long size code to register number
249 bral reg_dest
250wrt_byte:
251 bral reg_dest |byte: register number only, no size bits added
252|
253| Check if it is a src nan or dst nan
254|
255not_out:
256 movel DTAG(%a6),%d0
257 bfextu %d0{#0:#3},%d0 |isolate dtag in lsbs
258
259 cmpib #3,%d0 |check for nan in destination
260 bnes issrc |dest is not a nan: use the source (a dest nan has priority)
261dst_nan:
262 btstb #6,FPTEMP_HI(%a6) |check if dest nan is an snan
263 bnes issrc |no, so check source for snan
264 movew FPTEMP_EX(%a6),%d0 |d0 = exponent word of the dest snan
265 bras cont
266issrc:
267 movew ETEMP_EX(%a6),%d0 |d0 = exponent word of the source
268cont:
269 btstl #15,%d0 |test for sign of snan
270 beqs clr_neg
271 bsetb #neg_bit,FPSR_CC(%a6) |negative: set neg_bit in FPSR_CC
272 bra report_snan
273clr_neg:
274 bclrb #neg_bit,FPSR_CC(%a6) |positive: clear neg_bit in FPSR_CC
275 bra report_snan
276
277 |end
diff --git a/arch/m68k/fpsp040/x_store.S b/arch/m68k/fpsp040/x_store.S
new file mode 100644
index 000000000000..4282fa67d449
--- /dev/null
+++ b/arch/m68k/fpsp040/x_store.S
@@ -0,0 +1,256 @@
1|
2| x_store.sa 3.2 1/24/91
3|
4| store --- store operand to memory or register
5|
6| Used by underflow and overflow handlers.
7|
8| a0 = points to fp value to be stored.
9|
10
11| Copyright (C) Motorola, Inc. 1990
12| All Rights Reserved
13|
14| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
15| The copyright notice above does not evidence any
16| actual or intended publication of such source code.
17
18X_STORE: |idnt 2,1 | Motorola 040 Floating Point Software Package
19
20 |section 8
21
22fpreg_mask: |reg# -> one-bit register mask, indexed in sto_fp (0x80=fp0, 0x40=fp1, ...)
23 .byte 0x80,0x40,0x20,0x10,0x08,0x04,0x02,0x01
24
25#include "fpsp.h"
26
27 |xref mem_write
28 |xref get_fline
29 |xref g_opcls
30 |xref g_dfmtou
31 |xref reg_dest
32
33 .global dest_ext
34 .global dest_dbl
35 .global dest_sgl
36
37 .global store
38store:
39 btstb #E3,E_BYTE(%a6) |E3 set: dest reg comes from cmdreg3b
40 beqs E1_sto |E3 clear: decode via opclass/cmdreg1b
41E3_sto:
42 movel CMDREG3B(%a6),%d0
43 bfextu %d0{#6:#3},%d0 |isolate dest. reg from cmdreg3b
44sto_fp:
45 lea fpreg_mask,%a1
46 moveb (%a1,%d0.w),%d0 |convert reg# to dynamic register mask
47 tstb LOCAL_SGN(%a0) |operand at (a0): sign kept in LOCAL_SGN byte
48 beqs is_pos
49 bsetb #sign_bit,LOCAL_EX(%a0) |negative: put sign back into exponent word
50is_pos:
51 fmovemx (%a0),%d0 |move to correct register
52|
53| if fp0-fp3 is being modified, we must put a copy
54| in the USER_FPn variable on the stack because all exception
55| handlers restore fp0-fp3 from there.
56|
57 cmpb #0x80,%d0 |mask 0x80 = fp0
58 bnes not_fp0
59 fmovemx %fp0-%fp0,USER_FP0(%a6)
60 rts
61not_fp0:
62 cmpb #0x40,%d0 |mask 0x40 = fp1
63 bnes not_fp1
64 fmovemx %fp1-%fp1,USER_FP1(%a6)
65 rts
66not_fp1:
67 cmpb #0x20,%d0 |mask 0x20 = fp2
68 bnes not_fp2
69 fmovemx %fp2-%fp2,USER_FP2(%a6)
70 rts
71not_fp2:
72 cmpb #0x10,%d0 |mask 0x10 = fp3
73 bnes not_fp3
74 fmovemx %fp3-%fp3,USER_FP3(%a6)
75 rts
76not_fp3:
77 rts
78
79E1_sto:
80 bsrl g_opcls |returns opclass in d0
81 cmpib #3,%d0
82 beq opc011 |branch if opclass 3
83 movel CMDREG1B(%a6),%d0
84 bfextu %d0{#6:#3},%d0 |extract destination register
85 bras sto_fp |store into that fp register
86
87opc011:
88 bsrl g_dfmtou |returns dest format in d0
89| ;ext=00, sgl=01, dbl=10
90 movel %a0,%a1 |save source addr in a1
91 movel EXC_EA(%a6),%a0 |get the address
92 cmpil #0,%d0 |if dest format is extended
93 beq dest_ext |then branch
94 cmpil #1,%d0 |if dest format is single
95 beq dest_sgl |then branch
95 beq dest_sgl |then branch
96|
97| fall through to dest_dbl
98|
99
100|
101| dest_dbl --- write double precision value to user space
102|
103|Input
104| a0 -> destination address
105| a1 -> source in extended precision
106|Output
107| a0 -> destroyed
108| a1 -> destroyed
109| d0 -> 0
110|
111|Changes extended precision to double precision.
112| Note: no attempt is made to round the extended value to double.
113| dbl_sign = ext_sign
114| dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias)
115| get rid of ext integer bit
116| dbl_mant = ext_mant{62:11}
117|
118| --------------- --------------- ---------------
119| extended -> |s| exp | |1| ms mant | | ls mant |
120| --------------- --------------- ---------------
121| 95 64 63 62 32 31 11 0
122| | |
123| | |
124| | |
125| v v
126| --------------- ---------------
127| double -> |s|exp| mant | | mant |
128| --------------- ---------------
129| 63 51 32 31 0
130|
131dest_dbl:
132 clrl %d0 |clear d0
133 movew LOCAL_EX(%a1),%d0 |get exponent
134 subw #0x3fff,%d0 |subtract extended precision bias
135 cmpw #0x4000,%d0 |check if inf
136 beqs inf |if so, special case
137 addw #0x3ff,%d0 |add double precision bias
138 swap %d0 |d0 now in upper word
139 lsll #4,%d0 |d0 now in proper place for dbl prec exp
140 tstb LOCAL_SGN(%a1)
141 beqs get_mant |if positive, go process mantissa
142 bsetl #31,%d0 |if negative, put in sign information
143| ; before continuing
144 bras get_mant |go process mantissa
145inf:
146 movel #0x7ff00000,%d0 |load dbl inf exponent
147 clrl LOCAL_HI(%a1) |clear lower lword of the double
148 tstb LOCAL_SGN(%a1)
149 beqs dbl_inf |if positive, go ahead and write it
150 bsetl #31,%d0 |if negative put in sign information
151dbl_inf:
152 movel %d0,LOCAL_EX(%a1) |put the new exp back on the stack
153 bras dbl_wrt
154get_mant:
155 movel LOCAL_HI(%a1),%d1 |get ms mantissa
156 bfextu %d1{#1:#20},%d1 |get upper 20 bits of ms
157 orl %d1,%d0 |put these bits in ms word of double
158 movel %d0,LOCAL_EX(%a1) |upper lword of double built in place at LOCAL_EX
159 movel LOCAL_HI(%a1),%d1 |get ms mantissa
160 movel #21,%d0 |load shift count
161 lsll %d0,%d1 |put lower 11 bits in upper bits
162 movel %d1,LOCAL_HI(%a1) |build lower lword in memory
163 movel LOCAL_LO(%a1),%d1 |get ls mantissa
164 bfextu %d1{#0:#21},%d0 |get the top 21 bits of ls mantissa
165 orl %d0,LOCAL_HI(%a1) |put them in double result
166dbl_wrt:
167 movel #0x8,%d0 |byte count for double precision number
168 exg %a0,%a1 |a0=supervisor source, a1=user dest
169 bsrl mem_write |move the number to the user's memory
170 rts
171|
172| dest_sgl --- write single precision value to user space
173|
174|Input
175| a0 -> destination address
176| a1 -> source in extended precision
177|
178|Output
179| a0 -> destroyed
180| a1 -> destroyed
181| d0 -> 0
182|
183|Changes extended precision to single precision.
184| sgl_sign = ext_sign
185| sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)
186| get rid of ext integer bit
187| sgl_mant = ext_mant{62:40}
188|
189| --------------- --------------- ---------------
190| extended -> |s| exp | |1| ms mant | | ls mant |
191| --------------- --------------- ---------------
192| 95 64 63 62 40 32 31 12 0
193| | |
194| | |
195| | |
196| v v
197| ---------------
198| single -> |s|exp| mant |
199| ---------------
200| 31 22 0
201|
202dest_sgl:
203 clrl %d0
204 movew LOCAL_EX(%a1),%d0 |get exponent
205 subw #0x3fff,%d0 |subtract extended precision bias
206 cmpw #0x4000,%d0 |check if inf
207 beqs sinf |if so, special case
208 addw #0x7f,%d0 |add single precision bias
209 swap %d0 |put exp in upper word of d0
210 lsll #7,%d0 |shift it into single exp bits
211 tstb LOCAL_SGN(%a1)
212 beqs get_sman |if positive, continue
213 bsetl #31,%d0 |if negative, put in sign first
214 bras get_sman |get mantissa
215sinf:
216 movel #0x7f800000,%d0 |load single inf exp to d0
217 tstb LOCAL_SGN(%a1)
218 beqs sgl_wrt |if positive, continue
219 bsetl #31,%d0 |if negative, put in sign info
220 bras sgl_wrt
221
222get_sman:
223 movel LOCAL_HI(%a1),%d1 |get ms mantissa
224 bfextu %d1{#1:#23},%d1 |get upper 23 bits of ms
225 orl %d1,%d0 |put these bits in ms word of single
226
227sgl_wrt:
228 movel %d0,L_SCR1(%a6) |park the assembled single in L_SCR1
229 movel #0x4,%d0 |byte count for single precision number
230 tstl %a0 |users destination address
231 beqs sgl_Dn |destination is a data register
232 exg %a0,%a1 |a1=user dest (a0 is reloaded on the next line)
233 leal L_SCR1(%a6),%a0 |point a0 to data
234 bsrl mem_write |move the number to the user's memory
235 rts
236sgl_Dn:
237 bsrl get_fline |returns fline word in d0
238 andw #0x7,%d0 |isolate register number
239 movel %d0,%d1 |d1 has size:reg formatted for reg_dest
240 orl #0x10,%d1 |reg_dest wants size added to reg#
241 bral reg_dest |size code 0x10 (as wrt_long in x_snan.S); rts in reg_dest will
242| ;return to caller of dest_sgl
243
244dest_ext: |write extended value at (a1) to user address a0
245 tstb LOCAL_SGN(%a1) |put back sign into exponent word
246 beqs dstx_cont
247 bsetb #sign_bit,LOCAL_EX(%a1)
248dstx_cont:
249 clrb LOCAL_SGN(%a1) |clear out the sign byte
250
251 movel #0x0c,%d0 |byte count for extended number
252 exg %a0,%a1 |a0=supervisor source, a1=user dest
253 bsrl mem_write |move the number to the user's memory
254 rts
255
256 |end
diff --git a/arch/m68k/fpsp040/x_unfl.S b/arch/m68k/fpsp040/x_unfl.S
new file mode 100644
index 000000000000..077fcc230fcc
--- /dev/null
+++ b/arch/m68k/fpsp040/x_unfl.S
@@ -0,0 +1,269 @@
1|
2| x_unfl.sa 3.4 7/1/91
3|
4| fpsp_unfl --- FPSP handler for underflow exception
5|
6| Trap disabled results
7| For 881/2 compatibility, sw must denormalize the intermediate
8| result, then store the result. Denormalization is accomplished
9| by taking the intermediate result (which is always normalized) and
10| shifting the mantissa right while incrementing the exponent until
11| it is equal to the denormalized exponent for the destination
12| format. After denormalization, the result is rounded to the
13| destination format.
14|
15| Trap enabled results
16| All trap disabled code applies. In addition the exceptional
17| operand needs to be made available to the user with a bias of $6000
18| added to the exponent.
19|
20
21| Copyright (C) Motorola, Inc. 1990
22| All Rights Reserved
23|
24| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
25| The copyright notice above does not evidence any
26| actual or intended publication of such source code.
27
28X_UNFL: |idnt 2,1 | Motorola 040 Floating Point Software Package
29
30 |section 8
31
32#include "fpsp.h"
33
34 |xref denorm
35 |xref round
36 |xref store
37 |xref g_rndpr
38 |xref g_opcls
39 |xref g_dfmtou
40 |xref real_unfl
41 |xref real_inex
42 |xref fpsp_done
43 |xref b1238_fix
44
45 .global fpsp_unfl
46fpsp_unfl:
47 link %a6,#-LOCAL_SIZE |allocate the local work area
48 fsave -(%a7) |push the FPU state frame
49 moveml %d0-%d1/%a0-%a1,USER_DA(%a6) |save d0-d1/a0-a1
50 fmovemx %fp0-%fp3,USER_FP0(%a6) |save fp0-fp3
51 fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) |save fpcr/fpsr/fpiar
52
53|
54 bsrl unf_res |denormalize, round & store interm op
55|
56| If underflow exceptions are not enabled, check for inexact
57| exception
58|
59 btstb #unfl_bit,FPCR_ENABLE(%a6)
60 beqs ck_inex
61
62 btstb #E3,E_BYTE(%a6)
63 beqs no_e3_1
64|
65| Clear dirty bit on dest register in the frame before branching
66| to b1238_fix.
67|
68 bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no
69 bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit
70 bsrl b1238_fix |test for bug1238 case
71 movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
72 orl #sx_mask,E_BYTE(%a6)
73no_e3_1:
74 moveml USER_DA(%a6),%d0-%d1/%a0-%a1
75 fmovemx USER_FP0(%a6),%fp0-%fp3
76 fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
77 frestore (%a7)+
78 unlk %a6
79 bral real_unfl
80|
81| It is possible to have either inex2 or inex1 exceptions with the
82| unfl. If the inex enable bit is set in the FPCR, and either
83| inex2 or inex1 occurred, we must clean up and branch to the
84| real inex handler.
85|
86ck_inex:
87 moveb FPCR_ENABLE(%a6),%d0 |d0 = enabled exceptions
88 andb FPSR_EXCEPT(%a6),%d0 |keep only those that were also reported
89 andib #0x3,%d0 |keep the two low bits (inex2/inex1, see above)
90 beqs unfl_done
91
92|
93| Inexact enabled and reported, and we must take an inexact exception
94|
95take_inex:
96 btstb #E3,E_BYTE(%a6)
97 beqs no_e3_2
98|
99| Clear dirty bit on dest register in the frame before branching
100| to b1238_fix.
101|
102 bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no
103 bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit
104 bsrl b1238_fix |test for bug1238 case
105 movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
106 orl #sx_mask,E_BYTE(%a6)
107no_e3_2:
108 moveb #INEX_VEC,EXC_VEC+1(%a6) |rewrite the vector byte to inexact
109 moveml USER_DA(%a6),%d0-%d1/%a0-%a1
110 fmovemx USER_FP0(%a6),%fp0-%fp3
111 fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
112 frestore (%a7)+
113 unlk %a6
114 bral real_inex
115
116unfl_done:
117 bclrb #E3,E_BYTE(%a6) |test and clear E3 bit
118 beqs e1_set |E3 was clear (E1 case): branch
119|
120| Clear dirty bit on dest register in the frame before branching
121| to b1238_fix.
122|
123 bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no
124 bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit
125 bsrl b1238_fix |test for bug1238 case
126 movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
127 orl #sx_mask,E_BYTE(%a6)
128 moveml USER_DA(%a6),%d0-%d1/%a0-%a1
129 fmovemx USER_FP0(%a6),%fp0-%fp3
130 fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
131 frestore (%a7)+
132 unlk %a6
133 bral fpsp_done
134e1_set:
135 moveml USER_DA(%a6),%d0-%d1/%a0-%a1
136 fmovemx USER_FP0(%a6),%fp0-%fp3
137 fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
138 unlk %a6 |no frestore here: unlk drops the fsave frame pushed at entry
139 bral fpsp_done
140|
141| unf_res --- underflow result calculation
142|
143unf_res:
144 bsrl g_rndpr |returns RND_PREC in d0 0=ext,
145| ;1=sgl, 2=dbl
146| ;we need the RND_PREC in the
147| ;upper word for round
148 movew #0,-(%a7) |low word of the stacked PREC/MODE lword = 0
149 movew %d0,-(%a7) |copy RND_PREC to stack
150|
151|
152| If the exception bit set is E3, the exceptional operand from the
153| fpu is in WBTEMP; else it is in FPTEMP.
154|
155 btstb #E3,E_BYTE(%a6)
156 beqs unf_E1
157unf_E3:
158 lea WBTEMP(%a6),%a0 |a0 now points to operand
159|
160| Test for fsgldiv and fsglmul. If the inst was one of these, then
161| force the precision to extended for the denorm routine (d0 = 0)
162| and to single for the round routine (stacked RND_PREC = 1).
163|
164 movew CMDREG3B(%a6),%d1 |check for fsgldiv or fsglmul
165 andiw #0x7f,%d1
166 cmpiw #0x30,%d1 |check for sgldiv
167 beqs unf_sgl
168 cmpiw #0x33,%d1 |check for sglmul
169 bnes unf_cont |if not, use fpcr prec in round
170unf_sgl:
171 clrl %d0 |d0 = 0 (ext) for denorm
172 movew #0x1,(%a7) |override g_rndpr precision
173| ;force single
174 bras unf_cont
175unf_E1:
176 lea FPTEMP(%a6),%a0 |a0 now points to operand
177unf_cont:
178 bclrb #sign_bit,LOCAL_EX(%a0) |clear sign bit
179 sne LOCAL_SGN(%a0) |store sign
180
181 bsrl denorm |returns denorm, a0 points to it
182|
183| WARNING:
184| ;d0 has guard,round sticky bit
185| ;make sure that it is not corrupted
186| ;before it reaches the round subroutine
187| ;also ensure that a0 isn't corrupted
188
189|
190| Set up d1 for round subroutine d1 contains the PREC/MODE
191| information respectively on upper/lower register halves.
192|
193 bfextu FPCR_MODE(%a6){#2:#2},%d1 |get mode from FPCR
194| ;mode in lower d1
195 addl (%a7)+,%d1 |merge PREC/MODE
196|
197| WARNING: a0 and d0 are assumed to be intact between the denorm and
198| round subroutines. All code between these two subroutines
199| must not corrupt a0 and d0.
200|
201|
202| Perform Round
203| Input: a0 points to input operand
204| d0{31:29} has guard, round, sticky
205| d1{01:00} has rounding mode
206| d1{17:16} has rounding precision
207| Output: a0 points to rounded operand
208|
209
210 bsrl round |returns rounded denorm at (a0)
211|
212| Differentiate between store to memory vs. store to register
213|
214unf_store:
215 bsrl g_opcls |returns opclass in d0{2:0}
216 cmpib #0x3,%d0
217 bnes not_opc011 |not opclass 3: store to a float register
218|
219| At this point, a store to memory is pending
220|
221opc011:
222 bsrl g_dfmtou
223 tstb %d0
224 beqs ext_opc011 |If extended, do not subtract
225| ;If destination format is sgl/dbl,
226 tstb LOCAL_HI(%a0) |If rounded result is normal,don't
227| ;subtract
228 bmis ext_opc011 |top mantissa bit set: result is normal
229 subqw #1,LOCAL_EX(%a0) |account for denorm bias vs.
230| ;normalized bias
231| ; normalized denormalized
232| ;single $7f $7e
233| ;double $3ff $3fe
234|
235ext_opc011:
236 bsrl store |stores to memory
237 bras unf_done |finish up
238
239|
240| At this point, a store to a float register is pending
241|
242not_opc011:
243 bsrl store |stores to float register
244| ;a0 is not corrupted on a store to a
245| ;float register.
246|
247| Set the condition codes according to result
248|
249 tstl LOCAL_HI(%a0) |check upper mantissa
250 bnes ck_sgn
251 tstl LOCAL_LO(%a0) |check lower mantissa
252 bnes ck_sgn
253 bsetb #z_bit,FPSR_CC(%a6) |set condition codes if zero
254ck_sgn:
255 btstb #sign_bit,LOCAL_EX(%a0) |check the sign bit
256 beqs unf_done
257 bsetb #neg_bit,FPSR_CC(%a6) |negative result: set neg_bit
258
259|
260| Finish.
261|
262unf_done:
263 btstb #inex2_bit,FPSR_EXCEPT(%a6)
264 beqs no_aunfl |inex2 not set: leave aunfl alone
265 bsetb #aunfl_bit,FPSR_AEXCEPT(%a6) |inex2 set: also set accrued underflow
266no_aunfl:
267 rts
268
269 |end
diff --git a/arch/m68k/fpsp040/x_unimp.S b/arch/m68k/fpsp040/x_unimp.S
new file mode 100644
index 000000000000..920cb9410e9e
--- /dev/null
+++ b/arch/m68k/fpsp040/x_unimp.S
@@ -0,0 +1,77 @@
1|
2| x_unimp.sa 3.3 7/1/91
3|
4| fpsp_unimp --- FPSP handler for unimplemented instruction
5| exception.
6|
7| Invoked when the user program encounters a floating-point
8| op-code that hardware does not support. Trap vector# 11
9| (See table 8-1 MC68030 User's Manual).
10|
11|
12| Note: An fsave for an unimplemented inst. will create a short
13| fsave stack.
14|
15| Input: 1. Six word stack frame for unimplemented inst, four word
16| for illegal
17| (See table 8-7 MC68030 User's Manual).
18| 2. Unimp (short) fsave state frame created here by fsave
19| instruction.
20|
21|
22| Copyright (C) Motorola, Inc. 1990
23| All Rights Reserved
24|
25| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
26| The copyright notice above does not evidence any
27| actual or intended publication of such source code.
28
29X_UNIMP: |idnt 2,1 | Motorola 040 Floating Point Software Package
30
31 |section 8
32
33#include "fpsp.h"
34
35 |xref get_op
36 |xref do_func
37 |xref sto_res
38 |xref gen_except
39 |xref fpsp_fmt_error
40
41 .global fpsp_unimp
42 .global uni_2
43fpsp_unimp:
44 link %a6,#-LOCAL_SIZE |allocate the local work area
45 fsave -(%a7) |push the FPU state frame
46uni_2: |global second entry point, after link/fsave
47 moveml %d0-%d1/%a0-%a1,USER_DA(%a6)
48 fmovemx %fp0-%fp3,USER_FP0(%a6)
49 fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
50 moveb (%a7),%d0 |test for valid version num
51 andib #0xf0,%d0 |test for $4x
52 cmpib #VER_4,%d0 |must be $4x or exit
53 bnel fpsp_fmt_error
54|
55| Temporary D25B Fix
56| The following lines are used to ensure that the FPSR
57| exception byte and condition codes are clear before proceeding
58|
59 movel USER_FPSR(%a6),%d0
60 andl #0xFF00FF,%d0 |clear all but accrued exceptions and qbyte
61 movel %d0,USER_FPSR(%a6)
62 fmovel #0,%FPSR |clear all user status bits
63 fmovel #0,%FPCR |clear all user control bits for FPSP
64
65 clrb UFLG_TMP(%a6) |clr flag for unsupp data
66
67 bsrl get_op |go get operand(s)
68 clrb STORE_FLG(%a6) |default: result will be stored
69 bsrl do_func |do the function
70 fsave -(%a7) |capture possible exc state
71 tstb STORE_FLG(%a6)
72 bnes no_store |if STORE_FLG is set, no store
73 bsrl sto_res |store the result in user space
74no_store:
75 bral gen_except |post any exceptions and return
76
77 |end
diff --git a/arch/m68k/fpsp040/x_unsupp.S b/arch/m68k/fpsp040/x_unsupp.S
new file mode 100644
index 000000000000..4ec57285b683
--- /dev/null
+++ b/arch/m68k/fpsp040/x_unsupp.S
@@ -0,0 +1,83 @@
1|
2| x_unsupp.sa 3.3 7/1/91
3|
4| fpsp_unsupp --- FPSP handler for unsupported data type exception
5|
6| Trap vector #55 (See table 8-1 Mc68030 User's manual).
7| Invoked when the user program encounters a data format (packed) that
8| hardware does not support or a data type (denormalized numbers or un-
9| normalized numbers).
10| Normalizes denorms and unnorms, unpacks packed numbers then stores
11| them back into the machine to let the 040 finish the operation.
12|
13| Unsupp calls two routines:
14| 1. get_op - gets the operand(s)
15| 2. res_func - restore the function back into the 040 or
16| if fmove.p fpm,<ea> then pack source (fpm)
17| and store in users memory <ea>.
18|
19| Input: Long fsave stack frame
20|
21|
22
23| Copyright (C) Motorola, Inc. 1990
24| All Rights Reserved
25|
26| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
27| The copyright notice above does not evidence any
28| actual or intended publication of such source code.
29
30X_UNSUPP: |idnt 2,1 | Motorola 040 Floating Point Software Package
31
32 |section 8
33
34#include "fpsp.h"
35
36 |xref get_op
37 |xref res_func
38 |xref gen_except
39 |xref fpsp_fmt_error
40
41 .global fpsp_unsupp
42fpsp_unsupp:
43|
44 link %a6,#-LOCAL_SIZE |allocate the local work area
45 fsave -(%a7) |push the FPU state frame
46 moveml %d0-%d1/%a0-%a1,USER_DA(%a6) |save d0-d1/a0-a1
47 fmovemx %fp0-%fp3,USER_FP0(%a6) |save fp0-fp3
48 fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) |save fpcr/fpsr/fpiar
49
50
51 moveb (%a7),VER_TMP(%a6) |save version number
52 moveb (%a7),%d0 |test for valid version num
53 andib #0xf0,%d0 |test for $4x
54 cmpib #VER_4,%d0 |must be $4x or exit
55 bnel fpsp_fmt_error
56
57 fmovel #0,%FPSR |clear all user status bits
58 fmovel #0,%FPCR |clear all user control bits
59|
60| The following lines are used to ensure that the FPSR
61| exception byte and condition codes are clear before proceeding,
62| except in the case of fmove, which leaves the cc's intact.
63|
64unsupp_con:
65 movel USER_FPSR(%a6),%d1
66 btst #5,CMDREG1B(%a6) |looking for fmove out
67 bne fmove_con
68 andl #0xFF00FF,%d1 |clear all but aexcs and qbyte
69 bras end_fix
70fmove_con:
71 andl #0x0FFF40FF,%d1 |clear all but cc's, snan bit, aexcs, and qbyte
72end_fix:
73 movel %d1,USER_FPSR(%a6) |write the masked FPSR back to the saved copy
74
75 st UFLG_TMP(%a6) |set flag for unsupp data
76
77 bsrl get_op |everything okay, go get operand(s)
78 bsrl res_func |fix up stack frame so can restore it
79 clrl -(%a7) |push a zeroed lword
80 moveb VER_TMP(%a6),(%a7) |move idle fmt word to top of stack
81 bral gen_except
82|
83 |end