aboutsummaryrefslogtreecommitdiffstats
path: root/arch/i386/math-emu
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 18:20:36 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 18:20:36 -0400
commit1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/i386/math-emu
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'arch/i386/math-emu')
-rw-r--r--arch/i386/math-emu/Makefile30
-rw-r--r--arch/i386/math-emu/README427
-rw-r--r--arch/i386/math-emu/control_w.h45
-rw-r--r--arch/i386/math-emu/div_Xsig.S365
-rw-r--r--arch/i386/math-emu/div_small.S47
-rw-r--r--arch/i386/math-emu/errors.c739
-rw-r--r--arch/i386/math-emu/exception.h53
-rw-r--r--arch/i386/math-emu/fpu_arith.c174
-rw-r--r--arch/i386/math-emu/fpu_asm.h32
-rw-r--r--arch/i386/math-emu/fpu_aux.c204
-rw-r--r--arch/i386/math-emu/fpu_emu.h217
-rw-r--r--arch/i386/math-emu/fpu_entry.c760
-rw-r--r--arch/i386/math-emu/fpu_etc.c143
-rw-r--r--arch/i386/math-emu/fpu_proto.h140
-rw-r--r--arch/i386/math-emu/fpu_system.h89
-rw-r--r--arch/i386/math-emu/fpu_tags.c127
-rw-r--r--arch/i386/math-emu/fpu_trig.c1845
-rw-r--r--arch/i386/math-emu/get_address.c449
-rw-r--r--arch/i386/math-emu/load_store.c270
-rw-r--r--arch/i386/math-emu/mul_Xsig.S176
-rw-r--r--arch/i386/math-emu/poly.h121
-rw-r--r--arch/i386/math-emu/poly_2xm1.c156
-rw-r--r--arch/i386/math-emu/poly_atan.c229
-rw-r--r--arch/i386/math-emu/poly_l2.c272
-rw-r--r--arch/i386/math-emu/poly_sin.c397
-rw-r--r--arch/i386/math-emu/poly_tan.c222
-rw-r--r--arch/i386/math-emu/polynom_Xsig.S135
-rw-r--r--arch/i386/math-emu/reg_add_sub.c374
-rw-r--r--arch/i386/math-emu/reg_compare.c381
-rw-r--r--arch/i386/math-emu/reg_constant.c120
-rw-r--r--arch/i386/math-emu/reg_constant.h25
-rw-r--r--arch/i386/math-emu/reg_convert.c53
-rw-r--r--arch/i386/math-emu/reg_divide.c207
-rw-r--r--arch/i386/math-emu/reg_ld_str.c1370
-rw-r--r--arch/i386/math-emu/reg_mul.c132
-rw-r--r--arch/i386/math-emu/reg_norm.S147
-rw-r--r--arch/i386/math-emu/reg_round.S708
-rw-r--r--arch/i386/math-emu/reg_u_add.S167
-rw-r--r--arch/i386/math-emu/reg_u_div.S471
-rw-r--r--arch/i386/math-emu/reg_u_mul.S148
-rw-r--r--arch/i386/math-emu/reg_u_sub.S272
-rw-r--r--arch/i386/math-emu/round_Xsig.S141
-rw-r--r--arch/i386/math-emu/shr_Xsig.S87
-rw-r--r--arch/i386/math-emu/status_w.h65
-rw-r--r--arch/i386/math-emu/version.h12
-rw-r--r--arch/i386/math-emu/wm_shrx.S204
-rw-r--r--arch/i386/math-emu/wm_sqrt.S470
47 files changed, 13418 insertions, 0 deletions
diff --git a/arch/i386/math-emu/Makefile b/arch/i386/math-emu/Makefile
new file mode 100644
index 000000000000..9c943fa6ce6b
--- /dev/null
+++ b/arch/i386/math-emu/Makefile
@@ -0,0 +1,30 @@
1#
2# Makefile for wm-FPU-emu, the x87 FPU emulator (C and 80x86 assembler sources)
3#
4# To build with -DDEBUGGING, use the commented-out DEBUG line instead of the empty one.
5#DEBUG = -DDEBUGGING
6DEBUG =
7PARANOID = -DPARANOID
8CFLAGS := $(CFLAGS) $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION)
9# PARANOID is also handed to the assembler sources (they contain #ifdef PARANOID checks).
10EXTRA_AFLAGS := $(PARANOID)
11
12# From 'C' language sources:
13C_OBJS =fpu_entry.o errors.o \
14 fpu_arith.o fpu_aux.o fpu_etc.o fpu_tags.o fpu_trig.o \
15 load_store.o get_address.o \
16 poly_atan.o poly_l2.o poly_2xm1.o poly_sin.o poly_tan.o \
17 reg_add_sub.o reg_compare.o reg_constant.o reg_convert.o \
18 reg_ld_str.o reg_divide.o reg_mul.o
19
20# From 80x86 assembler sources:
21A_OBJS =reg_u_add.o reg_u_div.o reg_u_mul.o reg_u_sub.o \
22 div_small.o reg_norm.o reg_round.o \
23 wm_shrx.o wm_sqrt.o \
24 div_Xsig.o polynom_Xsig.o round_Xsig.o \
25 shr_Xsig.o mul_Xsig.o
26# All C and assembler objects listed above are built for this directory.
27obj-y =$(C_OBJS) $(A_OBJS)
28# Regenerates fpu_proto.h from the C sources with cproto; not a prerequisite of any target shown here.
29proto:
30 cproto -e -DMAKING_PROTO *.c >fpu_proto.h
diff --git a/arch/i386/math-emu/README b/arch/i386/math-emu/README
new file mode 100644
index 000000000000..e6235491d6eb
--- /dev/null
+++ b/arch/i386/math-emu/README
@@ -0,0 +1,427 @@
1 +---------------------------------------------------------------------------+
2 | wm-FPU-emu an FPU emulator for 80386 and 80486SX microprocessors. |
3 | |
4 | Copyright (C) 1992,1993,1994,1995,1996,1997,1999 |
5 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
6 | Australia. E-mail billm@melbpc.org.au |
7 | |
8 | This program is free software; you can redistribute it and/or modify |
9 | it under the terms of the GNU General Public License version 2 as |
10 | published by the Free Software Foundation. |
11 | |
12 | This program is distributed in the hope that it will be useful, |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | GNU General Public License for more details. |
16 | |
17 | You should have received a copy of the GNU General Public License |
18 | along with this program; if not, write to the Free Software |
19 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
20 | |
21 +---------------------------------------------------------------------------+
22
23
24
25wm-FPU-emu is an FPU emulator for Linux. It is derived from wm-emu387
26which was my 80387 emulator for early versions of djgpp (gcc under
27msdos); wm-emu387 was in turn based upon emu387 which was written by
28DJ Delorie for djgpp. The interface to the Linux kernel is based upon
29the original Linux math emulator by Linus Torvalds.
30
31My target FPU for wm-FPU-emu is that described in the Intel486
32Programmer's Reference Manual (1992 edition). Unfortunately, numerous
33facets of the functioning of the FPU are not well covered in the
34Reference Manual. The information in the manual has been supplemented
35with measurements on real 80486's. Unfortunately, it is simply not
36possible to be sure that all of the peculiarities of the 80486 have
37been discovered, so there are always likely to be obscure differences
38in the detailed behaviour of the emulator and a real 80486.
39
40wm-FPU-emu does not implement all of the behaviour of the 80486 FPU,
41but is very close. See "Limitations" later in this file for a list of
42some differences.
43
44Please report bugs, etc to me at:
45 billm@melbpc.org.au
46or b.metzenthen@medoto.unimelb.edu.au
47
48For more information on the emulator and on floating point topics, see
49my web pages, currently at http://www.suburbia.net/~billm/
50
51
52--Bill Metzenthen
53 December 1999
54
55
56----------------------- Internals of wm-FPU-emu -----------------------
57
58Numeric algorithms:
59(1) Add, subtract, and multiply. Nothing remarkable in these.
60(2) Divide has been tuned to get reasonable performance. The algorithm
61 is not the obvious one which most people seem to use, but is designed
62 to take advantage of the characteristics of the 80386. I expect that
63 it has been invented many times before I discovered it, but I have not
64 seen it. It is based upon one of those ideas which one carries around
65 for years without ever bothering to check it out.
66(3) The sqrt function has been tuned to get good performance. It is based
67 upon Newton's classic method. Performance was improved by capitalizing
68 upon the properties of Newton's method, and the code is once again
69 structured taking account of the 80386 characteristics.
70(4) The trig, log, and exp functions are based in each case upon quasi-
71 "optimal" polynomial approximations. My definition of "optimal" was
72 based upon getting good accuracy with reasonable speed.
73(5) The argument reducing code for the trig function effectively uses
74 a value of pi which is accurate to more than 128 bits. As a consequence,
75 the reduced argument is accurate to more than 64 bits for arguments up
76 to a few pi, and accurate to more than 64 bits for most arguments,
77 even for arguments approaching 2^63. This is far superior to an
78 80486, which uses a value of pi which is accurate to 66 bits.
79
80The code of the emulator is complicated slightly by the need to
81account for a limited form of re-entrancy. Normally, the emulator will
82emulate each FPU instruction to completion without interruption.
83However, it may happen that when the emulator is accessing the user
84memory space, swapping may be needed. In this case the emulator may be
85temporarily suspended while disk i/o takes place. During this time
86another process may use the emulator, thereby perhaps changing static
87variables. The code which accesses user memory is confined to five
88files:
89 fpu_entry.c
90 reg_ld_str.c
91 load_store.c
92 get_address.c
93 errors.c
94As from version 1.12 of the emulator, no static variables are used
95(apart from those in the kernel's per-process tables). The emulator is
96therefore now fully re-entrant, rather than having just the restricted
97form of re-entrancy which is required by the Linux kernel.
98
99----------------------- Limitations of wm-FPU-emu -----------------------
100
101There are a number of differences between the current wm-FPU-emu
102(version 2.01) and the 80486 FPU (apart from bugs). The differences
103are fewer than those which applied to the 1.xx series of the emulator.
104Some of the more important differences are listed below:
105
106The Roundup flag does not have much meaning for the transcendental
107functions and its 80486 value with these functions is likely to differ
108from its emulator value.
109
110In a few rare cases the Underflow flag obtained with the emulator will
111be different from that obtained with an 80486. This occurs when the
112following conditions apply simultaneously:
113(a) the operands have a higher precision than the current setting of the
114 precision control (PC) flags.
115(b) the underflow exception is masked.
116(c) the magnitude of the exact result (before rounding) is less than 2^-16382.
117(d) the magnitude of the final result (after rounding) is exactly 2^-16382.
118(e) the magnitude of the exact result would be exactly 2^-16382 if the
119 operands were rounded to the current precision before the arithmetic
120 operation was performed.
121If all of these apply, the emulator will set the Underflow flag but a real
12280486 will not.
123
124NOTE: Certain formats of Extended Real are UNSUPPORTED. They are
125unsupported by the 80486. They are the Pseudo-NaNs, Pseudoinfinities,
126and Unnormals. None of these will be generated by an 80486 or by the
127emulator. Do not use them. The emulator treats them differently in
128detail from the way an 80486 does.
129
130Self modifying code can cause the emulator to fail. An example of such
131code is:
132 movl %esp,[%ebx]
133 fld1
134The FPU instruction may be (usually will be) loaded into the pre-fetch
135queue of the CPU before the mov instruction is executed. If the
136destination of the 'movl' overlaps the FPU instruction then the bytes
137in the prefetch queue and memory will be inconsistent when the FPU
138instruction is executed. The emulator will be invoked but will not be
139able to find the instruction which caused the device-not-present
140exception. For this case, the emulator cannot emulate the behaviour of
141an 80486DX.
142
143Handling of the address size override prefix byte (0x67) has not been
144extensively tested yet. A major problem exists because using it in
145vm86 mode can cause a general protection fault. Address offsets
146greater than 0xffff appear to be illegal in vm86 mode but are quite
147acceptable (and work) in real mode. A small test program developed to
148check the addressing, and which runs successfully in real mode,
149crashes dosemu under Linux and also brings Windows down with a general
150protection fault message when run under the MS-DOS prompt of Windows
1513.1. (The program simply reads data from a valid address).
152
153The emulator supports 16-bit protected mode, with one difference from
154an 80486DX. An 80486DX will allow some floating point instructions to
155write a few bytes below the lowest address of the stack. The emulator
156will not allow this in 16-bit protected mode: no instructions are
157allowed to write outside the bounds set by the protection.
158
159----------------------- Performance of wm-FPU-emu -----------------------
160
161Speed.
162-----
163
164The speed of floating point computation with the emulator will depend
165upon instruction mix. Relative performance is best for the instructions
166which require most computation. The simple instructions are adversely
167affected by the FPU instruction trap overhead.
168
169
170Timing: Some simple timing tests have been made on the emulator functions.
171The times include load/store instructions. All times are in microseconds
172measured on a 33MHz 386 with 64k cache. The Turbo C tests were under
173ms-dos, the next two columns are for emulators running with the djgpp
174ms-dos extender. The final column is for wm-FPU-emu in Linux 0.97,
175using libm4.0 (hard).
176
177function Turbo C djgpp 1.06 WM-emu387 wm-FPU-emu
178
179 + 60.5 154.8 76.5 139.4
180 - 61.1-65.5 157.3-160.8 76.2-79.5 142.9-144.7
181 * 71.0 190.8 79.6 146.6
182 / 61.2-75.0 261.4-266.9 75.3-91.6 142.2-158.1
183
184 sin() 310.8 4692.0 319.0 398.5
185 cos() 284.4 4855.2 308.0 388.7
186 tan() 495.0 8807.1 394.9 504.7
187 atan() 328.9 4866.4 601.1 419.5-491.9
188
189 sqrt() 128.7 crashed 145.2 227.0
190 log() 413.1-419.1 5103.4-5354.21 254.7-282.2 409.4-437.1
191 exp() 479.1 6619.2 469.1 850.8
192
193
194The performance under Linux is improved by the use of look-ahead code.
195The following results show the improvement which is obtained under
196Linux due to the look-ahead code. Also given are the times for the
197original Linux emulator with the 4.1 'soft' lib.
198
199 [ Linus' note: I changed look-ahead to be the default under linux, as
200 there was no reason not to use it after I had edited it to be
201 disabled during tracing ]
202
203 wm-FPU-emu w original w
204 look-ahead 'soft' lib
205 + 106.4 190.2
206 - 108.6-111.6 192.4-216.2
207 * 113.4 193.1
208 / 108.8-124.4 700.1-706.2
209
210 sin() 390.5 2642.0
211 cos() 381.5 2767.4
212 tan() 496.5 3153.3
213 atan() 367.2-435.5 2439.4-3396.8
214
215 sqrt() 195.1 4732.5
216 log() 358.0-387.5 3359.2-3390.3
217 exp() 619.3 4046.4
218
219
220These figures are now somewhat out-of-date. The emulator has become
221progressively slower for most functions as more of the 80486 features
222have been implemented.
223
224
225----------------------- Accuracy of wm-FPU-emu -----------------------
226
227
228The accuracy of the emulator is in almost all cases equal to or better
229than that of an Intel 80486 FPU.
230
231The results of the basic arithmetic functions (+,-,*,/), and fsqrt
232match those of an 80486 FPU. They are the best possible; the error for
233these never exceeds 1/2 an lsb. The fprem and fprem1 instructions
234return exact results; they have no error.
235
236
237The following table compares the emulator accuracy for the sqrt(),
238trig and log functions against the Turbo C "emulator". For this table,
239each function was tested at about 400 points. Ideal worst-case results
240would be 64 bits. The reduced Turbo C accuracy of cos() and tan() for
241arguments greater than pi/4 can be thought of as being related to the
242precision of the argument x; e.g. an argument of pi/2-(1e-10) which is
243accurate to 64 bits can result in a relative accuracy in cos() of
244about 64 + log2(cos(x)) = 31 bits.
245
246
247Function Tested x range Worst result Turbo C
248 (relative bits)
249
250sqrt(x) 1 .. 2 64.1 63.2
251atan(x) 1e-10 .. 200 64.2 62.8
252cos(x) 0 .. pi/2-(1e-10) 64.4 (x <= pi/4) 62.4
253 64.1 (x = pi/2-(1e-10)) 31.9
254sin(x) 1e-10 .. pi/2 64.0 62.8
255tan(x) 1e-10 .. pi/2-(1e-10) 64.0 (x <= pi/4) 62.1
256 64.1 (x = pi/2-(1e-10)) 31.9
257exp(x) 0 .. 1 63.1 ** 62.9
258log(x) 1+1e-6 .. 2 63.8 ** 62.1
259
260** The accuracy for exp() and log() is low because the FPU (emulator)
261does not compute them directly; two operations are required.
262
263
264The emulator passes the "paranoia" tests (compiled with gcc 2.3.3 or
265later) for 'float' variables (24 bit precision numbers) when precision
266control is set to 24, 53 or 64 bits, and for 'double' variables (53
267bit precision numbers) when precision control is set to 53 bits (a
268properly performing FPU cannot pass the 'paranoia' tests for 'double'
269variables when precision control is set to 64 bits).
270
271The code for reducing the argument for the trig functions (fsin, fcos,
272fptan and fsincos) has been improved and now effectively uses a value
273for pi which is accurate to more than 128 bits precision. As a
274consequence, the accuracy of these functions for large arguments has
275been dramatically improved (and is now very much better than an 80486
276FPU). There is also now no degradation of accuracy for fcos and fptan
277for operands close to pi/2. Measured results are (note that the
278definition of accuracy has changed slightly from that used for the
279above table):
280
281Function Tested x range Worst result
282 (absolute bits)
283
284cos(x) 0 .. 9.22e+18 62.0
285sin(x) 1e-16 .. 9.22e+18 62.1
286tan(x) 1e-16 .. 9.22e+18 61.8
287
288It is possible with some effort to find very large arguments which
289give much degraded precision. For example, the integer number
290 8227740058411162616.0
291is within about 10e-7 of a multiple of pi. To find the tan (for
292example) of this number to 64 bits precision it would be necessary to
293have a value of pi which had about 150 bits precision. The FPU
294emulator computes the result to about 42.6 bits precision (the correct
295result is about -9.739715e-8). On the other hand, an 80486 FPU returns
2960.01059, which in relative terms is hopelessly inaccurate.
297
298For arguments close to critical angles (which occur at multiples of
299pi/2) the emulator is more accurate than an 80486 FPU. For very large
300arguments, the emulator is far more accurate.
301
302
303Prior to version 1.20 of the emulator, the accuracy of the results for
304the transcendental functions (in their principal range) was not as
305good as the results from an 80486 FPU. From version 1.20, the accuracy
306has been considerably improved and these functions now give measured
307worst-case results which are better than the worst-case results given
308by an 80486 FPU.
309
310The following table gives the measured results for the emulator. The
311number of randomly selected arguments in each case is about half a
312million. The group of three columns gives the frequency of the given
313accuracy in number of times per million, thus the second of these
314columns shows that an accuracy of between 63.80 and 63.89 bits was
315found at a rate of 133 times per one million measurements for fsin.
316The results show that the fsin, fcos and fptan instructions return
317results which are in error (i.e. less accurate than the best possible
318result (which is 64 bits)) for about one per cent of all arguments
319between -pi/2 and +pi/2. The other instructions have a lower
320frequency of results which are in error. The last two columns give
321the worst accuracy which was found (in bits) and the approximate value
322of the argument which produced it.
323
324 frequency (per M)
325 ------------------- ---------------
326instr arg range # tests 63.7 63.8 63.9 worst at arg
327 bits bits bits bits
328----- ------------ ------- ---- ---- ----- ----- --------
329fsin (0,pi/2) 547756 0 133 10673 63.89 0.451317
330fcos (0,pi/2) 547563 0 126 10532 63.85 0.700801
331fptan (0,pi/2) 536274 11 267 10059 63.74 0.784876
332fpatan 4 quadrants 517087 0 8 1855 63.88 0.435121 (4q)
333fyl2x (0,20) 541861 0 0 1323 63.94 1.40923 (x)
334fyl2xp1 (-.293,.414) 520256 0 0 5678 63.93 0.408542 (x)
335f2xm1 (-1,1) 538847 4 481 6488 63.79 0.167709
336
337
338Tests performed on an 80486 FPU showed results of lower accuracy. The
339following table gives the results which were obtained with an AMD
340486DX2/66 (other tests indicate that an Intel 486DX produces
341identical results). The tests were basically the same as those used
342to measure the emulator (the values, being random, were in general not
343the same). The total number of tests for each instruction is given
344at the end of the table; in each case about 100k tests were performed.
345Another line of figures at the end of the table shows that most of the
346instructions return results which are in error for more than 10
347percent of the arguments tested.
348
349The numbers in the body of the table give the approx number of times a
350result of the given accuracy in bits (given in the left-most column)
351was obtained per one million arguments. For three of the instructions,
352two columns of results are given: * The second column for f2xm1 gives
353the number of cases where the results of the first column were for a
354positive argument, this shows that this instruction gives better
355results for positive arguments than it does for negative. * In the
356cases of fcos and fptan, the first column gives the results when all
357cases where arguments greater than 1.5 were removed from the results
358given in the second column. Unlike the emulator, an 80486 FPU returns
359results of relatively poor accuracy for these instructions when the
360argument approaches pi/2. The table does not show those cases when the
361accuracy of the results were less than 62 bits, which occurs quite
362often for fsin and fptan when the argument approaches pi/2. This poor
363accuracy is discussed above in relation to the Turbo C "emulator", and
364the accuracy of the value of pi.
365
366
367bits f2xm1 f2xm1 fpatan fcos fcos fyl2x fyl2xp1 fsin fptan fptan
36862.0 0 0 0 0 437 0 0 0 0 925
36962.1 0 0 10 0 894 0 0 0 0 1023
37062.2 14 0 0 0 1033 0 0 0 0 945
37162.3 57 0 0 0 1202 0 0 0 0 1023
37262.4 385 0 0 10 1292 0 23 0 0 1178
37362.5 1140 0 0 119 1649 0 39 0 0 1149
37462.6 2037 0 0 189 1620 0 16 0 0 1169
37562.7 5086 14 0 646 2315 10 101 35 39 1402
37662.8 8818 86 0 984 3050 59 287 131 224 2036
37762.9 11340 1355 0 2126 4153 79 605 357 321 1948
37863.0 15557 4750 0 3319 5376 246 1281 862 808 2688
37963.1 20016 8288 0 4620 6628 511 2569 1723 1510 3302
38063.2 24945 11127 10 6588 8098 1120 4470 2968 2990 4724
38163.3 25686 12382 69 8774 10682 1906 6775 4482 5474 7236
38263.4 29219 14722 79 11109 12311 3094 9414 7259 8912 10587
38363.5 30458 14936 393 13802 15014 5874 12666 9609 13762 15262
38463.6 32439 16448 1277 17945 19028 10226 15537 14657 19158 20346
38563.7 35031 16805 4067 23003 23947 18910 20116 21333 25001 26209
38663.8 33251 15820 7673 24781 25675 24617 25354 24440 29433 30329
38763.9 33293 16833 18529 28318 29233 31267 31470 27748 29676 30601
388
389Per cent with error:
390 30.9 3.2 18.5 9.8 13.1 11.6 17.4
391Total arguments tested:
392 70194 70099 101784 100641 100641 101799 128853 114893 102675 102675
393
394
395------------------------- Contributors -------------------------------
396
397A number of people have contributed to the development of the
398emulator, often by just reporting bugs, sometimes with suggested
399fixes, and a few kind people have provided me with access in one way
400or another to an 80486 machine. Contributors include (to those people
401who I may have forgotten, please forgive me):
402
403Linus Torvalds
404Tommy.Thorn@daimi.aau.dk
405Andrew.Tridgell@anu.edu.au
406Nick Holloway, alfie@dcs.warwick.ac.uk
407Hermano Moura, moura@dcs.gla.ac.uk
408Jon Jagger, J.Jagger@scp.ac.uk
409Lennart Benschop
410Brian Gallew, geek+@CMU.EDU
411Thomas Staniszewski, ts3v+@andrew.cmu.edu
412Martin Howell, mph@plasma.apana.org.au
413M Saggaf, alsaggaf@athena.mit.edu
414Peter Barker, PETER@socpsy.sci.fau.edu
415tom@vlsivie.tuwien.ac.at
416Dan Russel, russed@rpi.edu
417Daniel Carosone, danielce@ee.mu.oz.au
418cae@jpmorgan.com
419Hamish Coleman, t933093@minyos.xx.rmit.oz.au
420Bruce Evans, bde@kralizec.zeta.org.au
421Timo Korvola, Timo.Korvola@hut.fi
422Rick Lyons, rick@razorback.brisnet.org.au
423Rick, jrs@world.std.com
424
425...and numerous others who responded to my request for help with
426a real 80486.
427
diff --git a/arch/i386/math-emu/control_w.h b/arch/i386/math-emu/control_w.h
new file mode 100644
index 000000000000..ae2274dbd305
--- /dev/null
+++ b/arch/i386/math-emu/control_w.h
@@ -0,0 +1,45 @@
1/*---------------------------------------------------------------------------+
2 | control_w.h |
3 | |
4 | Copyright (C) 1992,1993 |
5 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
6 | Australia. E-mail billm@vaxc.cc.monash.edu.au |
7 | |
8 +---------------------------------------------------------------------------*/
9
10#ifndef _CONTROLW_H_
11#define _CONTROLW_H_
12/* _Const_(x) yields an immediate operand ($x) in assembler sources and a plain constant in C. */
13#ifdef __ASSEMBLY__
14#define _Const_(x) $##x
15#else
16#define _Const_(x) x
17#endif
18
19#define CW_RC _Const_(0x0C00) /* rounding control */
20#define CW_PC _Const_(0x0300) /* precision control */
21/* Individual exception mask bits; these use _Const_ like every other constant in this header. */
22#define CW_Precision _Const_(0x0020) /* loss of precision mask */
23#define CW_Underflow _Const_(0x0010) /* underflow mask */
24#define CW_Overflow _Const_(0x0008) /* overflow mask */
25#define CW_ZeroDiv _Const_(0x0004) /* divide by zero mask */
26#define CW_Denormal _Const_(0x0002) /* denormalized operand mask */
27#define CW_Invalid _Const_(0x0001) /* invalid operation mask */
28
29#define CW_Exceptions _Const_(0x003f) /* all masks */
30/* Values of the rounding control field selected by CW_RC */
31#define RC_RND _Const_(0x0000)
32#define RC_DOWN _Const_(0x0400)
33#define RC_UP _Const_(0x0800)
34#define RC_CHOP _Const_(0x0C00)
35
36/* p 15-5: Precision control bits affect only the following:
37 ADD, SUB(R), MUL, DIV(R), and SQRT */
38#define PR_24_BITS _Const_(0x000)
39#define PR_53_BITS _Const_(0x200)
40#define PR_64_BITS _Const_(0x300)
41#define PR_RESERVED_BITS _Const_(0x100)
42/* FULL_PRECISION simulates all exceptions masked */
43#define FULL_PRECISION (PR_64_BITS | RC_RND | 0x3f)
44
45#endif /* _CONTROLW_H_ */
diff --git a/arch/i386/math-emu/div_Xsig.S b/arch/i386/math-emu/div_Xsig.S
new file mode 100644
index 000000000000..f77ba3058b31
--- /dev/null
+++ b/arch/i386/math-emu/div_Xsig.S
@@ -0,0 +1,365 @@
1 .file "div_Xsig.S"
2/*---------------------------------------------------------------------------+
3 | div_Xsig.S |
4 | |
5 | Division subroutine for 96 bit quantities |
6 | |
7 | Copyright (C) 1994,1995 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
9 | Australia. E-mail billm@jacobi.maths.monash.edu.au |
10 | |
11 | |
12 +---------------------------------------------------------------------------*/
13
14/*---------------------------------------------------------------------------+
15 | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and |
16 | put the 96 bit result at the location d. |
17 | |
18 | The result may not be accurate to 96 bits. It is intended for use where |
19 | a result better than 64 bits is required. The result should usually be |
20 | good to at least 94 bits. |
21 | The returned result is actually the quotient divided by two. This is |
22 | done to prevent overflow. |
23 | |
24 | .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb -> .dddddddddddd |
25 | |
26 | void div_Xsig(Xsig *a, Xsig *b, Xsig *dest) |
27 | |
28 +---------------------------------------------------------------------------*/
29
30#include "exception.h"
31#include "fpu_emu.h"
32
33
34#define XsigLL(x) (x) /* least significant 32 bits of an Xsig (offset 0) */
35#define XsigL(x) 4(x) /* middle 32 bits (offset 4) */
36#define XsigH(x) 8(x) /* most significant 32 bits (offset 8) */
37
38
39#ifndef NON_REENTRANT_FPU
40/*
41 Local storage on the stack:
42 Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
43 Quotient: FPU_result_3:FPU_result_2:FPU_result_1 (most significant word first) */
44#define FPU_accum_3 -4(%ebp)
45#define FPU_accum_2 -8(%ebp)
46#define FPU_accum_1 -12(%ebp)
47#define FPU_accum_0 -16(%ebp)
48#define FPU_result_3 -20(%ebp)
49#define FPU_result_2 -24(%ebp)
50#define FPU_result_1 -28(%ebp)
51
52#else
53.data
54/*
55 Local storage in a static area:
56 Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
57 */
58 .align 4,0
59FPU_accum_3:
60 .long 0
61FPU_accum_2:
62 .long 0
63FPU_accum_1:
64 .long 0
65FPU_accum_0:
66 .long 0
67FPU_result_3:
68 .long 0
69FPU_result_2:
70 .long 0
71FPU_result_1:
72 .long 0
73#endif /* NON_REENTRANT_FPU */
74
75
76.text
77ENTRY(div_Xsig)
78 pushl %ebp
79 movl %esp,%ebp
80#ifndef NON_REENTRANT_FPU
81 subl $28,%esp /* room for the seven 32-bit locals FPU_accum_0..3 and FPU_result_1..3 */
82#endif /* NON_REENTRANT_FPU */
83
84 pushl %esi
85 pushl %edi
86 pushl %ebx
87
88 movl PARAM1,%esi /* pointer to num */
89 movl PARAM2,%ebx /* pointer to denom */
90
91#ifdef PARANOID
92 testl $0x80000000, XsigH(%ebx) /* Divisor */
93 je L_bugged /* the top bit of the divisor must be set */
94#endif /* PARANOID */
95
96
97/*---------------------------------------------------------------------------+
98 | Divide: Return arg1/arg2 to arg3. |
99 | |
100 | The maximum returned value is (ignoring exponents) |
101 | .ffffffff ffffffff |
102 | ------------------ = 1.ffffffff fffffffe |
103 | .80000000 00000000 |
104 | and the minimum is |
105 | .80000000 00000000 |
106 | ------------------ = .80000000 00000001 (rounded) |
107 | .ffffffff ffffffff |
108 | |
109 +---------------------------------------------------------------------------*/
110
111 /* Save extended dividend in local register */
112
113 /* Divide by 2 to prevent overflow */
114 clc /* so that a zero is rotated into the top bit */
115 movl XsigH(%esi),%eax
116 rcrl %eax /* one-bit right rotate through carry; the carry links the words */
117 movl %eax,FPU_accum_3
118 movl XsigL(%esi),%eax
119 rcrl %eax
120 movl %eax,FPU_accum_2
121 movl XsigLL(%esi),%eax
122 rcrl %eax
123 movl %eax,FPU_accum_1
124 movl $0,%eax /* movl leaves the carry flag intact for the last rcrl */
125 rcrl %eax /* the bit shifted out of the low word lands in FPU_accum_0 */
126 movl %eax,FPU_accum_0
127
128 movl FPU_accum_2,%eax /* Get the current num */
129 movl FPU_accum_3,%edx
130
131/*----------------------------------------------------------------------*/
132/* Initialization done.
133 Do the first 32 bits. */
134
135 /* We will divide by a number which is too large */
136 movl XsigH(%ebx),%ecx
137 addl $1,%ecx /* augmented divisor = ms dw of the denom + 1 */
138 jnc LFirst_div_not_1 /* no carry: the augmented divisor fits in 32 bits */
139
140 /* here we need to divide by 100000000h,
141 i.e., no division at all.. */
142 mov %edx,%eax
143 jmp LFirst_div_done
144
145LFirst_div_not_1:
146 divl %ecx /* Divide the numerator by the augmented
147 denom ms dw */
148
149LFirst_div_done:
150 movl %eax,FPU_result_3 /* Put the result in the answer */
151
152 mull XsigH(%ebx) /* mul by the ms dw of the denom */
153
154 subl %eax,FPU_accum_2 /* Subtract from the num local reg */
155 sbbl %edx,FPU_accum_3
156
157 movl FPU_result_3,%eax /* Get the result back */
158 mull XsigL(%ebx) /* now mul the ls dw of the denom */
159
160 subl %eax,FPU_accum_1 /* Subtract from the num local reg */
161 sbbl %edx,FPU_accum_2
162 sbbl $0,FPU_accum_3
163 je LDo_2nd_32_bits /* Must check for non-zero result here */
164
165#ifdef PARANOID
166 jb L_bugged_1 /* carry still set from the sbbl: the remainder went negative */
167#endif /* PARANOID */
168
169 /* need to subtract another once of the denom */
170 incl FPU_result_3 /* Correct the answer */
171
172 movl XsigL(%ebx),%eax
173 movl XsigH(%ebx),%edx
174 subl %eax,FPU_accum_1 /* Subtract from the num local reg */
175 sbbl %edx,FPU_accum_2
176
177#ifdef PARANOID
178 sbbl $0,FPU_accum_3
179 jne L_bugged_1 /* Must check for non-zero result here */
180#endif /* PARANOID */
181
182/*----------------------------------------------------------------------*/
183/* Half of the main problem is done, there is just a reduced numerator
184 to handle now.
185 Work with the second 32 bits, FPU_accum_0 not used from now on */
186LDo_2nd_32_bits:
187 movl FPU_accum_2,%edx /* get the reduced num */
188 movl FPU_accum_1,%eax
189
190 /* need to check for possible subsequent overflow */
191 cmpl XsigH(%ebx),%edx
192 jb LDo_2nd_div
193 ja LPrevent_2nd_overflow
194
195 cmpl XsigL(%ebx),%eax /* high words equal: compare the next words */
196 jb LDo_2nd_div
197
198LPrevent_2nd_overflow:
199/* The numerator is greater or equal, would cause overflow */
200 /* prevent overflow */
201 subl XsigL(%ebx),%eax
202 sbbl XsigH(%ebx),%edx
203 movl %edx,FPU_accum_2
204 movl %eax,FPU_accum_1
205
206 incl FPU_result_3 /* Reflect the subtraction in the answer */
207
208#ifdef PARANOID
209 je L_bugged_2 /* Can't bump the result to 1.0 */
210#endif /* PARANOID */
211
212LDo_2nd_div:
213 cmpl $0,%ecx /* augmented denom msw */
214 jnz LSecond_div_not_1
215
216 /* %ecx == 0, we are dividing by 1.0 */
217 mov %edx,%eax
218 jmp LSecond_div_done
219
220LSecond_div_not_1:
221 divl %ecx /* Divide the numerator by the denom ms dw */
222
223LSecond_div_done:
224 movl %eax,FPU_result_2 /* Put the result in the answer */
225
226 mull XsigH(%ebx) /* mul by the ms dw of the denom */
227
228 subl %eax,FPU_accum_1 /* Subtract from the num local reg */
229 sbbl %edx,FPU_accum_2
230
231#ifdef PARANOID
232 jc L_bugged_2
233#endif /* PARANOID */
234
235 movl FPU_result_2,%eax /* Get the result back */
236 mull XsigL(%ebx) /* now mul the ls dw of the denom */
237
238 subl %eax,FPU_accum_0 /* Subtract from the num local reg */
239 sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */
240 sbbl $0,FPU_accum_2
241
242#ifdef PARANOID
243 jc L_bugged_2
244#endif /* PARANOID */
245
246 jz LDo_3rd_32_bits /* zero flag is from the sbbl above: FPU_accum_2 is zero, no correction needed */
247
248#ifdef PARANOID
249 cmpl $1,FPU_accum_2
250 jne L_bugged_2
251#endif /* PARANOID */
252
253 /* need to subtract another once of the denom */
254 movl XsigL(%ebx),%eax
255 movl XsigH(%ebx),%edx
256 subl %eax,FPU_accum_0 /* Subtract from the num local reg */
257 sbbl %edx,FPU_accum_1
258 sbbl $0,FPU_accum_2
259
260#ifdef PARANOID
261 jc L_bugged_2
262 jne L_bugged_2
263#endif /* PARANOID */
264
265 addl $1,FPU_result_2 /* Correct the answer */
266 adcl $0,FPU_result_3
267
268#ifdef PARANOID
269 jc L_bugged_2 /* Must check for non-zero result here */
270#endif /* PARANOID */
271
272/*----------------------------------------------------------------------*/
273/* The division is essentially finished here, we just need to perform
274 tidying operations.
275 Deal with the 3rd 32 bits */
276LDo_3rd_32_bits:
277 /* We use an approximation for the third 32 bits.
278 To take account of the 3rd 32 bits of the divisor
279 (call them del), we subtract del * (a/b) */
280
281 movl FPU_result_3,%eax /* a/b */
282 mull XsigLL(%ebx) /* del */
283
284 subl %edx,FPU_accum_1 /* only the high word of the product is subtracted */
285
286 /* A borrow indicates that the result is negative */
287 jnb LTest_over
288
289 movl XsigH(%ebx),%edx
290 addl %edx,FPU_accum_1
291
292 subl $1,FPU_result_2 /* Adjust the answer */
293 sbbl $0,FPU_result_3
294
295 /* The above addition might not have been enough, check again. */
296 movl FPU_accum_1,%edx /* get the reduced num */
297 cmpl XsigH(%ebx),%edx /* denom */
298 jb LDo_3rd_div
299
300 movl XsigH(%ebx),%edx
301 addl %edx,FPU_accum_1
302
303 subl $1,FPU_result_2 /* Adjust the answer */
304 sbbl $0,FPU_result_3
305 jmp LDo_3rd_div
306
307LTest_over:
308 movl FPU_accum_1,%edx /* get the reduced num */
309
310 /* need to check for possible subsequent overflow */
311 cmpl XsigH(%ebx),%edx /* denom */
312 jb LDo_3rd_div
313
314 /* prevent overflow */
315 subl XsigH(%ebx),%edx
316 movl %edx,FPU_accum_1
317
318 addl $1,FPU_result_2 /* Reflect the subtraction in the answer */
319 adcl $0,FPU_result_3
320
321LDo_3rd_div:
322 movl FPU_accum_0,%eax
323 movl FPU_accum_1,%edx
324 divl XsigH(%ebx) /* edx:eax divided by the ms dw of the denom only */
325
326 movl %eax,FPU_result_1 /* Rough estimate of third word */
327
328 movl PARAM3,%esi /* pointer to answer */
329
330 movl FPU_result_1,%eax
331 movl %eax,XsigLL(%esi)
332 movl FPU_result_2,%eax
333 movl %eax,XsigL(%esi)
334 movl FPU_result_3,%eax
335 movl %eax,XsigH(%esi)
336
337L_exit:
338 popl %ebx /* restore the registers saved on entry, in reverse order */
339 popl %edi
340 popl %esi
341
342 leave /* drops the local storage and restores %ebp */
343 ret
344
345
346#ifdef PARANOID
347/* The logic is wrong if we got here */
348L_bugged:
349 pushl EX_INTERNAL|0x240
350 call EXCEPTION
351 pop %ebx /* discard the pushed argument; L_exit reloads %ebx */
352 jmp L_exit
353
354L_bugged_1:
355 pushl EX_INTERNAL|0x241
356 call EXCEPTION
357 pop %ebx
358 jmp L_exit
359
360L_bugged_2:
361 pushl EX_INTERNAL|0x242
362 call EXCEPTION
363 pop %ebx
364 jmp L_exit
365#endif /* PARANOID */
diff --git a/arch/i386/math-emu/div_small.S b/arch/i386/math-emu/div_small.S
new file mode 100644
index 000000000000..47099628fa4c
--- /dev/null
+++ b/arch/i386/math-emu/div_small.S
@@ -0,0 +1,47 @@
1 .file "div_small.S"
2/*---------------------------------------------------------------------------+
3 | div_small.S |
4 | |
5 | Divide a 64 bit integer by a 32 bit integer & return remainder. |
6 | |
7 | Copyright (C) 1992,1995 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
9 | Australia. E-mail billm@jacobi.maths.monash.edu.au |
10 | |
11 | |
12 +---------------------------------------------------------------------------*/
13
14/*---------------------------------------------------------------------------+
15 | unsigned long FPU_div_small(unsigned long long *x, unsigned long y) |
16 +---------------------------------------------------------------------------*/
17
18#include "fpu_emu.h"
19
20.text
ENTRY(FPU_div_small)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi			/* only extra register saved by this routine */

	movl	PARAM2,%ecx		/* ecx = y, the 32 bit divisor */
	movl	PARAM1,%esi		/* esi = x, pointer to the 64 bit dividend */

	/* High word: edx:eax = 0:msw, so the quotient always fits in eax. */
	xorl	%edx,%edx
	movl	4(%esi),%eax
	divl	%ecx
	movl	%eax,4(%esi)		/* store quotient msw back into *x */

	/* Low word: edx still holds the remainder of the high-word divide. */
	movl	(%esi),%eax
	divl	%ecx
	movl	%eax,(%esi)		/* store quotient lsw back into *x */

	movl	%edx,%eax		/* return value = final remainder */

	popl	%esi
	leave
	ret
47
diff --git a/arch/i386/math-emu/errors.c b/arch/i386/math-emu/errors.c
new file mode 100644
index 000000000000..a1b0d22f6978
--- /dev/null
+++ b/arch/i386/math-emu/errors.c
@@ -0,0 +1,739 @@
1/*---------------------------------------------------------------------------+
2 | errors.c |
3 | |
4 | The error handling functions for wm-FPU-emu |
5 | |
6 | Copyright (C) 1992,1993,1994,1996 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@jacobi.maths.monash.edu.au |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13/*---------------------------------------------------------------------------+
14 | Note: |
15 | The file contains code which accesses user memory. |
16 | Emulator static data may change when user memory is accessed, due to |
17 | other processes using the emulator while swapping is in progress. |
18 +---------------------------------------------------------------------------*/
19
20#include <linux/signal.h>
21
22#include <asm/uaccess.h>
23
24#include "fpu_emu.h"
25#include "fpu_system.h"
26#include "exception.h"
27#include "status_w.h"
28#include "control_w.h"
29#include "reg_constant.h"
30#include "version.h"
31
32/* */
33#undef PRINT_MESSAGES
34/* */
35
36
37#if 0
38void Un_impl(void)
39{
40 u_char byte1, FPU_modrm;
41 unsigned long address = FPU_ORIG_EIP;
42
43 RE_ENTRANT_CHECK_OFF;
44 /* No need to check access_ok(), we have previously fetched these bytes. */
45 printk("Unimplemented FPU Opcode at eip=%p : ", (void __user *) address);
46 if ( FPU_CS == __USER_CS )
47 {
48 while ( 1 )
49 {
50 FPU_get_user(byte1, (u_char __user *) address);
51 if ( (byte1 & 0xf8) == 0xd8 ) break;
52 printk("[%02x]", byte1);
53 address++;
54 }
55 printk("%02x ", byte1);
56 FPU_get_user(FPU_modrm, 1 + (u_char __user *) address);
57
58 if (FPU_modrm >= 0300)
59 printk("%02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7);
60 else
61 printk("/%d\n", (FPU_modrm >> 3) & 7);
62 }
63 else
64 {
65 printk("cs selector = %04x\n", FPU_CS);
66 }
67
68 RE_ENTRANT_CHECK_ON;
69
70 EXCEPTION(EX_Invalid);
71
72}
73#endif /* 0 */
74
75
76/*
77 Called for opcodes which are illegal and which are known to result in a
78 SIGILL with a real 80486.
79 */
80void FPU_illegal(void)
81{
82 math_abort(FPU_info,SIGILL);
83}
84
85
86
87void FPU_printall(void)
88{
89 int i;
90 static const char *tag_desc[] = { "Valid", "Zero", "ERROR", "Empty",
91 "DeNorm", "Inf", "NaN" };
92 u_char byte1, FPU_modrm;
93 unsigned long address = FPU_ORIG_EIP;
94
95 RE_ENTRANT_CHECK_OFF;
96 /* No need to check access_ok(), we have previously fetched these bytes. */
97 printk("At %p:", (void *) address);
98 if ( FPU_CS == __USER_CS )
99 {
100#define MAX_PRINTED_BYTES 20
101 for ( i = 0; i < MAX_PRINTED_BYTES; i++ )
102 {
103 FPU_get_user(byte1, (u_char __user *) address);
104 if ( (byte1 & 0xf8) == 0xd8 )
105 {
106 printk(" %02x", byte1);
107 break;
108 }
109 printk(" [%02x]", byte1);
110 address++;
111 }
112 if ( i == MAX_PRINTED_BYTES )
113 printk(" [more..]\n");
114 else
115 {
116 FPU_get_user(FPU_modrm, 1 + (u_char __user *) address);
117
118 if (FPU_modrm >= 0300)
119 printk(" %02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7);
120 else
121 printk(" /%d, mod=%d rm=%d\n",
122 (FPU_modrm >> 3) & 7, (FPU_modrm >> 6) & 3, FPU_modrm & 7);
123 }
124 }
125 else
126 {
127 printk("%04x\n", FPU_CS);
128 }
129
130 partial_status = status_word();
131
132#ifdef DEBUGGING
133if ( partial_status & SW_Backward ) printk("SW: backward compatibility\n");
134if ( partial_status & SW_C3 ) printk("SW: condition bit 3\n");
135if ( partial_status & SW_C2 ) printk("SW: condition bit 2\n");
136if ( partial_status & SW_C1 ) printk("SW: condition bit 1\n");
137if ( partial_status & SW_C0 ) printk("SW: condition bit 0\n");
138if ( partial_status & SW_Summary ) printk("SW: exception summary\n");
139if ( partial_status & SW_Stack_Fault ) printk("SW: stack fault\n");
140if ( partial_status & SW_Precision ) printk("SW: loss of precision\n");
141if ( partial_status & SW_Underflow ) printk("SW: underflow\n");
142if ( partial_status & SW_Overflow ) printk("SW: overflow\n");
143if ( partial_status & SW_Zero_Div ) printk("SW: divide by zero\n");
144if ( partial_status & SW_Denorm_Op ) printk("SW: denormalized operand\n");
145if ( partial_status & SW_Invalid ) printk("SW: invalid operation\n");
146#endif /* DEBUGGING */
147
148 printk(" SW: b=%d st=%ld es=%d sf=%d cc=%d%d%d%d ef=%d%d%d%d%d%d\n",
149 partial_status & 0x8000 ? 1 : 0, /* busy */
150 (partial_status & 0x3800) >> 11, /* stack top pointer */
151 partial_status & 0x80 ? 1 : 0, /* Error summary status */
152 partial_status & 0x40 ? 1 : 0, /* Stack flag */
153 partial_status & SW_C3?1:0, partial_status & SW_C2?1:0, /* cc */
154 partial_status & SW_C1?1:0, partial_status & SW_C0?1:0, /* cc */
155 partial_status & SW_Precision?1:0, partial_status & SW_Underflow?1:0,
156 partial_status & SW_Overflow?1:0, partial_status & SW_Zero_Div?1:0,
157 partial_status & SW_Denorm_Op?1:0, partial_status & SW_Invalid?1:0);
158
159printk(" CW: ic=%d rc=%ld%ld pc=%ld%ld iem=%d ef=%d%d%d%d%d%d\n",
160 control_word & 0x1000 ? 1 : 0,
161 (control_word & 0x800) >> 11, (control_word & 0x400) >> 10,
162 (control_word & 0x200) >> 9, (control_word & 0x100) >> 8,
163 control_word & 0x80 ? 1 : 0,
164 control_word & SW_Precision?1:0, control_word & SW_Underflow?1:0,
165 control_word & SW_Overflow?1:0, control_word & SW_Zero_Div?1:0,
166 control_word & SW_Denorm_Op?1:0, control_word & SW_Invalid?1:0);
167
168 for ( i = 0; i < 8; i++ )
169 {
170 FPU_REG *r = &st(i);
171 u_char tagi = FPU_gettagi(i);
172 switch (tagi)
173 {
174 case TAG_Empty:
175 continue;
176 break;
177 case TAG_Zero:
178 case TAG_Special:
179 tagi = FPU_Special(r);
180 case TAG_Valid:
181 printk("st(%d) %c .%04lx %04lx %04lx %04lx e%+-6d ", i,
182 getsign(r) ? '-' : '+',
183 (long)(r->sigh >> 16),
184 (long)(r->sigh & 0xFFFF),
185 (long)(r->sigl >> 16),
186 (long)(r->sigl & 0xFFFF),
187 exponent(r) - EXP_BIAS + 1);
188 break;
189 default:
190 printk("Whoops! Error in errors.c: tag%d is %d ", i, tagi);
191 continue;
192 break;
193 }
194 printk("%s\n", tag_desc[(int) (unsigned) tagi]);
195 }
196
197 RE_ENTRANT_CHECK_ON;
198
199}
200
201static struct {
202 int type;
203 const char *name;
204} exception_names[] = {
205 { EX_StackOver, "stack overflow" },
206 { EX_StackUnder, "stack underflow" },
207 { EX_Precision, "loss of precision" },
208 { EX_Underflow, "underflow" },
209 { EX_Overflow, "overflow" },
210 { EX_ZeroDiv, "divide by zero" },
211 { EX_Denormal, "denormalized operand" },
212 { EX_Invalid, "invalid operation" },
213 { EX_INTERNAL, "INTERNAL BUG in "FPU_VERSION },
214 { 0, NULL }
215};
216
217/*
218 EX_INTERNAL is always given with a code which indicates where the
219 error was detected.
220
221 Internal error types:
222 0x14 in fpu_etc.c
223 0x1nn in a *.c file:
224 0x101 in reg_add_sub.c
225 0x102 in reg_mul.c
226 0x104 in poly_atan.c
227 0x105 in reg_mul.c
228 0x107 in fpu_trig.c
229 0x108 in reg_compare.c
230 0x109 in reg_compare.c
231 0x110 in reg_add_sub.c
232 0x111 in fpu_entry.c
233 0x112 in fpu_trig.c
234 0x113 in errors.c
235 0x115 in fpu_trig.c
236 0x116 in fpu_trig.c
237 0x117 in fpu_trig.c
238 0x118 in fpu_trig.c
239 0x119 in fpu_trig.c
240 0x120 in poly_atan.c
241 0x121 in reg_compare.c
242 0x122 in reg_compare.c
243 0x123 in reg_compare.c
244 0x125 in fpu_trig.c
245 0x126 in fpu_entry.c
246 0x127 in poly_2xm1.c
247 0x128 in fpu_entry.c
248 0x129 in fpu_entry.c
249 0x130 in get_address.c
250 0x131 in get_address.c
251 0x132 in get_address.c
252 0x133 in get_address.c
253 0x140 in load_store.c
254 0x141 in load_store.c
255 0x150 in poly_sin.c
256 0x151 in poly_sin.c
257 0x160 in reg_ld_str.c
258 0x161 in reg_ld_str.c
259 0x162 in reg_ld_str.c
260 0x163 in reg_ld_str.c
261 0x164 in reg_ld_str.c
262 0x170 in fpu_tags.c
263 0x171 in fpu_tags.c
264 0x172 in fpu_tags.c
265 0x180 in reg_convert.c
266 0x2nn in an *.S file:
267 0x201 in reg_u_add.S
268 0x202 in reg_u_div.S
269 0x203 in reg_u_div.S
270 0x204 in reg_u_div.S
271 0x205 in reg_u_mul.S
272 0x206 in reg_u_sub.S
273 0x207 in wm_sqrt.S
274 0x208 in reg_div.S
275 0x209 in reg_u_sub.S
276 0x210 in reg_u_sub.S
277 0x211 in reg_u_sub.S
278 0x212 in reg_u_sub.S
279 0x213 in wm_sqrt.S
280 0x214 in wm_sqrt.S
281 0x215 in wm_sqrt.S
282 0x220 in reg_norm.S
283 0x221 in reg_norm.S
284 0x230 in reg_round.S
285 0x231 in reg_round.S
286 0x232 in reg_round.S
287 0x233 in reg_round.S
288 0x234 in reg_round.S
289 0x235 in reg_round.S
290 0x236 in reg_round.S
291 0x240 in div_Xsig.S
292 0x241 in div_Xsig.S
293 0x242 in div_Xsig.S
294 */
295
296asmlinkage void FPU_exception(int n)
297{
298 int i, int_type;
299
300 int_type = 0; /* Needed only to stop compiler warnings */
301 if ( n & EX_INTERNAL )
302 {
303 int_type = n - EX_INTERNAL;
304 n = EX_INTERNAL;
305 /* Set lots of exception bits! */
306 partial_status |= (SW_Exc_Mask | SW_Summary | SW_Backward);
307 }
308 else
309 {
310 /* Extract only the bits which we use to set the status word */
311 n &= (SW_Exc_Mask);
312 /* Set the corresponding exception bit */
313 partial_status |= n;
314 /* Set summary bits iff exception isn't masked */
315 if ( partial_status & ~control_word & CW_Exceptions )
316 partial_status |= (SW_Summary | SW_Backward);
317 if ( n & (SW_Stack_Fault | EX_Precision) )
318 {
319 if ( !(n & SW_C1) )
320 /* This bit distinguishes over- from underflow for a stack fault,
321 and roundup from round-down for precision loss. */
322 partial_status &= ~SW_C1;
323 }
324 }
325
326 RE_ENTRANT_CHECK_OFF;
327 if ( (~control_word & n & CW_Exceptions) || (n == EX_INTERNAL) )
328 {
329#ifdef PRINT_MESSAGES
330 /* My message from the sponsor */
331 printk(FPU_VERSION" "__DATE__" (C) W. Metzenthen.\n");
332#endif /* PRINT_MESSAGES */
333
334 /* Get a name string for error reporting */
335 for (i=0; exception_names[i].type; i++)
336 if ( (exception_names[i].type & n) == exception_names[i].type )
337 break;
338
339 if (exception_names[i].type)
340 {
341#ifdef PRINT_MESSAGES
342 printk("FP Exception: %s!\n", exception_names[i].name);
343#endif /* PRINT_MESSAGES */
344 }
345 else
346 printk("FPU emulator: Unknown Exception: 0x%04x!\n", n);
347
348 if ( n == EX_INTERNAL )
349 {
350 printk("FPU emulator: Internal error type 0x%04x\n", int_type);
351 FPU_printall();
352 }
353#ifdef PRINT_MESSAGES
354 else
355 FPU_printall();
356#endif /* PRINT_MESSAGES */
357
358 /*
359 * The 80486 generates an interrupt on the next non-control FPU
360 * instruction. So we need some means of flagging it.
361 * We use the ES (Error Summary) bit for this.
362 */
363 }
364 RE_ENTRANT_CHECK_ON;
365
366#ifdef __DEBUG__
367 math_abort(FPU_info,SIGFPE);
368#endif /* __DEBUG__ */
369
370}
371
372
373/* Real operation attempted on a NaN. */
374/* Returns < 0 if the exception is unmasked */
375int real_1op_NaN(FPU_REG *a)
376{
377 int signalling, isNaN;
378
379 isNaN = (exponent(a) == EXP_OVER) && (a->sigh & 0x80000000);
380
381 /* The default result for the case of two "equal" NaNs (signs may
382 differ) is chosen to reproduce 80486 behaviour */
383 signalling = isNaN && !(a->sigh & 0x40000000);
384
385 if ( !signalling )
386 {
387 if ( !isNaN ) /* pseudo-NaN, or other unsupported? */
388 {
389 if ( control_word & CW_Invalid )
390 {
391 /* Masked response */
392 reg_copy(&CONST_QNaN, a);
393 }
394 EXCEPTION(EX_Invalid);
395 return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
396 }
397 return TAG_Special;
398 }
399
400 if ( control_word & CW_Invalid )
401 {
402 /* The masked response */
403 if ( !(a->sigh & 0x80000000) ) /* pseudo-NaN ? */
404 {
405 reg_copy(&CONST_QNaN, a);
406 }
407 /* ensure a Quiet NaN */
408 a->sigh |= 0x40000000;
409 }
410
411 EXCEPTION(EX_Invalid);
412
413 return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
414}
415
416
417/* Real operation attempted on two operands, one a NaN. */
418/* Returns < 0 if the exception is unmasked */
419int real_2op_NaN(FPU_REG const *b, u_char tagb,
420 int deststnr,
421 FPU_REG const *defaultNaN)
422{
423 FPU_REG *dest = &st(deststnr);
424 FPU_REG const *a = dest;
425 u_char taga = FPU_gettagi(deststnr);
426 FPU_REG const *x;
427 int signalling, unsupported;
428
429 if ( taga == TAG_Special )
430 taga = FPU_Special(a);
431 if ( tagb == TAG_Special )
432 tagb = FPU_Special(b);
433
434 /* TW_NaN is also used for unsupported data types. */
435 unsupported = ((taga == TW_NaN)
436 && !((exponent(a) == EXP_OVER) && (a->sigh & 0x80000000)))
437 || ((tagb == TW_NaN)
438 && !((exponent(b) == EXP_OVER) && (b->sigh & 0x80000000)));
439 if ( unsupported )
440 {
441 if ( control_word & CW_Invalid )
442 {
443 /* Masked response */
444 FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr);
445 }
446 EXCEPTION(EX_Invalid);
447 return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
448 }
449
450 if (taga == TW_NaN)
451 {
452 x = a;
453 if (tagb == TW_NaN)
454 {
455 signalling = !(a->sigh & b->sigh & 0x40000000);
456 if ( significand(b) > significand(a) )
457 x = b;
458 else if ( significand(b) == significand(a) )
459 {
460 /* The default result for the case of two "equal" NaNs (signs may
461 differ) is chosen to reproduce 80486 behaviour */
462 x = defaultNaN;
463 }
464 }
465 else
466 {
467 /* return the quiet version of the NaN in a */
468 signalling = !(a->sigh & 0x40000000);
469 }
470 }
471 else
472#ifdef PARANOID
473 if (tagb == TW_NaN)
474#endif /* PARANOID */
475 {
476 signalling = !(b->sigh & 0x40000000);
477 x = b;
478 }
479#ifdef PARANOID
480 else
481 {
482 signalling = 0;
483 EXCEPTION(EX_INTERNAL|0x113);
484 x = &CONST_QNaN;
485 }
486#endif /* PARANOID */
487
488 if ( (!signalling) || (control_word & CW_Invalid) )
489 {
490 if ( ! x )
491 x = b;
492
493 if ( !(x->sigh & 0x80000000) ) /* pseudo-NaN ? */
494 x = &CONST_QNaN;
495
496 FPU_copy_to_regi(x, TAG_Special, deststnr);
497
498 if ( !signalling )
499 return TAG_Special;
500
501 /* ensure a Quiet NaN */
502 dest->sigh |= 0x40000000;
503 }
504
505 EXCEPTION(EX_Invalid);
506
507 return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
508}
509
510
511/* Invalid arith operation on Valid registers */
512/* Returns < 0 if the exception is unmasked */
513asmlinkage int arith_invalid(int deststnr)
514{
515
516 EXCEPTION(EX_Invalid);
517
518 if ( control_word & CW_Invalid )
519 {
520 /* The masked response */
521 FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr);
522 }
523
524 return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Valid;
525
526}
527
528
529/* Divide a finite number by zero */
530asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign)
531{
532 FPU_REG *dest = &st(deststnr);
533 int tag = TAG_Valid;
534
535 if ( control_word & CW_ZeroDiv )
536 {
537 /* The masked response */
538 FPU_copy_to_regi(&CONST_INF, TAG_Special, deststnr);
539 setsign(dest, sign);
540 tag = TAG_Special;
541 }
542
543 EXCEPTION(EX_ZeroDiv);
544
545 return (!(control_word & CW_ZeroDiv) ? FPU_Exception : 0) | tag;
546
547}
548
549
550/* This may be called often, so keep it lean */
551int set_precision_flag(int flags)
552{
553 if ( control_word & CW_Precision )
554 {
555 partial_status &= ~(SW_C1 & flags);
556 partial_status |= flags; /* The masked response */
557 return 0;
558 }
559 else
560 {
561 EXCEPTION(flags);
562 return 1;
563 }
564}
565
566
567/* This may be called often, so keep it lean */
568asmlinkage void set_precision_flag_up(void)
569{
570 if ( control_word & CW_Precision )
571 partial_status |= (SW_Precision | SW_C1); /* The masked response */
572 else
573 EXCEPTION(EX_Precision | SW_C1);
574}
575
576
577/* This may be called often, so keep it lean */
578asmlinkage void set_precision_flag_down(void)
579{
580 if ( control_word & CW_Precision )
581 { /* The masked response */
582 partial_status &= ~SW_C1;
583 partial_status |= SW_Precision;
584 }
585 else
586 EXCEPTION(EX_Precision);
587}
588
589
590asmlinkage int denormal_operand(void)
591{
592 if ( control_word & CW_Denormal )
593 { /* The masked response */
594 partial_status |= SW_Denorm_Op;
595 return TAG_Special;
596 }
597 else
598 {
599 EXCEPTION(EX_Denormal);
600 return TAG_Special | FPU_Exception;
601 }
602}
603
604
605asmlinkage int arith_overflow(FPU_REG *dest)
606{
607 int tag = TAG_Valid;
608
609 if ( control_word & CW_Overflow )
610 {
611 /* The masked response */
612/* ###### The response here depends upon the rounding mode */
613 reg_copy(&CONST_INF, dest);
614 tag = TAG_Special;
615 }
616 else
617 {
618 /* Subtract the magic number from the exponent */
619 addexponent(dest, (-3 * (1 << 13)));
620 }
621
622 EXCEPTION(EX_Overflow);
623 if ( control_word & CW_Overflow )
624 {
625 /* The overflow exception is masked. */
626 /* By definition, precision is lost.
627 The roundup bit (C1) is also set because we have
628 "rounded" upwards to Infinity. */
629 EXCEPTION(EX_Precision | SW_C1);
630 return tag;
631 }
632
633 return tag;
634
635}
636
637
638asmlinkage int arith_underflow(FPU_REG *dest)
639{
640 int tag = TAG_Valid;
641
642 if ( control_word & CW_Underflow )
643 {
644 /* The masked response */
645 if ( exponent16(dest) <= EXP_UNDER - 63 )
646 {
647 reg_copy(&CONST_Z, dest);
648 partial_status &= ~SW_C1; /* Round down. */
649 tag = TAG_Zero;
650 }
651 else
652 {
653 stdexp(dest);
654 }
655 }
656 else
657 {
658 /* Add the magic number to the exponent. */
659 addexponent(dest, (3 * (1 << 13)) + EXTENDED_Ebias);
660 }
661
662 EXCEPTION(EX_Underflow);
663 if ( control_word & CW_Underflow )
664 {
665 /* The underflow exception is masked. */
666 EXCEPTION(EX_Precision);
667 return tag;
668 }
669
670 return tag;
671
672}
673
674
675void FPU_stack_overflow(void)
676{
677
678 if ( control_word & CW_Invalid )
679 {
680 /* The masked response */
681 top--;
682 FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
683 }
684
685 EXCEPTION(EX_StackOver);
686
687 return;
688
689}
690
691
692void FPU_stack_underflow(void)
693{
694
695 if ( control_word & CW_Invalid )
696 {
697 /* The masked response */
698 FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
699 }
700
701 EXCEPTION(EX_StackUnder);
702
703 return;
704
705}
706
707
708void FPU_stack_underflow_i(int i)
709{
710
711 if ( control_word & CW_Invalid )
712 {
713 /* The masked response */
714 FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i);
715 }
716
717 EXCEPTION(EX_StackUnder);
718
719 return;
720
721}
722
723
724void FPU_stack_underflow_pop(int i)
725{
726
727 if ( control_word & CW_Invalid )
728 {
729 /* The masked response */
730 FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i);
731 FPU_pop();
732 }
733
734 EXCEPTION(EX_StackUnder);
735
736 return;
737
738}
739
diff --git a/arch/i386/math-emu/exception.h b/arch/i386/math-emu/exception.h
new file mode 100644
index 000000000000..b463f21a811e
--- /dev/null
+++ b/arch/i386/math-emu/exception.h
@@ -0,0 +1,53 @@
1/*---------------------------------------------------------------------------+
2 | exception.h |
3 | |
4 | Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
5 | Australia. E-mail billm@vaxc.cc.monash.edu.au |
6 | |
7 +---------------------------------------------------------------------------*/
8
9#ifndef _EXCEPTION_H_
10#define _EXCEPTION_H_
11
12
13#ifdef __ASSEMBLY__
14#define Const_(x) $##x
15#else
16#define Const_(x) x
17#endif
18
19#ifndef SW_C1
20#include "fpu_emu.h"
21#endif /* SW_C1 */
22
23#define FPU_BUSY Const_(0x8000) /* FPU busy bit (8087 compatibility) */
24#define EX_ErrorSummary Const_(0x0080) /* Error summary status */
25/* Special exceptions: */
26#define EX_INTERNAL Const_(0x8000) /* Internal error in wm-FPU-emu */
27#define EX_StackOver Const_(0x0041|SW_C1) /* stack overflow */
28#define EX_StackUnder Const_(0x0041) /* stack underflow */
29/* Exception flags: */
30#define EX_Precision Const_(0x0020) /* loss of precision */
31#define EX_Underflow Const_(0x0010) /* underflow */
32#define EX_Overflow Const_(0x0008) /* overflow */
33#define EX_ZeroDiv Const_(0x0004) /* divide by zero */
34#define EX_Denormal Const_(0x0002) /* denormalized operand */
35#define EX_Invalid Const_(0x0001) /* invalid operation */
36
37
38#define PRECISION_LOST_UP Const_((EX_Precision | SW_C1))
39#define PRECISION_LOST_DOWN Const_(EX_Precision)
40
41
42#ifndef __ASSEMBLY__
43
#ifdef DEBUG
/*
 * Debug variant: log where the exception was raised, then raise it.
 * Wrapped in do { } while (0) so that the macro expands to exactly one
 * statement and can be used as "if (c) EXCEPTION(x); else ..." just like
 * the non-debug variant.
 */
#define EXCEPTION(x) do { printk("exception in %s at line %d\n", \
	__FILE__, __LINE__); FPU_exception(x); } while (0)
#else
/* Normal variant: hand the exception straight to FPU_exception(). */
#define EXCEPTION(x) FPU_exception(x)
#endif
50
51#endif /* __ASSEMBLY__ */
52
53#endif /* _EXCEPTION_H_ */
diff --git a/arch/i386/math-emu/fpu_arith.c b/arch/i386/math-emu/fpu_arith.c
new file mode 100644
index 000000000000..6972dec01af6
--- /dev/null
+++ b/arch/i386/math-emu/fpu_arith.c
@@ -0,0 +1,174 @@
1/*---------------------------------------------------------------------------+
2 | fpu_arith.c |
3 | |
4 | Code to implement the FPU register/register arithmetic instructions |
5 | |
6 | Copyright (C) 1992,1993,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13#include "fpu_system.h"
14#include "fpu_emu.h"
15#include "control_w.h"
16#include "status_w.h"
17
18
19void fadd__(void)
20{
21 /* fadd st,st(i) */
22 int i = FPU_rm;
23 clear_C1();
24 FPU_add(&st(i), FPU_gettagi(i), 0, control_word);
25}
26
27
28void fmul__(void)
29{
30 /* fmul st,st(i) */
31 int i = FPU_rm;
32 clear_C1();
33 FPU_mul(&st(i), FPU_gettagi(i), 0, control_word);
34}
35
36
37
38void fsub__(void)
39{
40 /* fsub st,st(i) */
41 clear_C1();
42 FPU_sub(0, FPU_rm, control_word);
43}
44
45
46void fsubr_(void)
47{
48 /* fsubr st,st(i) */
49 clear_C1();
50 FPU_sub(REV, FPU_rm, control_word);
51}
52
53
54void fdiv__(void)
55{
56 /* fdiv st,st(i) */
57 clear_C1();
58 FPU_div(0, FPU_rm, control_word);
59}
60
61
62void fdivr_(void)
63{
64 /* fdivr st,st(i) */
65 clear_C1();
66 FPU_div(REV, FPU_rm, control_word);
67}
68
69
70
71void fadd_i(void)
72{
73 /* fadd st(i),st */
74 int i = FPU_rm;
75 clear_C1();
76 FPU_add(&st(i), FPU_gettagi(i), i, control_word);
77}
78
79
80void fmul_i(void)
81{
82 /* fmul st(i),st */
83 clear_C1();
84 FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word);
85}
86
87
88void fsubri(void)
89{
90 /* fsubr st(i),st */
91 clear_C1();
92 FPU_sub(DEST_RM, FPU_rm, control_word);
93}
94
95
96void fsub_i(void)
97{
98 /* fsub st(i),st */
99 clear_C1();
100 FPU_sub(REV|DEST_RM, FPU_rm, control_word);
101}
102
103
104void fdivri(void)
105{
106 /* fdivr st(i),st */
107 clear_C1();
108 FPU_div(DEST_RM, FPU_rm, control_word);
109}
110
111
112void fdiv_i(void)
113{
114 /* fdiv st(i),st */
115 clear_C1();
116 FPU_div(REV|DEST_RM, FPU_rm, control_word);
117}
118
119
120
121void faddp_(void)
122{
123 /* faddp st(i),st */
124 int i = FPU_rm;
125 clear_C1();
126 if ( FPU_add(&st(i), FPU_gettagi(i), i, control_word) >= 0 )
127 FPU_pop();
128}
129
130
131void fmulp_(void)
132{
133 /* fmulp st(i),st */
134 clear_C1();
135 if ( FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word) >= 0 )
136 FPU_pop();
137}
138
139
140
141void fsubrp(void)
142{
143 /* fsubrp st(i),st */
144 clear_C1();
145 if ( FPU_sub(DEST_RM, FPU_rm, control_word) >= 0 )
146 FPU_pop();
147}
148
149
150void fsubp_(void)
151{
152 /* fsubp st(i),st */
153 clear_C1();
154 if ( FPU_sub(REV|DEST_RM, FPU_rm, control_word) >= 0 )
155 FPU_pop();
156}
157
158
159void fdivrp(void)
160{
161 /* fdivrp st(i),st */
162 clear_C1();
163 if ( FPU_div(DEST_RM, FPU_rm, control_word) >= 0 )
164 FPU_pop();
165}
166
167
168void fdivp_(void)
169{
170 /* fdivp st(i),st */
171 clear_C1();
172 if ( FPU_div(REV|DEST_RM, FPU_rm, control_word) >= 0 )
173 FPU_pop();
174}
diff --git a/arch/i386/math-emu/fpu_asm.h b/arch/i386/math-emu/fpu_asm.h
new file mode 100644
index 000000000000..9ba12416df12
--- /dev/null
+++ b/arch/i386/math-emu/fpu_asm.h
@@ -0,0 +1,32 @@
1/*---------------------------------------------------------------------------+
2 | fpu_asm.h |
3 | |
4 | Copyright (C) 1992,1995,1997 |
5 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
6 | Australia. E-mail billm@suburbia.net |
7 | |
8 +---------------------------------------------------------------------------*/
9
10#ifndef _FPU_ASM_H_
11#define _FPU_ASM_H_
12
13#include <linux/linkage.h>
14
15#define EXCEPTION FPU_exception
16
17
18#define PARAM1 8(%ebp)
19#define PARAM2 12(%ebp)
20#define PARAM3 16(%ebp)
21#define PARAM4 20(%ebp)
22#define PARAM5 24(%ebp)
23#define PARAM6 28(%ebp)
24#define PARAM7 32(%ebp)
25
26#define SIGL_OFFSET 0
27#define EXP(x) 8(x)
28#define SIG(x) SIGL_OFFSET##(x)
29#define SIGL(x) SIGL_OFFSET##(x)
30#define SIGH(x) 4(x)
31
32#endif /* _FPU_ASM_H_ */
diff --git a/arch/i386/math-emu/fpu_aux.c b/arch/i386/math-emu/fpu_aux.c
new file mode 100644
index 000000000000..20886cfb9f76
--- /dev/null
+++ b/arch/i386/math-emu/fpu_aux.c
@@ -0,0 +1,204 @@
1/*---------------------------------------------------------------------------+
2 | fpu_aux.c |
3 | |
4 | Code to implement some of the FPU auxiliary instructions. |
5 | |
6 | Copyright (C) 1992,1993,1994,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13#include "fpu_system.h"
14#include "exception.h"
15#include "fpu_emu.h"
16#include "status_w.h"
17#include "control_w.h"
18
19
/* Deliberately empty: used for table slots whose instruction does nothing. */
static void fnop(void)
{
}
23
24static void fclex(void)
25{
26 partial_status &= ~(SW_Backward|SW_Summary|SW_Stack_Fault|SW_Precision|
27 SW_Underflow|SW_Overflow|SW_Zero_Div|SW_Denorm_Op|
28 SW_Invalid);
29 no_ip_update = 1;
30}
31
32/* Needs to be externally visible */
33void finit(void)
34{
35 control_word = 0x037f;
36 partial_status = 0;
37 top = 0; /* We don't keep top in the status word internally. */
38 fpu_tag_word = 0xffff;
39 /* The behaviour is different from that detailed in
40 Section 15.1.6 of the Intel manual */
41 operand_address.offset = 0;
42 operand_address.selector = 0;
43 instruction_address.offset = 0;
44 instruction_address.selector = 0;
45 instruction_address.opcode = 0;
46 no_ip_update = 1;
47}
48
49/*
50 * These are nops on the i387..
51 */
52#define feni fnop
53#define fdisi fnop
54#define fsetpm fnop
55
56static FUNC const finit_table[] = {
57 feni, fdisi, fclex, finit,
58 fsetpm, FPU_illegal, FPU_illegal, FPU_illegal
59};
60
61void finit_(void)
62{
63 (finit_table[FPU_rm])();
64}
65
66
67static void fstsw_ax(void)
68{
69 *(short *) &FPU_EAX = status_word();
70 no_ip_update = 1;
71}
72
73static FUNC const fstsw_table[] = {
74 fstsw_ax, FPU_illegal, FPU_illegal, FPU_illegal,
75 FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal
76};
77
78void fstsw_(void)
79{
80 (fstsw_table[FPU_rm])();
81}
82
83
84static FUNC const fp_nop_table[] = {
85 fnop, FPU_illegal, FPU_illegal, FPU_illegal,
86 FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal
87};
88
89void fp_nop(void)
90{
91 (fp_nop_table[FPU_rm])();
92}
93
94
95void fld_i_(void)
96{
97 FPU_REG *st_new_ptr;
98 int i;
99 u_char tag;
100
101 if ( STACK_OVERFLOW )
102 { FPU_stack_overflow(); return; }
103
104 /* fld st(i) */
105 i = FPU_rm;
106 if ( NOT_EMPTY(i) )
107 {
108 reg_copy(&st(i), st_new_ptr);
109 tag = FPU_gettagi(i);
110 push();
111 FPU_settag0(tag);
112 }
113 else
114 {
115 if ( control_word & CW_Invalid )
116 {
117 /* The masked response */
118 FPU_stack_underflow();
119 }
120 else
121 EXCEPTION(EX_StackUnder);
122 }
123
124}
125
126
127void fxch_i(void)
128{
129 /* fxch st(i) */
130 FPU_REG t;
131 int i = FPU_rm;
132 FPU_REG *st0_ptr = &st(0), *sti_ptr = &st(i);
133 long tag_word = fpu_tag_word;
134 int regnr = top & 7, regnri = ((regnr + i) & 7);
135 u_char st0_tag = (tag_word >> (regnr*2)) & 3;
136 u_char sti_tag = (tag_word >> (regnri*2)) & 3;
137
138 if ( st0_tag == TAG_Empty )
139 {
140 if ( sti_tag == TAG_Empty )
141 {
142 FPU_stack_underflow();
143 FPU_stack_underflow_i(i);
144 return;
145 }
146 if ( control_word & CW_Invalid )
147 {
148 /* Masked response */
149 FPU_copy_to_reg0(sti_ptr, sti_tag);
150 }
151 FPU_stack_underflow_i(i);
152 return;
153 }
154 if ( sti_tag == TAG_Empty )
155 {
156 if ( control_word & CW_Invalid )
157 {
158 /* Masked response */
159 FPU_copy_to_regi(st0_ptr, st0_tag, i);
160 }
161 FPU_stack_underflow();
162 return;
163 }
164 clear_C1();
165
166 reg_copy(st0_ptr, &t);
167 reg_copy(sti_ptr, st0_ptr);
168 reg_copy(&t, sti_ptr);
169
170 tag_word &= ~(3 << (regnr*2)) & ~(3 << (regnri*2));
171 tag_word |= (sti_tag << (regnr*2)) | (st0_tag << (regnri*2));
172 fpu_tag_word = tag_word;
173}
174
175
176void ffree_(void)
177{
178 /* ffree st(i) */
179 FPU_settagi(FPU_rm, TAG_Empty);
180}
181
182
183void ffreep(void)
184{
185 /* ffree st(i) + pop - unofficial code */
186 FPU_settagi(FPU_rm, TAG_Empty);
187 FPU_pop();
188}
189
190
191void fst_i_(void)
192{
193 /* fst st(i) */
194 FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm);
195}
196
197
198void fstp_i(void)
199{
200 /* fstp st(i) */
201 FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm);
202 FPU_pop();
203}
204
diff --git a/arch/i386/math-emu/fpu_emu.h b/arch/i386/math-emu/fpu_emu.h
new file mode 100644
index 000000000000..d62b20a3e660
--- /dev/null
+++ b/arch/i386/math-emu/fpu_emu.h
@@ -0,0 +1,217 @@
1/*---------------------------------------------------------------------------+
2 | fpu_emu.h |
3 | |
4 | Copyright (C) 1992,1993,1994,1997 |
5 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
6 | Australia. E-mail billm@suburbia.net |
7 | |
8 +---------------------------------------------------------------------------*/
9
10
11#ifndef _FPU_EMU_H_
12#define _FPU_EMU_H_
13
14/*
15 * Define PECULIAR_486 to get a closer approximation to 80486 behaviour,
16 * rather than behaviour which appears to be cleaner.
17 * This is a matter of opinion: for all I know, the 80486 may simply
18 * be complying with the IEEE spec. Maybe one day I'll get to see the
19 * spec...
20 */
21#define PECULIAR_486
22
23#ifdef __ASSEMBLY__
24#include "fpu_asm.h"
25#define Const(x) $##x
26#else
27#define Const(x) x
28#endif
29
30#define EXP_BIAS Const(0)
31#define EXP_OVER Const(0x4000) /* smallest invalid large exponent */
32#define EXP_UNDER Const(-0x3fff) /* largest invalid small exponent */
33#define EXP_WAY_UNDER Const(-0x6000) /* Below the smallest denormal, but
34 still a 16 bit nr. */
35#define EXP_Infinity EXP_OVER
36#define EXP_NaN EXP_OVER
37
38#define EXTENDED_Ebias Const(0x3fff)
39#define EXTENDED_Emin (-0x3ffe) /* smallest valid exponent */
40
41#define SIGN_POS Const(0)
42#define SIGN_NEG Const(0x80)
43
44#define SIGN_Positive Const(0)
45#define SIGN_Negative Const(0x8000)
46
47
48/* Keep the order TAG_Valid, TAG_Zero, TW_Denormal */
49/* The following fold to 2 (Special) in the Tag Word */
50#define TW_Denormal Const(4) /* De-normal */
51#define TW_Infinity Const(5) /* + or - infinity */
52#define TW_NaN Const(6) /* Not a Number */
53#define TW_Unsupported Const(7) /* Not supported by an 80486 */
54
55#define TAG_Valid Const(0) /* valid */
56#define TAG_Zero Const(1) /* zero */
57#define TAG_Special Const(2) /* De-normal, + or - infinity,
58 or Not a Number */
59#define TAG_Empty Const(3) /* empty */
60
61#define LOADED_DATA Const(10101) /* Special st() number to identify
62 loaded data (not on stack). */
63
64/* A few flags (must be >= 0x10). */
65#define REV 0x10
66#define DEST_RM 0x20
67#define LOADED 0x40
68
69#define FPU_Exception Const(0x80000000) /* Added to tag returns. */
70
71
72#ifndef __ASSEMBLY__
73
74#include "fpu_system.h"
75
76#include <asm/sigcontext.h> /* for struct _fpstate */
77#include <asm/math_emu.h>
78#include <linux/linkage.h>
79
80/*
81#define RE_ENTRANT_CHECKING
82 */
83
84#ifdef RE_ENTRANT_CHECKING
85extern u_char emulating;
86# define RE_ENTRANT_CHECK_OFF emulating = 0
87# define RE_ENTRANT_CHECK_ON emulating = 1
88#else
89# define RE_ENTRANT_CHECK_OFF
90# define RE_ENTRANT_CHECK_ON
91#endif /* RE_ENTRANT_CHECKING */
92
93#define FWAIT_OPCODE 0x9b
94#define OP_SIZE_PREFIX 0x66
95#define ADDR_SIZE_PREFIX 0x67
96#define PREFIX_CS 0x2e
97#define PREFIX_DS 0x3e
98#define PREFIX_ES 0x26
99#define PREFIX_SS 0x36
100#define PREFIX_FS 0x64
101#define PREFIX_GS 0x65
102#define PREFIX_REPE 0xf3
103#define PREFIX_REPNE 0xf2
104#define PREFIX_LOCK 0xf0
105#define PREFIX_CS_ 1
106#define PREFIX_DS_ 2
107#define PREFIX_ES_ 3
108#define PREFIX_FS_ 4
109#define PREFIX_GS_ 5
110#define PREFIX_SS_ 6
111#define PREFIX_DEFAULT 7
112
/* A selector:offset pair plus an op-code, packed into two 32-bit words
   (32 + 16 + 11 + 5 bits). */
113struct address {
/* Offset part of the address. */
114 unsigned int offset;
/* 16-bit segment selector. */
115 unsigned int selector:16;
/* 11-bit op-code field. */
116 unsigned int opcode:11;
/* Remaining 5 bits of the second word. */
117 unsigned int empty:5;
118};
/* One emulated FPU register.  The register file is addressed with a 10 byte
   stride (see fpu_register() and st()), so only the first 10 bytes of this
   layout are significant: 8 bytes of significand followed by 2 bytes of
   sign/exponent. */
119struct fpu__reg {
/* sigl is where the 64-bit value read by significand() starts; sigh
   follows it directly. */
120 unsigned sigl;
121 unsigned sigh;
/* 16-bit exponent word; signbyte() addresses byte 9 of the register,
   i.e. the second byte of this field, whose 0x80 bit is the sign. */
122 short exp;
123};
124
125typedef void (*FUNC)(void);
126typedef struct fpu__reg FPU_REG;
127typedef void (*FUNC_ST0)(FPU_REG *st0_ptr, u_char st0_tag);
128typedef struct { u_char address_size, operand_size, segment; }
129 overrides;
130/* This structure is 32 bits: */
131typedef struct { overrides override;
132 u_char default_mode; } fpu_addr_modes;
133/* PROTECTED has a restricted meaning in the emulator; it is used
134 to signal that the emulator needs to do special things to ensure
135 that protection is respected in a segmented model. */
136#define PROTECTED 4
137#define SIXTEEN 1 /* We rely upon this being 1 (true) */
138#define VM86 SIXTEEN
139#define PM16 (SIXTEEN | PROTECTED)
140#define SEG32 PROTECTED
141extern u_char const data_sizes_16[32];
142
143#define register_base ((u_char *) registers )
144#define fpu_register(x) ( * ((FPU_REG *)( register_base + 10 * (x & 7) )) )
145#define st(x) ( * ((FPU_REG *)( register_base + 10 * ((top+x) & 7) )) )
146
147#define STACK_OVERFLOW (FPU_stackoverflow(&st_new_ptr))
148#define NOT_EMPTY(i) (!FPU_empty_i(i))
149
150#define NOT_EMPTY_ST0 (st0_tag ^ TAG_Empty)
151
152#define poppop() { FPU_pop(); FPU_pop(); }
153
154/* push() does not affect the tags */
155#define push() { top--; }
156
157#define signbyte(a) (((u_char *)(a))[9])
158#define getsign(a) (signbyte(a) & 0x80)
159#define setsign(a,b) { if (b) signbyte(a) |= 0x80; else signbyte(a) &= 0x7f; }
160#define copysign(a,b) { if (getsign(a)) signbyte(b) |= 0x80; \
161 else signbyte(b) &= 0x7f; }
162#define changesign(a) { signbyte(a) ^= 0x80; }
163#define setpositive(a) { signbyte(a) &= 0x7f; }
164#define setnegative(a) { signbyte(a) |= 0x80; }
165#define signpositive(a) ( (signbyte(a) & 0x80) == 0 )
166#define signnegative(a) (signbyte(a) & 0x80)
167
168static inline void reg_copy(FPU_REG const *x, FPU_REG *y)
169{
170 *(short *)&(y->exp) = *(const short *)&(x->exp);
171 *(long long *)&(y->sigl) = *(const long long *)&(x->sigl);
172}
173
174#define exponent(x) (((*(short *)&((x)->exp)) & 0x7fff) - EXTENDED_Ebias)
175#define setexponentpos(x,y) { (*(short *)&((x)->exp)) = \
176 ((y) + EXTENDED_Ebias) & 0x7fff; }
177#define exponent16(x) (*(short *)&((x)->exp))
178#define setexponent16(x,y) { (*(short *)&((x)->exp)) = (y); }
179#define addexponent(x,y) { (*(short *)&((x)->exp)) += (y); }
180#define stdexp(x) { (*(short *)&((x)->exp)) += EXTENDED_Ebias; }
181
182#define isdenormal(ptr) (exponent(ptr) == EXP_BIAS+EXP_UNDER)
183
184#define significand(x) ( ((unsigned long long *)&((x)->sigl))[0] )
185
186
187/*----- Prototypes for functions written in assembler -----*/
188/* extern void reg_move(FPU_REG *a, FPU_REG *b); */
189
190asmlinkage int FPU_normalize(FPU_REG *x);
191asmlinkage int FPU_normalize_nuo(FPU_REG *x);
192asmlinkage int FPU_u_sub(FPU_REG const *arg1, FPU_REG const *arg2,
193 FPU_REG *answ, unsigned int control_w, u_char sign,
194 int expa, int expb);
195asmlinkage int FPU_u_mul(FPU_REG const *arg1, FPU_REG const *arg2,
196 FPU_REG *answ, unsigned int control_w, u_char sign,
197 int expon);
198asmlinkage int FPU_u_div(FPU_REG const *arg1, FPU_REG const *arg2,
199 FPU_REG *answ, unsigned int control_w, u_char sign);
200asmlinkage int FPU_u_add(FPU_REG const *arg1, FPU_REG const *arg2,
201 FPU_REG *answ, unsigned int control_w, u_char sign,
202 int expa, int expb);
203asmlinkage int wm_sqrt(FPU_REG *n, int dummy1, int dummy2,
204 unsigned int control_w, u_char sign);
205asmlinkage unsigned FPU_shrx(void *l, unsigned x);
206asmlinkage unsigned FPU_shrxs(void *v, unsigned x);
207asmlinkage unsigned long FPU_div_small(unsigned long long *x, unsigned long y);
208asmlinkage int FPU_round(FPU_REG *arg, unsigned int extent, int dummy,
209 unsigned int control_w, u_char sign);
210
211#ifndef MAKING_PROTO
212#include "fpu_proto.h"
213#endif
214
215#endif /* __ASSEMBLY__ */
216
217#endif /* _FPU_EMU_H_ */
diff --git a/arch/i386/math-emu/fpu_entry.c b/arch/i386/math-emu/fpu_entry.c
new file mode 100644
index 000000000000..d93f16ef828f
--- /dev/null
+++ b/arch/i386/math-emu/fpu_entry.c
@@ -0,0 +1,760 @@
1/*---------------------------------------------------------------------------+
2 | fpu_entry.c |
3 | |
4 | The entry functions for wm-FPU-emu |
5 | |
6 | Copyright (C) 1992,1993,1994,1996,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@suburbia.net |
9 | |
10 | See the files "README" and "COPYING" for further copyright and warranty |
11 | information. |
12 | |
13 +---------------------------------------------------------------------------*/
14
15/*---------------------------------------------------------------------------+
16 | Note: |
17 | The file contains code which accesses user memory. |
18 | Emulator static data may change when user memory is accessed, due to |
19 | other processes using the emulator while swapping is in progress. |
20 +---------------------------------------------------------------------------*/
21
22/*---------------------------------------------------------------------------+
23 | math_emulate(), restore_i387_soft() and save_i387_soft() are the only |
24 | entry points for wm-FPU-emu. |
25 +---------------------------------------------------------------------------*/
26
27#include <linux/signal.h>
28#include <linux/ptrace.h>
29
30#include <asm/uaccess.h>
31#include <asm/desc.h>
32
33#include "fpu_system.h"
34#include "fpu_emu.h"
35#include "exception.h"
36#include "control_w.h"
37#include "status_w.h"
38
39#define __BAD__ FPU_illegal /* Illegal on an 80486, causes SIGILL */
40
41#ifndef NO_UNDOC_CODE /* Un-documented FPU op-codes supported by default. */
42
43/* WARNING: These codes are not documented by Intel in their 80486 manual
44 and may not work on FPU clones or later Intel FPUs. */
45
46/* Changes to support the un-doc codes provided by Linus Torvalds. */
47
48#define _d9_d8_ fstp_i /* unofficial code (19) */
49#define _dc_d0_ fcom_st /* unofficial code (14) */
50#define _dc_d8_ fcompst /* unofficial code (1c) */
51#define _dd_c8_ fxch_i /* unofficial code (0d) */
52#define _de_d0_ fcompst /* unofficial code (16) */
53#define _df_c0_ ffreep /* unofficial code (07) ffree + pop */
54#define _df_c8_ fxch_i /* unofficial code (0f) */
55#define _df_d0_ fstp_i /* unofficial code (17) */
56#define _df_d8_ fstp_i /* unofficial code (1f) */
57
58static FUNC const st_instr_table[64] = {
59 fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, _df_c0_,
60 fmul__, fxch_i, __BAD__, __BAD__, fmul_i, _dd_c8_, fmulp_, _df_c8_,
61 fcom_st, fp_nop, __BAD__, __BAD__, _dc_d0_, fst_i_, _de_d0_, _df_d0_,
62 fcompst, _d9_d8_, __BAD__, __BAD__, _dc_d8_, fstp_i, fcompp, _df_d8_,
63 fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_,
64 fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__,
65 fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__,
66 fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__,
67};
68
69#else /* Support only documented FPU op-codes */
70
71static FUNC const st_instr_table[64] = {
72 fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, __BAD__,
73 fmul__, fxch_i, __BAD__, __BAD__, fmul_i, __BAD__, fmulp_, __BAD__,
74 fcom_st, fp_nop, __BAD__, __BAD__, __BAD__, fst_i_, __BAD__, __BAD__,
75 fcompst, __BAD__, __BAD__, __BAD__, __BAD__, fstp_i, fcompp, __BAD__,
76 fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_,
77 fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__,
78 fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__,
79 fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__,
80};
81
82#endif /* NO_UNDOC_CODE */
83
84
/* Operand-check classes stored in type_table[]; math_emulate() switches on
   these before dispatching a register-form instruction.
   Note that _REGi_, _REGIc and _REGIn share the value 0 with _NONE_, so the
   dispatcher performs no up-front empty-register check for any of them. */
85#define _NONE_ 0 /* Take no special action */
86#define _REG0_ 1 /* Need to check for not empty st(0) */
87#define _REGI_ 2 /* Need to check for not empty st(0) and st(rm) */
88#define _REGi_ 0 /* Uses st(rm) */
89#define _PUSH_ 3 /* Need to check for space to push onto stack */
90#define _null_ 4 /* Function illegal or not implemented */
91#define _REGIi 5 /* Uses st(0) and st(rm), result to st(rm) */
92#define _REGIp 6 /* Uses st(0) and st(rm), result to st(rm) then pop */
93#define _REGIc 0 /* Compare st(0) and st(rm) */
94#define _REGIn 0 /* Uses st(0) and st(rm), but handle checks later */
95
96#ifndef NO_UNDOC_CODE
97
98/* Un-documented FPU op-codes supported by default. (see above) */
99
100static u_char const type_table[64] = {
101 _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_,
102 _REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_,
103 _REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
104 _REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
105 _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
106 _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
107 _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
108 _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
109};
110
111#else /* Support only documented FPU op-codes */
112
113static u_char const type_table[64] = {
114 _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _null_,
115 _REGI_, _REGIn, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
116 _REGIc, _NONE_, _null_, _null_, _null_, _REG0_, _null_, _null_,
117 _REGIc, _null_, _null_, _null_, _null_, _REG0_, _REGIc, _null_,
118 _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
119 _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
120 _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
121 _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
122};
123
124#endif /* NO_UNDOC_CODE */
125
126
127#ifdef RE_ENTRANT_CHECKING
128u_char emulating=0;
129#endif /* RE_ENTRANT_CHECKING */
130
131static int valid_prefix(u_char *Byte, u_char __user **fpu_eip,
132 overrides *override);
133
/*
 * math_emulate() - main entry point of the emulator.
 *
 * Outline of what the code below does:
 *  - initialises the emulated FPU with finit() the first time the task
 *    uses math (used_math() / set_used_math());
 *  - works out the addressing mode from the saved EFLAGS and CS: virtual
 *    8086 mode, flat user mode (default_mode 0), or a segment taken from
 *    the LDT (SEG32 or PM16); a kernel CS panics, and a non-LDT selector
 *    is aborted with SIGILL;
 *  - decodes prefix bytes with valid_prefix(), then the op-code byte and
 *    the mod/rm byte;
 *  - if an exception is pending (SW_Summary set in partial_status) a
 *    SIGFPE is sent instead of executing the instruction, unless the
 *    instruction is one of the no-wait control instructions;
 *  - mod/rm values below 0300 are memory-operand forms (arithmetic with a
 *    loaded operand, or FPU_load_store()); other values are register forms
 *    dispatched through type_table[] and st_instr_table[];
 *  - while FPU_lookahead is set and no reschedule is needed, it keeps
 *    emulating directly following FPU instructions.
 */
134asmlinkage void math_emulate(long arg)
135{
136 u_char FPU_modrm, byte1;
137 unsigned short code;
138 fpu_addr_modes addr_modes;
139 int unmasked;
140 FPU_REG loaded_data;
141 FPU_REG *st0_ptr;
142 u_char loaded_tag, st0_tag;
143 void __user *data_address;
144 struct address data_sel_off;
145 struct address entry_sel_off;
146 unsigned long code_base = 0;
147 unsigned long code_limit = 0; /* Initialized to stop compiler warnings */
148 struct desc_struct code_descriptor;
149
150#ifdef RE_ENTRANT_CHECKING
151 if ( emulating )
152 {
153 printk("ERROR: wm-FPU-emu is not RE-ENTRANT!\n");
154 }
155 RE_ENTRANT_CHECK_ON;
156#endif /* RE_ENTRANT_CHECKING */
157
158 if (!used_math())
159 {
160 finit();
161 set_used_math();
162 }
163
164 SETUP_DATA_AREA(arg);
165
166 FPU_ORIG_EIP = FPU_EIP;
167
168 if ( (FPU_EFLAGS & 0x00020000) != 0 )
169 {
170 /* Virtual 8086 mode */
171 addr_modes.default_mode = VM86;
172 FPU_EIP += code_base = FPU_CS << 4;
173 code_limit = code_base + 0xffff; /* Assumes code_base <= 0xffff0000 */
174 }
175 else if ( FPU_CS == __USER_CS && FPU_DS == __USER_DS )
176 {
177 addr_modes.default_mode = 0;
178 }
179 else if ( FPU_CS == __KERNEL_CS )
180 {
181 printk("math_emulate: %04x:%08lx\n",FPU_CS,FPU_EIP);
182 panic("Math emulation needed in kernel");
183 }
184 else
185 {
186
187 if ( (FPU_CS & 4) != 4 ) /* Must be in the LDT */
188 {
189 /* Can only handle segmented addressing via the LDT
190 for now, and it must be 16 bit */
191 printk("FPU emulator: Unsupported addressing mode\n");
192 math_abort(FPU_info, SIGILL);
193 }
194
195 code_descriptor = LDT_DESCRIPTOR(FPU_CS);
196 if ( SEG_D_SIZE(code_descriptor) )
197 {
198 /* The above test may be wrong, the book is not clear */
199 /* Segmented 32 bit protected mode */
200 addr_modes.default_mode = SEG32;
201 }
202 else
203 {
204 /* 16 bit protected mode */
205 addr_modes.default_mode = PM16;
206 }
207 FPU_EIP += code_base = SEG_BASE_ADDR(code_descriptor);
208 code_limit = code_base
209 + (SEG_LIMIT(code_descriptor)+1) * SEG_GRANULARITY(code_descriptor)
210 - 1;
/* If the limit computation wrapped around, clamp it to the top of the
   address space. */
211 if ( code_limit < code_base ) code_limit = 0xffffffff;
212 }
213
/* Look-ahead (emulating several consecutive FPU instructions in one trap)
   is switched off while the task is being ptraced. */
214 FPU_lookahead = 1;
215 if (current->ptrace & PT_PTRACED)
216 FPU_lookahead = 0;
217
218 if ( !valid_prefix(&byte1, (u_char __user **)&FPU_EIP,
219 &addr_modes.override) )
220 {
221 RE_ENTRANT_CHECK_OFF;
222 printk("FPU emulator: Unknown prefix byte 0x%02x, probably due to\n"
223 "FPU emulator: self-modifying code! (emulation impossible)\n",
224 byte1);
225 RE_ENTRANT_CHECK_ON;
226 EXCEPTION(EX_INTERNAL|0x126);
227 math_abort(FPU_info,SIGILL);
228 }
229
230do_another_FPU_instruction:
231
232 no_ip_update = 0;
233
234 FPU_EIP++; /* We have fetched the prefix and first code bytes. */
235
236 if ( addr_modes.default_mode )
237 {
238 /* This checks for the minimum instruction bytes.
239 We also need to check any extra (address mode) code access. */
240 if ( FPU_EIP > code_limit )
241 math_abort(FPU_info,SIGSEGV);
242 }
243
/* valid_prefix() only succeeds for 0xd8..0xdf op-codes or FWAIT, so
   anything else reaching this point is an internal error (PARANOID). */
244 if ( (byte1 & 0xf8) != 0xd8 )
245 {
246 if ( byte1 == FWAIT_OPCODE )
247 {
248 if (partial_status & SW_Summary)
249 goto do_the_FPU_interrupt;
250 else
251 goto FPU_fwait_done;
252 }
253#ifdef PARANOID
254 EXCEPTION(EX_INTERNAL|0x128);
255 math_abort(FPU_info,SIGILL);
256#endif /* PARANOID */
257 }
258
259 RE_ENTRANT_CHECK_OFF;
260 FPU_code_access_ok(1);
261 FPU_get_user(FPU_modrm, (u_char __user *) FPU_EIP);
262 RE_ENTRANT_CHECK_ON;
263 FPU_EIP++;
264
265 if (partial_status & SW_Summary)
266 {
267 /* Ignore the error for now if the current instruction is a no-wait
268 control instruction */
269 /* The 80486 manual contradicts itself on this topic,
270 but a real 80486 uses the following instructions:
271 fninit, fnstenv, fnsave, fnstsw, fnstcw, fnclex.
272 */
273 code = (FPU_modrm << 8) | byte1;
274 if ( ! ( (((code & 0xf803) == 0xe003) || /* fnclex, fninit, fnstsw */
275 (((code & 0x3003) == 0x3001) && /* fnsave, fnstcw, fnstenv,
276 fnstsw */
277 ((code & 0xc000) != 0xc000))) ) )
278 {
279 /*
280 * We need to simulate the action of the kernel to FPU
281 * interrupts here.
282 */
283 do_the_FPU_interrupt:
284
285 FPU_EIP = FPU_ORIG_EIP; /* Point to current FPU instruction. */
286
287 RE_ENTRANT_CHECK_OFF;
288 current->thread.trap_no = 16;
289 current->thread.error_code = 0;
290 send_sig(SIGFPE, current, 1);
291 return;
292 }
293 }
294
/* Remember where this instruction started; this is copied to
   instruction_address further down unless no_ip_update is set. */
295 entry_sel_off.offset = FPU_ORIG_EIP;
296 entry_sel_off.selector = FPU_CS;
297 entry_sel_off.opcode = (byte1 << 8) | FPU_modrm;
298
299 FPU_rm = FPU_modrm & 7;
300
301 if ( FPU_modrm < 0300 )
302 {
303 /* All of these instructions use the mod/rm byte to get a data address */
304
305 if ( (addr_modes.default_mode & SIXTEEN)
306 ^ (addr_modes.override.address_size == ADDR_SIZE_PREFIX) )
307 data_address = FPU_get_address_16(FPU_modrm, &FPU_EIP, &data_sel_off,
308 addr_modes);
309 else
310 data_address = FPU_get_address(FPU_modrm, &FPU_EIP, &data_sel_off,
311 addr_modes);
312
313 if ( addr_modes.default_mode )
314 {
315 if ( FPU_EIP-1 > code_limit )
316 math_abort(FPU_info,SIGSEGV);
317 }
318
/* Even first op-code bytes (d8, da, dc, de) are arithmetic on st(0) with a
   memory operand; odd ones go to FPU_load_store() in the else branch. */
319 if ( !(byte1 & 1) )
320 {
321 unsigned short status1 = partial_status;
322
323 st0_ptr = &st(0);
324 st0_tag = FPU_gettag0();
325
326 /* Stack underflow has priority */
327 if ( NOT_EMPTY_ST0 )
328 {
329 if ( addr_modes.default_mode & PROTECTED )
330 {
331 /* This table works for 16 and 32 bit protected mode */
332 if ( access_limit < data_sizes_16[(byte1 >> 1) & 3] )
333 math_abort(FPU_info,SIGSEGV);
334 }
335
336 unmasked = 0; /* Do this here to stop compiler warnings. */
/* Bits 1-2 of the op-code byte select the memory operand format. */
337 switch ( (byte1 >> 1) & 3 )
338 {
339 case 0:
340 unmasked = FPU_load_single((float __user *)data_address,
341 &loaded_data);
342 loaded_tag = unmasked & 0xff;
343 unmasked &= ~0xff;
344 break;
345 case 1:
346 loaded_tag = FPU_load_int32((long __user *)data_address, &loaded_data);
347 break;
348 case 2:
349 unmasked = FPU_load_double((double __user *)data_address,
350 &loaded_data);
351 loaded_tag = unmasked & 0xff;
352 unmasked &= ~0xff;
353 break;
354 case 3:
355 default: /* Used here to suppress gcc warnings. */
356 loaded_tag = FPU_load_int16((short __user *)data_address, &loaded_data);
357 break;
358 }
359
360 /* No more access to user memory, it is safe
361 to use static data now */
362
363 /* NaN operands have the next priority. */
364 /* We have to delay looking at st(0) until after
365 loading the data, because that data might contain an SNaN */
366 if ( ((st0_tag == TAG_Special) && isNaN(st0_ptr)) ||
367 ((loaded_tag == TAG_Special) && isNaN(&loaded_data)) )
368 {
369 /* Restore the status word; we might have loaded a
370 denormal. */
371 partial_status = status1;
372 if ( (FPU_modrm & 0x30) == 0x10 )
373 {
374 /* fcom or fcomp */
375 EXCEPTION(EX_Invalid);
376 setcc(SW_C3 | SW_C2 | SW_C0);
377 if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) )
378 FPU_pop(); /* fcomp, masked, so we pop. */
379 }
380 else
381 {
382 if ( loaded_tag == TAG_Special )
383 loaded_tag = FPU_Special(&loaded_data);
384#ifdef PECULIAR_486
385 /* This is not really needed, but gives behaviour
386 identical to an 80486 */
387 if ( (FPU_modrm & 0x28) == 0x20 )
388 /* fdiv or fsub */
389 real_2op_NaN(&loaded_data, loaded_tag, 0, &loaded_data);
390 else
391#endif /* PECULIAR_486 */
392 /* fadd, fdivr, fmul, or fsubr */
393 real_2op_NaN(&loaded_data, loaded_tag, 0, st0_ptr);
394 }
395 goto reg_mem_instr_done;
396 }
397
398 if ( unmasked && !((FPU_modrm & 0x30) == 0x10) )
399 {
400 /* Is not a comparison instruction. */
401 if ( (FPU_modrm & 0x38) == 0x38 )
402 {
403 /* fdivr */
404 if ( (st0_tag == TAG_Zero) &&
405 ((loaded_tag == TAG_Valid)
406 || (loaded_tag == TAG_Special
407 && isdenormal(&loaded_data))) )
408 {
409 if ( FPU_divide_by_zero(0, getsign(&loaded_data))
410 < 0 )
411 {
412 /* We use the fact here that the unmasked
413 exception in the loaded data was for a
414 denormal operand */
415 /* Restore the state of the denormal op bit */
416 partial_status &= ~SW_Denorm_Op;
417 partial_status |= status1 & SW_Denorm_Op;
418 }
419 else
420 setsign(st0_ptr, getsign(&loaded_data));
421 }
422 }
423 goto reg_mem_instr_done;
424 }
425
/* Bits 3-5 of the mod/rm byte select the arithmetic operation. */
426 switch ( (FPU_modrm >> 3) & 7 )
427 {
428 case 0: /* fadd */
429 clear_C1();
430 FPU_add(&loaded_data, loaded_tag, 0, control_word);
431 break;
432 case 1: /* fmul */
433 clear_C1();
434 FPU_mul(&loaded_data, loaded_tag, 0, control_word);
435 break;
436 case 2: /* fcom */
437 FPU_compare_st_data(&loaded_data, loaded_tag);
438 break;
439 case 3: /* fcomp */
440 if ( !FPU_compare_st_data(&loaded_data, loaded_tag)
441 && !unmasked )
442 FPU_pop();
443 break;
444 case 4: /* fsub */
445 clear_C1();
446 FPU_sub(LOADED|loaded_tag, (int)&loaded_data, control_word);
447 break;
448 case 5: /* fsubr */
449 clear_C1();
450 FPU_sub(REV|LOADED|loaded_tag, (int)&loaded_data, control_word);
451 break;
452 case 6: /* fdiv */
453 clear_C1();
454 FPU_div(LOADED|loaded_tag, (int)&loaded_data, control_word);
455 break;
456 case 7: /* fdivr */
457 clear_C1();
458 if ( st0_tag == TAG_Zero )
459 partial_status = status1; /* Undo any denorm tag,
460 zero-divide has priority. */
461 FPU_div(REV|LOADED|loaded_tag, (int)&loaded_data, control_word);
462 break;
463 }
464 }
465 else
466 {
467 if ( (FPU_modrm & 0x30) == 0x10 )
468 {
469 /* The instruction is fcom or fcomp */
470 EXCEPTION(EX_StackUnder);
471 setcc(SW_C3 | SW_C2 | SW_C0);
472 if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) )
473 FPU_pop(); /* fcomp */
474 }
475 else
476 FPU_stack_underflow();
477 }
478 reg_mem_instr_done:
479 operand_address = data_sel_off;
480 }
481 else
482 {
483 if ( !(no_ip_update =
484 FPU_load_store(((FPU_modrm & 0x38) | (byte1 & 6)) >> 1,
485 addr_modes, data_address)) )
486 {
487 operand_address = data_sel_off;
488 }
489 }
490
491 }
492 else
493 {
494 /* None of these instructions access user memory */
/* Index into the 64-entry tables: mod/rm bits 3-5 combined with the low
   three bits of the op-code byte. */
495 u_char instr_index = (FPU_modrm & 0x38) | (byte1 & 7);
496
497#ifdef PECULIAR_486
498 /* This is supposed to be undefined, but a real 80486 seems
499 to do this: */
500 operand_address.offset = 0;
501 operand_address.selector = FPU_DS;
502#endif /* PECULIAR_486 */
503
504 st0_ptr = &st(0);
505 st0_tag = FPU_gettag0();
506 switch ( type_table[(int) instr_index] )
507 {
508 case _NONE_: /* also _REGIc: _REGIn */
509 break;
510 case _REG0_:
511 if ( !NOT_EMPTY_ST0 )
512 {
513 FPU_stack_underflow();
514 goto FPU_instruction_done;
515 }
516 break;
517 case _REGIi:
518 if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
519 {
520 FPU_stack_underflow_i(FPU_rm);
521 goto FPU_instruction_done;
522 }
523 break;
524 case _REGIp:
525 if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
526 {
527 FPU_stack_underflow_pop(FPU_rm);
528 goto FPU_instruction_done;
529 }
530 break;
531 case _REGI_:
532 if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
533 {
534 FPU_stack_underflow();
535 goto FPU_instruction_done;
536 }
537 break;
538 case _PUSH_: /* Only used by the fld st(i) instruction */
539 break;
540 case _null_:
541 FPU_illegal();
542 goto FPU_instruction_done;
543 default:
544 EXCEPTION(EX_INTERNAL|0x111);
545 goto FPU_instruction_done;
546 }
547 (*st_instr_table[(int) instr_index])();
548
549FPU_instruction_done:
550 ;
551 }
552
553 if ( ! no_ip_update )
554 instruction_address = entry_sel_off;
555
556FPU_fwait_done:
557
558#ifdef DEBUG
559 RE_ENTRANT_CHECK_OFF;
560 FPU_printall();
561 RE_ENTRANT_CHECK_ON;
562#endif /* DEBUG */
563
/* Look-ahead: if the next bytes form another FPU instruction, emulate it
   now instead of returning and trapping again. */
564 if (FPU_lookahead && !need_resched())
565 {
566 FPU_ORIG_EIP = FPU_EIP - code_base;
567 if ( valid_prefix(&byte1, (u_char __user **)&FPU_EIP,
568 &addr_modes.override) )
569 goto do_another_FPU_instruction;
570 }
571
/* FPU_EIP was turned into a linear address by adding code_base above;
   convert it back to a segment offset before returning. */
572 if ( addr_modes.default_mode )
573 FPU_EIP -= code_base;
574
575 RE_ENTRANT_CHECK_OFF;
576}
577
578
579/* Support for prefix bytes is not yet complete. To properly handle
580 all prefix bytes, further changes are needed in the emulator code
581 which accesses user address space. Access to separate segments is
582 important for msdos emulation. */
/*
 * valid_prefix() - scan prefix bytes in front of an FPU instruction.
 *
 * Byte:     receives the last byte fetched (the op-code on success, the
 *           offending byte on failure).
 * fpu_eip:  in: user address to start scanning at.  On success with a
 *           0xd8..0xdf op-code it is updated to point at that op-code byte.
 * override: reset to defaults on entry, then filled in from any address
 *           size, operand size and segment prefixes that are seen.
 *
 * Returns 1 if the byte following the prefixes is FWAIT or an FPU op-code
 * (0xd8..0xdf), 0 otherwise.
 *
 * NOTE(review): in the FWAIT case *fpu_eip is left unchanged, so any
 * prefix bytes that were skipped are not reflected in it - confirm that
 * this is intended.
 */
583static int valid_prefix(u_char *Byte, u_char __user **fpu_eip,
584 overrides *override)
585{
586 u_char byte;
587 u_char __user *ip = *fpu_eip;
588
589 *override = (overrides) { 0, 0, PREFIX_DEFAULT }; /* defaults */
590
591 RE_ENTRANT_CHECK_OFF;
592 FPU_code_access_ok(1);
593 FPU_get_user(byte, ip);
594 RE_ENTRANT_CHECK_ON;
595
596 while ( 1 )
597 {
598 switch ( byte )
599 {
600 case ADDR_SIZE_PREFIX:
601 override->address_size = ADDR_SIZE_PREFIX;
602 goto do_next_byte;
603
604 case OP_SIZE_PREFIX:
605 override->operand_size = OP_SIZE_PREFIX;
606 goto do_next_byte;
607
608 case PREFIX_CS:
609 override->segment = PREFIX_CS_;
610 goto do_next_byte;
611 case PREFIX_ES:
612 override->segment = PREFIX_ES_;
613 goto do_next_byte;
614 case PREFIX_SS:
615 override->segment = PREFIX_SS_;
616 goto do_next_byte;
617 case PREFIX_FS:
618 override->segment = PREFIX_FS_;
619 goto do_next_byte;
620 case PREFIX_GS:
621 override->segment = PREFIX_GS_;
622 goto do_next_byte;
623 case PREFIX_DS:
624 override->segment = PREFIX_DS_;
625 goto do_next_byte;
626
627/* lock is not a valid prefix for FPU instructions,
628 let the cpu handle it to generate a SIGILL. */
629/* case PREFIX_LOCK: */
630
631 /* rep.. prefixes have no meaning for FPU instructions */
632 case PREFIX_REPE:
633 case PREFIX_REPNE:
634
/* Shared tail for every recognised prefix: fetch the next code byte and
   go round the loop again. */
635 do_next_byte:
636 ip++;
637 RE_ENTRANT_CHECK_OFF;
638 FPU_code_access_ok(1);
639 FPU_get_user(byte, ip);
640 RE_ENTRANT_CHECK_ON;
641 break;
642 case FWAIT_OPCODE:
643 *Byte = byte;
644 return 1;
645 default:
646 if ( (byte & 0xf8) == 0xd8 )
647 {
648 *Byte = byte;
649 *fpu_eip = ip;
650 return 1;
651 }
652 else
653 {
654 /* Not a valid sequence of prefix bytes followed by
655 an FPU instruction. */
656 *Byte = byte; /* Needed for error message. */
657 return 0;
658 }
659 }
660 }
661}
662
663
/*
 * math_abort() - give up on the current instruction and signal the task.
 *
 * Rewinds FPU_EIP to the start of the instruction (FPU_ORIG_EIP), records
 * trap number 16 with error code 0 in the thread struct and sends `signal`
 * to the current task.  The inline asm then loads %esp with (info - 4) and
 * executes `ret`, so control does not come back to the caller through the
 * normal return path; the PARANOID printk is only reached if that fails.
 * NOTE(review): presumably this unwinds directly out of math_emulate() to
 * its caller - confirm against the entry code.
 */
664void math_abort(struct info * info, unsigned int signal)
665{
666 FPU_EIP = FPU_ORIG_EIP;
667 current->thread.trap_no = 16;
668 current->thread.error_code = 0;
669 send_sig(signal,current,1);
670 RE_ENTRANT_CHECK_OFF;
671 __asm__("movl %0,%%esp ; ret": :"g" (((long) info)-4));
672#ifdef PARANOID
673 printk("ERROR: wm-FPU-emu math_abort failed!\n");
674#endif /* PARANOID */
675}
676
677
678
679#define S387 ((struct i387_soft_struct *)s387)
680#define sstatus_word() \
681 ((S387->swd & ~SW_Top & 0xffff) | ((S387->ftop << SW_Top_Shift) & SW_Top))
682
683int restore_i387_soft(void *s387, struct _fpstate __user *buf)
684{
685 u_char __user *d = (u_char __user *)buf;
686 int offset, other, i, tags, regnr, tag, newtop;
687
688 RE_ENTRANT_CHECK_OFF;
689 FPU_access_ok(VERIFY_READ, d, 7*4 + 8*10);
690 if (__copy_from_user(&S387->cwd, d, 7*4))
691 return -1;
692 RE_ENTRANT_CHECK_ON;
693
694 d += 7*4;
695
696 S387->ftop = (S387->swd >> SW_Top_Shift) & 7;
697 offset = (S387->ftop & 7) * 10;
698 other = 80 - offset;
699
700 RE_ENTRANT_CHECK_OFF;
701 /* Copy all registers in stack order. */
702 if (__copy_from_user(((u_char *)&S387->st_space)+offset, d, other))
703 return -1;
704 if ( offset )
705 if (__copy_from_user((u_char *)&S387->st_space, d+other, offset))
706 return -1;
707 RE_ENTRANT_CHECK_ON;
708
709 /* The tags may need to be corrected now. */
710 tags = S387->twd;
711 newtop = S387->ftop;
712 for ( i = 0; i < 8; i++ )
713 {
714 regnr = (i+newtop) & 7;
715 if ( ((tags >> ((regnr & 7)*2)) & 3) != TAG_Empty )
716 {
717 /* The loaded data over-rides all other cases. */
718 tag = FPU_tagof((FPU_REG *)((u_char *)S387->st_space + 10*regnr));
719 tags &= ~(3 << (regnr*2));
720 tags |= (tag & 3) << (regnr*2);
721 }
722 }
723 S387->twd = tags;
724
725 return 0;
726}
727
728
729int save_i387_soft(void *s387, struct _fpstate __user * buf)
730{
731 u_char __user *d = (u_char __user *)buf;
732 int offset = (S387->ftop & 7) * 10, other = 80 - offset;
733
734 RE_ENTRANT_CHECK_OFF;
735 FPU_access_ok(VERIFY_WRITE, d, 7*4 + 8*10);
736#ifdef PECULIAR_486
737 S387->cwd &= ~0xe080;
738 /* An 80486 sets nearly all of the reserved bits to 1. */
739 S387->cwd |= 0xffff0040;
740 S387->swd = sstatus_word() | 0xffff0000;
741 S387->twd |= 0xffff0000;
742 S387->fcs &= ~0xf8000000;
743 S387->fos |= 0xffff0000;
744#endif /* PECULIAR_486 */
745 __copy_to_user(d, &S387->cwd, 7*4);
746 RE_ENTRANT_CHECK_ON;
747
748 d += 7*4;
749
750 RE_ENTRANT_CHECK_OFF;
751 /* Copy all registers in stack order. */
752 if (__copy_to_user(d, ((u_char *)&S387->st_space)+offset, other))
753 return -1;
754 if ( offset )
755 if (__copy_to_user(d+other, (u_char *)&S387->st_space, offset))
756 return -1
757 RE_ENTRANT_CHECK_ON;
758
759 return 1;
760}
diff --git a/arch/i386/math-emu/fpu_etc.c b/arch/i386/math-emu/fpu_etc.c
new file mode 100644
index 000000000000..e3b5d465587f
--- /dev/null
+++ b/arch/i386/math-emu/fpu_etc.c
@@ -0,0 +1,143 @@
1/*---------------------------------------------------------------------------+
2 | fpu_etc.c |
3 | |
4 | Implement a few FPU instructions. |
5 | |
6 | Copyright (C) 1992,1993,1994,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
8 | Australia. E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13#include "fpu_system.h"
14#include "exception.h"
15#include "fpu_emu.h"
16#include "status_w.h"
17#include "reg_constant.h"
18
19
20static void fchs(FPU_REG *st0_ptr, u_char st0tag)
21{
22 if ( st0tag ^ TAG_Empty )
23 {
24 signbyte(st0_ptr) ^= SIGN_NEG;
25 clear_C1();
26 }
27 else
28 FPU_stack_underflow();
29}
30
31
32static void fabs(FPU_REG *st0_ptr, u_char st0tag)
33{
34 if ( st0tag ^ TAG_Empty )
35 {
36 setpositive(st0_ptr);
37 clear_C1();
38 }
39 else
40 FPU_stack_underflow();
41}
42
43
44static void ftst_(FPU_REG *st0_ptr, u_char st0tag)
45{
46 switch (st0tag)
47 {
48 case TAG_Zero:
49 setcc(SW_C3);
50 break;
51 case TAG_Valid:
52 if (getsign(st0_ptr) == SIGN_POS)
53 setcc(0);
54 else
55 setcc(SW_C0);
56 break;
57 case TAG_Special:
58 switch ( FPU_Special(st0_ptr) )
59 {
60 case TW_Denormal:
61 if (getsign(st0_ptr) == SIGN_POS)
62 setcc(0);
63 else
64 setcc(SW_C0);
65 if ( denormal_operand() < 0 )
66 {
67#ifdef PECULIAR_486
68 /* This is weird! */
69 if (getsign(st0_ptr) == SIGN_POS)
70 setcc(SW_C3);
71#endif /* PECULIAR_486 */
72 return;
73 }
74 break;
75 case TW_NaN:
76 setcc(SW_C0|SW_C2|SW_C3); /* Operand is not comparable */
77 EXCEPTION(EX_Invalid);
78 break;
79 case TW_Infinity:
80 if (getsign(st0_ptr) == SIGN_POS)
81 setcc(0);
82 else
83 setcc(SW_C0);
84 break;
85 default:
86 setcc(SW_C0|SW_C2|SW_C3); /* Operand is not comparable */
87 EXCEPTION(EX_INTERNAL|0x14);
88 break;
89 }
90 break;
91 case TAG_Empty:
92 setcc(SW_C0|SW_C2|SW_C3);
93 EXCEPTION(EX_StackUnder);
94 break;
95 }
96}
97
98
99static void fxam(FPU_REG *st0_ptr, u_char st0tag)
100{
101 int c = 0;
102 switch (st0tag)
103 {
104 case TAG_Empty:
105 c = SW_C3|SW_C0;
106 break;
107 case TAG_Zero:
108 c = SW_C3;
109 break;
110 case TAG_Valid:
111 c = SW_C2;
112 break;
113 case TAG_Special:
114 switch ( FPU_Special(st0_ptr) )
115 {
116 case TW_Denormal:
117 c = SW_C2|SW_C3; /* Denormal */
118 break;
119 case TW_NaN:
120 /* We also use NaN for unsupported types. */
121 if ( (st0_ptr->sigh & 0x80000000) && (exponent(st0_ptr) == EXP_OVER) )
122 c = SW_C0;
123 break;
124 case TW_Infinity:
125 c = SW_C2|SW_C0;
126 break;
127 }
128 }
129 if ( getsign(st0_ptr) == SIGN_NEG )
130 c |= SW_C1;
131 setcc(c);
132}
133
134
/* Handlers used by FPU_etc(), indexed by FPU_rm (0..7).  Entries 2, 3, 6
   and 7 have no handler in this file and map to FPU_illegal. */
135static FUNC_ST0 const fp_etc_table[] = {
136 fchs, fabs, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal,
137 ftst_, fxam, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal
138};
139
140void FPU_etc(void)
141{
142 (fp_etc_table[FPU_rm])(&st(0), FPU_gettag0());
143}
diff --git a/arch/i386/math-emu/fpu_proto.h b/arch/i386/math-emu/fpu_proto.h
new file mode 100644
index 000000000000..37a8a7fe7e2b
--- /dev/null
+++ b/arch/i386/math-emu/fpu_proto.h
@@ -0,0 +1,140 @@
1#ifndef _FPU_PROTO_H
2#define _FPU_PROTO_H
3
4/* errors.c */
5extern void FPU_illegal(void);
6extern void FPU_printall(void);
7asmlinkage void FPU_exception(int n);
8extern int real_1op_NaN(FPU_REG *a);
9extern int real_2op_NaN(FPU_REG const *b, u_char tagb, int deststnr,
10 FPU_REG const *defaultNaN);
11asmlinkage int arith_invalid(int deststnr);
12asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign);
13extern int set_precision_flag(int flags);
14asmlinkage void set_precision_flag_up(void);
15asmlinkage void set_precision_flag_down(void);
16asmlinkage int denormal_operand(void);
17asmlinkage int arith_overflow(FPU_REG *dest);
18asmlinkage int arith_underflow(FPU_REG *dest);
19extern void FPU_stack_overflow(void);
20extern void FPU_stack_underflow(void);
21extern void FPU_stack_underflow_i(int i);
22extern void FPU_stack_underflow_pop(int i);
23/* fpu_arith.c */
24extern void fadd__(void);
25extern void fmul__(void);
26extern void fsub__(void);
27extern void fsubr_(void);
28extern void fdiv__(void);
29extern void fdivr_(void);
30extern void fadd_i(void);
31extern void fmul_i(void);
32extern void fsubri(void);
33extern void fsub_i(void);
34extern void fdivri(void);
35extern void fdiv_i(void);
36extern void faddp_(void);
37extern void fmulp_(void);
38extern void fsubrp(void);
39extern void fsubp_(void);
40extern void fdivrp(void);
41extern void fdivp_(void);
42/* fpu_aux.c */
43extern void finit(void);
44extern void finit_(void);
45extern void fstsw_(void);
46extern void fp_nop(void);
47extern void fld_i_(void);
48extern void fxch_i(void);
49extern void ffree_(void);
50extern void ffreep(void);
51extern void fst_i_(void);
52extern void fstp_i(void);
53/* fpu_entry.c */
54asmlinkage extern void math_emulate(long arg);
55extern void math_abort(struct info *info, unsigned int signal);
56/* fpu_etc.c */
57extern void FPU_etc(void);
58/* fpu_tags.c */
59extern int FPU_gettag0(void);
60extern int FPU_gettagi(int stnr);
61extern int FPU_gettag(int regnr);
62extern void FPU_settag0(int tag);
63extern void FPU_settagi(int stnr, int tag);
64extern void FPU_settag(int regnr, int tag);
65extern int FPU_Special(FPU_REG const *ptr);
66extern int isNaN(FPU_REG const *ptr);
67extern void FPU_pop(void);
68extern int FPU_empty_i(int stnr);
69extern int FPU_stackoverflow(FPU_REG **st_new_ptr);
70extern void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr);
71extern void FPU_copy_to_reg1(FPU_REG const *r, u_char tag);
72extern void FPU_copy_to_reg0(FPU_REG const *r, u_char tag);
73/* fpu_trig.c */
74extern void FPU_triga(void);
75extern void FPU_trigb(void);
76/* get_address.c */
77extern void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip,
78 struct address *addr, fpu_addr_modes addr_modes);
79extern void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip,
80 struct address *addr, fpu_addr_modes addr_modes);
81/* load_store.c */
82extern int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
83 void __user *data_address);
84/* poly_2xm1.c */
85extern int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result);
86/* poly_atan.c */
87extern void poly_atan(FPU_REG *st0_ptr, u_char st0_tag, FPU_REG *st1_ptr,
88 u_char st1_tag);
89/* poly_l2.c */
90extern void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign);
91extern int poly_l2p1(u_char s0, u_char s1, FPU_REG *r0, FPU_REG *r1,
92 FPU_REG *d);
93/* poly_sin.c */
94extern void poly_sine(FPU_REG *st0_ptr);
95extern void poly_cos(FPU_REG *st0_ptr);
96/* poly_tan.c */
97extern void poly_tan(FPU_REG *st0_ptr);
98/* reg_add_sub.c */
99extern int FPU_add(FPU_REG const *b, u_char tagb, int destrnr, int control_w);
100extern int FPU_sub(int flags, int rm, int control_w);
101/* reg_compare.c */
102extern int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag);
103extern void fcom_st(void);
104extern void fcompst(void);
105extern void fcompp(void);
106extern void fucom_(void);
107extern void fucomp(void);
108extern void fucompp(void);
109/* reg_constant.c */
110extern void fconst(void);
111/* reg_ld_str.c */
112extern int FPU_load_extended(long double __user *s, int stnr);
113extern int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data);
114extern int FPU_load_single(float __user *single, FPU_REG *loaded_data);
115extern int FPU_load_int64(long long __user *_s);
116extern int FPU_load_int32(long __user *_s, FPU_REG *loaded_data);
117extern int FPU_load_int16(short __user *_s, FPU_REG *loaded_data);
118extern int FPU_load_bcd(u_char __user *s);
119extern int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag,
120 long double __user *d);
121extern int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat);
122extern int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, float __user *single);
123extern int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d);
124extern int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d);
125extern int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d);
126extern int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d);
127extern int FPU_round_to_int(FPU_REG *r, u_char tag);
128extern u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s);
129extern void frstor(fpu_addr_modes addr_modes, u_char __user *data_address);
130extern u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d);
131extern void fsave(fpu_addr_modes addr_modes, u_char __user *data_address);
132extern int FPU_tagof(FPU_REG *ptr);
133/* reg_mul.c */
134extern int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w);
135
136extern int FPU_div(int flags, int regrm, int control_w);
137/* reg_convert.c */
138extern int FPU_to_exp16(FPU_REG const *a, FPU_REG *x);
139#endif /* _FPU_PROTO_H */
140
diff --git a/arch/i386/math-emu/fpu_system.h b/arch/i386/math-emu/fpu_system.h
new file mode 100644
index 000000000000..bf26341c8bde
--- /dev/null
+++ b/arch/i386/math-emu/fpu_system.h
@@ -0,0 +1,89 @@
1/*---------------------------------------------------------------------------+
2 | fpu_system.h |
3 | |
4 | Copyright (C) 1992,1994,1997 |
5 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
6 | Australia. E-mail billm@suburbia.net |
7 | |
8 +---------------------------------------------------------------------------*/
9
10#ifndef _FPU_SYSTEM_H
11#define _FPU_SYSTEM_H
12
13/* system dependent definitions */
14
15#include <linux/sched.h>
16#include <linux/kernel.h>
17#include <linux/mm.h>
18
19/* This sets the pointer FPU_info to point to the argument part
20 of the stack frame of math_emulate() */
21#define SETUP_DATA_AREA(arg) FPU_info = (struct info *) &arg
22
/* s is always from a cpu register, and the cpu does bounds checking
 * during register load --> no further bounds checks needed */
/* Index the current task's LDT (viewed as an array of struct desc_struct)
   with the selector's table index, i.e. the selector shifted right by 3. */
#define LDT_DESCRIPTOR(s)	(((struct desc_struct *)current->mm->context.ldt)[(s) >> 3])

/* The helpers below take a descriptor with two 32-bit words, .a and .b
   (presumably the low and high halves of an x86 segment descriptor --
   confirm against struct desc_struct). */

/* Non-zero when either of bits 21..22 of .b is set. */
#define SEG_D_SIZE(x)		((x).b & (3 << 21))
/* Non-zero when bit 23 of .b (granularity) is set. */
#define SEG_G_BIT(x)		((x).b & (1 << 23))
/* Multiplier for the limit: 4096 when bit 23 of .b is set, otherwise 1. */
#define SEG_GRANULARITY(x)	(((x).b & (1 << 23)) ? 4096 : 1)
/* Non-zero when any of base[31:24], limit[19:16] or the G bit is set. */
#define SEG_286_MODE(x)		((x).b & ( 0xff000000 | 0xf0000 | (1 << 23)))
/* Reassemble the 32-bit base from its three pieces. */
#define SEG_BASE_ADDR(s)	(((s).b & 0xff000000) \
				 | (((s).b & 0xff) << 16) | ((s).a >> 16))
/* 20-bit limit: bits 19..16 come from bits 19..16 of .b, bits 15..0 from
   .a.  The mask must be 0xf0000 (as in SEG_286_MODE above); the wider
   0xff0000 used previously also picked up flag bits 20..23 of .b and so
   inflated the limit for any descriptor with one of those flags set. */
#define SEG_LIMIT(s)		(((s).b & 0xf0000) | ((s).a & 0xffff))
/* True when bit 11 of .b is set and bit 9 is clear. */
#define SEG_EXECUTE_ONLY(s)	(((s).b & ((1 << 11) | (1 << 9))) == (1 << 11))
/* True when bit 11 of .b is clear and bit 9 is set. */
#define SEG_WRITE_PERM(s)	(((s).b & ((1 << 11) | (1 << 9))) == (1 << 9))
/* True when bit 11 of .b is clear and bit 10 is set. */
#define SEG_EXPAND_DOWN(s)	(((s).b & ((1 << 11) | (1 << 10))) \
				 == (1 << 10))
37
38#define I387 (current->thread.i387)
39#define FPU_info (I387.soft.info)
40
41#define FPU_CS (*(unsigned short *) &(FPU_info->___cs))
42#define FPU_SS (*(unsigned short *) &(FPU_info->___ss))
43#define FPU_DS (*(unsigned short *) &(FPU_info->___ds))
44#define FPU_EAX (FPU_info->___eax)
45#define FPU_EFLAGS (FPU_info->___eflags)
46#define FPU_EIP (FPU_info->___eip)
47#define FPU_ORIG_EIP (FPU_info->___orig_eip)
48
49#define FPU_lookahead (I387.soft.lookahead)
50
51/* nz if ip_offset and cs_selector are not to be set for the current
52 instruction. */
53#define no_ip_update (*(u_char *)&(I387.soft.no_update))
54#define FPU_rm (*(u_char *)&(I387.soft.rm))
55
56/* Number of bytes of data which can be legally accessed by the current
57 instruction. This only needs to hold a number <= 108, so a byte will do. */
58#define access_limit (*(u_char *)&(I387.soft.alimit))
59
60#define partial_status (I387.soft.swd)
61#define control_word (I387.soft.cwd)
62#define fpu_tag_word (I387.soft.twd)
63#define registers (I387.soft.st_space)
64#define top (I387.soft.ftop)
65
66#define instruction_address (*(struct address *)&I387.soft.fip)
67#define operand_address (*(struct address *)&I387.soft.foo)
68
/* Abort the emulation with SIGSEGV unless access_ok(x,y,z) succeeds.
   Wrapped in do { } while (0) so that the macro expands to exactly one
   statement: the previous bare `if' would silently capture an `else'
   following the macro when used inside an unbraced if/else. */
#define FPU_access_ok(x,y,z)	do { \
				  if ( !access_ok(x,y,z) ) \
				    math_abort(FPU_info,SIGSEGV); \
				} while (0)
71
72#undef FPU_IGNORE_CODE_SEGV
73#ifdef FPU_IGNORE_CODE_SEGV
74/* access_ok() is very expensive, and causes the emulator to run
75 about 20% slower if applied to the code. Anyway, errors due to bad
76 code addresses should be much rarer than errors due to bad data
77 addresses. */
78#define FPU_code_access_ok(z)
79#else
80/* A simpler test than access_ok() can probably be done for
81 FPU_code_access_ok() because the only possible error is to step
82 past the upper boundary of a legal code area. */
83#define FPU_code_access_ok(z) FPU_access_ok(VERIFY_READ,(void __user *)FPU_EIP,z)
84#endif
85
86#define FPU_get_user(x,y) get_user((x),(y))
87#define FPU_put_user(x,y) put_user((x),(y))
88
89#endif
diff --git a/arch/i386/math-emu/fpu_tags.c b/arch/i386/math-emu/fpu_tags.c
new file mode 100644
index 000000000000..cb436fe20e4c
--- /dev/null
+++ b/arch/i386/math-emu/fpu_tags.c
@@ -0,0 +1,127 @@
1/*---------------------------------------------------------------------------+
2 | fpu_tags.c |
3 | |
4 | Set FPU register tags. |
5 | |
6 | Copyright (C) 1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@jacobi.maths.monash.edu.au |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13#include "fpu_emu.h"
14#include "fpu_system.h"
15#include "exception.h"
16
17
18void FPU_pop(void)
19{
20 fpu_tag_word |= 3 << ((top & 7)*2);
21 top++;
22}
23
24
25int FPU_gettag0(void)
26{
27 return (fpu_tag_word >> ((top & 7)*2)) & 3;
28}
29
30
31int FPU_gettagi(int stnr)
32{
33 return (fpu_tag_word >> (((top+stnr) & 7)*2)) & 3;
34}
35
36
37int FPU_gettag(int regnr)
38{
39 return (fpu_tag_word >> ((regnr & 7)*2)) & 3;
40}
41
42
43void FPU_settag0(int tag)
44{
45 int regnr = top;
46 regnr &= 7;
47 fpu_tag_word &= ~(3 << (regnr*2));
48 fpu_tag_word |= (tag & 3) << (regnr*2);
49}
50
51
52void FPU_settagi(int stnr, int tag)
53{
54 int regnr = stnr+top;
55 regnr &= 7;
56 fpu_tag_word &= ~(3 << (regnr*2));
57 fpu_tag_word |= (tag & 3) << (regnr*2);
58}
59
60
61void FPU_settag(int regnr, int tag)
62{
63 regnr &= 7;
64 fpu_tag_word &= ~(3 << (regnr*2));
65 fpu_tag_word |= (tag & 3) << (regnr*2);
66}
67
68
69int FPU_Special(FPU_REG const *ptr)
70{
71 int exp = exponent(ptr);
72
73 if ( exp == EXP_BIAS+EXP_UNDER )
74 return TW_Denormal;
75 else if ( exp != EXP_BIAS+EXP_OVER )
76 return TW_NaN;
77 else if ( (ptr->sigh == 0x80000000) && (ptr->sigl == 0) )
78 return TW_Infinity;
79 return TW_NaN;
80}
81
82
83int isNaN(FPU_REG const *ptr)
84{
85 return ( (exponent(ptr) == EXP_BIAS+EXP_OVER)
86 && !((ptr->sigh == 0x80000000) && (ptr->sigl == 0)) );
87}
88
89
90int FPU_empty_i(int stnr)
91{
92 int regnr = (top+stnr) & 7;
93
94 return ((fpu_tag_word >> (regnr*2)) & 3) == TAG_Empty;
95}
96
97
98int FPU_stackoverflow(FPU_REG **st_new_ptr)
99{
100 *st_new_ptr = &st(-1);
101
102 return ((fpu_tag_word >> (((top - 1) & 7)*2)) & 3) != TAG_Empty;
103}
104
105
106void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr)
107{
108 reg_copy(r, &st(stnr));
109 FPU_settagi(stnr, tag);
110}
111
112void FPU_copy_to_reg1(FPU_REG const *r, u_char tag)
113{
114 reg_copy(r, &st(1));
115 FPU_settagi(1, tag);
116}
117
118void FPU_copy_to_reg0(FPU_REG const *r, u_char tag)
119{
120 int regnr = top;
121 regnr &= 7;
122
123 reg_copy(r, &st(0));
124
125 fpu_tag_word &= ~(3 << (regnr*2));
126 fpu_tag_word |= (tag & 3) << (regnr*2);
127}
diff --git a/arch/i386/math-emu/fpu_trig.c b/arch/i386/math-emu/fpu_trig.c
new file mode 100644
index 000000000000..403cbde1d425
--- /dev/null
+++ b/arch/i386/math-emu/fpu_trig.c
@@ -0,0 +1,1845 @@
1/*---------------------------------------------------------------------------+
2 | fpu_trig.c |
3 | |
4 | Implementation of the FPU "transcendental" functions. |
5 | |
6 | Copyright (C) 1992,1993,1994,1997,1999 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
8 | Australia. E-mail billm@melbpc.org.au |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13#include "fpu_system.h"
14#include "exception.h"
15#include "fpu_emu.h"
16#include "status_w.h"
17#include "control_w.h"
18#include "reg_constant.h"
19
20static void rem_kernel(unsigned long long st0, unsigned long long *y,
21 unsigned long long st1,
22 unsigned long long q, int n);
23
24#define BETTER_THAN_486
25
26#define FCOS 4
27
28/* Used only by fptan, fsin, fcos, and fsincos. */
29/* This routine produces very accurate results, similar to
30 using a value of pi with more than 128 bits precision. */
31/* Limited measurements show no results worse than 64 bit precision
32 except for the results for arguments close to 2^63, where the
33 precision of the result sometimes degrades to about 63.9 bits */
/* Reduce the argument in st(0) by multiples of pi/2, in place.
   st0_ptr: the register to reduce; its sign is cleared before the
            reduction starts.
   even:    the callers in this file pass 0 or FCOS.  It selects which
            parity of the quotient q triggers the extra reflection about
            pi/2, and it is OR-ed into the return value.
   Returns -1, with SW_C2 set in partial_status, when
   exponent(st0_ptr) >= 63 (no reduction is attempted).  Otherwise
   returns (q & 3) | even with SW_C2 cleared; control_word is restored
   and partial_status is reset to its value on entry (minus SW_C2), so
   status changes made by the internal arithmetic are discarded. */
34static int trig_arg(FPU_REG *st0_ptr, int even)
35{
36 FPU_REG tmp;
37 u_char tmptag;
38 unsigned long long q;
39 int old_cw = control_word, saved_status = partial_status;
40 int tag, st0_tag = TAG_Valid;
41
42 if ( exponent(st0_ptr) >= 63 )
43 {
44 partial_status |= SW_C2; /* Reduction incomplete. */
45 return -1;
46 }
47
/* Force round-to-chop for the duration of the reduction. */
48 control_word &= ~CW_RC;
49 control_word |= RC_CHOP;
50
51 setpositive(st0_ptr);
/* tmp = |st(0)| / CONST_PI2, then rounded to an integer: the quotient q. */
52 tag = FPU_u_div(st0_ptr, &CONST_PI2, &tmp, PR_64_BITS | RC_CHOP | 0x3f,
53 SIGN_POS);
54
55 FPU_round_to_int(&tmp, tag); /* Fortunately, this can't overflow
56 to 2^64 */
57 q = significand(&tmp);
58 if ( q )
59 {
/* Replace st(0) by the remainder st(0) - q * CONST_PI2, computed on the
   64-bit significands by rem_kernel(). */
60 rem_kernel(significand(st0_ptr),
61 &significand(&tmp),
62 significand(&CONST_PI2),
63 q, exponent(st0_ptr) - exponent(&CONST_PI2));
64 setexponent16(&tmp, exponent(&CONST_PI2));
65 st0_tag = FPU_normalize(&tmp);
66 FPU_copy_to_reg0(&tmp, st0_tag);
67 }
68
69 if ( (even && !(q & 1)) || (!even && (q & 1)) )
70 {
/* NOTE(review): the address of the constant is passed through an int
   argument; this presumably relies on pointers fitting in an int on the
   target -- confirm against FPU_sub(). */
71 st0_tag = FPU_sub(REV|LOADED|TAG_Valid, (int)&CONST_PI2, FULL_PRECISION);
72
73#ifdef BETTER_THAN_486
74 /* So far, the results are exact but based upon a 64 bit
75 precision approximation to pi/2. The technique used
76 now is equivalent to using an approximation to pi/2 which
77 is accurate to about 128 bits. */
78 if ( (exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64) || (q > 1) )
79 {
80 /* This code gives the effect of having pi/2 to better than
81 128 bits precision. */
82
83 significand(&tmp) = q + 1;
84 setexponent16(&tmp, 63);
85 FPU_normalize(&tmp);
86 tmptag =
87 FPU_u_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION, SIGN_POS,
88 exponent(&CONST_PI2extra) + exponent(&tmp));
89 setsign(&tmp, getsign(&CONST_PI2extra));
90 st0_tag = FPU_add(&tmp, tmptag, 0, FULL_PRECISION);
91 if ( signnegative(st0_ptr) )
92 {
93 /* CONST_PI2extra is negative, so the result of the addition
94 can be negative. This means that the argument is actually
95 in a different quadrant. The correction is always < pi/2,
96 so it can't overflow into yet another quadrant. */
97 setpositive(st0_ptr);
98 q++;
99 }
100 }
101#endif /* BETTER_THAN_486 */
102 }
103#ifdef BETTER_THAN_486
104 else
105 {
106 /* So far, the results are exact but based upon a 64 bit
107 precision approximation to pi/2. The technique used
108 now is equivalent to using an approximation to pi/2 which
109 is accurate to about 128 bits. */
110 if ( ((q > 0) && (exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64))
111 || (q > 1) )
112 {
113 /* This code gives the effect of having pi/2 to better than
114 128 bits precision. */
115
116 significand(&tmp) = q;
117 setexponent16(&tmp, 63);
118 FPU_normalize(&tmp); /* This must return TAG_Valid */
119 tmptag = FPU_u_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION,
120 SIGN_POS,
121 exponent(&CONST_PI2extra) + exponent(&tmp));
122 setsign(&tmp, getsign(&CONST_PI2extra));
123 st0_tag = FPU_sub(LOADED|(tmptag & 0x0f), (int)&tmp,
124 FULL_PRECISION);
125 if ( (exponent(st0_ptr) == exponent(&CONST_PI2)) &&
126 ((st0_ptr->sigh > CONST_PI2.sigh)
127 || ((st0_ptr->sigh == CONST_PI2.sigh)
128 && (st0_ptr->sigl > CONST_PI2.sigl))) )
129 {
130 /* CONST_PI2extra is negative, so the result of the
131 subtraction can be larger than pi/2. This means
132 that the argument is actually in a different quadrant.
133 The correction is always < pi/2, so it can't overflow
134 into yet another quadrant. */
135 st0_tag = FPU_sub(REV|LOADED|TAG_Valid, (int)&CONST_PI2,
136 FULL_PRECISION);
137 q++;
138 }
139 }
140 }
141#endif /* BETTER_THAN_486 */
142
143 FPU_settag0(st0_tag);
144 control_word = old_cw;
145 partial_status = saved_status & ~SW_C2; /* Reduction complete. */
146
147 return (q & 3) | even;
148}
149
150
151/* Convert a long to register */
152static void convert_l2reg(long const *arg, int deststnr)
153{
154 int tag;
155 long num = *arg;
156 u_char sign;
157 FPU_REG *dest = &st(deststnr);
158
159 if (num == 0)
160 {
161 FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
162 return;
163 }
164
165 if (num > 0)
166 { sign = SIGN_POS; }
167 else
168 { num = -num; sign = SIGN_NEG; }
169
170 dest->sigh = num;
171 dest->sigl = 0;
172 setexponent16(dest, 31);
173 tag = FPU_normalize(dest);
174 FPU_settagi(deststnr, tag);
175 setsign(dest, sign);
176 return;
177}
178
179
180static void single_arg_error(FPU_REG *st0_ptr, u_char st0_tag)
181{
182 if ( st0_tag == TAG_Empty )
183 FPU_stack_underflow(); /* Puts a QNaN in st(0) */
184 else if ( st0_tag == TW_NaN )
185 real_1op_NaN(st0_ptr); /* return with a NaN in st(0) */
186#ifdef PARANOID
187 else
188 EXCEPTION(EX_INTERNAL|0x0112);
189#endif /* PARANOID */
190}
191
192
193static void single_arg_2_error(FPU_REG *st0_ptr, u_char st0_tag)
194{
195 int isNaN;
196
197 switch ( st0_tag )
198 {
199 case TW_NaN:
200 isNaN = (exponent(st0_ptr) == EXP_OVER) && (st0_ptr->sigh & 0x80000000);
201 if ( isNaN && !(st0_ptr->sigh & 0x40000000) ) /* Signaling ? */
202 {
203 EXCEPTION(EX_Invalid);
204 if ( control_word & CW_Invalid )
205 {
206 /* The masked response */
207 /* Convert to a QNaN */
208 st0_ptr->sigh |= 0x40000000;
209 push();
210 FPU_copy_to_reg0(st0_ptr, TAG_Special);
211 }
212 }
213 else if ( isNaN )
214 {
215 /* A QNaN */
216 push();
217 FPU_copy_to_reg0(st0_ptr, TAG_Special);
218 }
219 else
220 {
221 /* pseudoNaN or other unsupported */
222 EXCEPTION(EX_Invalid);
223 if ( control_word & CW_Invalid )
224 {
225 /* The masked response */
226 FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
227 push();
228 FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
229 }
230 }
231 break; /* return with a NaN in st(0) */
232#ifdef PARANOID
233 default:
234 EXCEPTION(EX_INTERNAL|0x0112);
235#endif /* PARANOID */
236 }
237}
238
239
240/*---------------------------------------------------------------------------*/
241
242static void f2xm1(FPU_REG *st0_ptr, u_char tag)
243{
244 FPU_REG a;
245
246 clear_C1();
247
248 if ( tag == TAG_Valid )
249 {
250 /* For an 80486 FPU, the result is undefined if the arg is >= 1.0 */
251 if ( exponent(st0_ptr) < 0 )
252 {
253 denormal_arg:
254
255 FPU_to_exp16(st0_ptr, &a);
256
257 /* poly_2xm1(x) requires 0 < st(0) < 1. */
258 poly_2xm1(getsign(st0_ptr), &a, st0_ptr);
259 }
260 set_precision_flag_up(); /* 80486 appears to always do this */
261 return;
262 }
263
264 if ( tag == TAG_Zero )
265 return;
266
267 if ( tag == TAG_Special )
268 tag = FPU_Special(st0_ptr);
269
270 switch ( tag )
271 {
272 case TW_Denormal:
273 if ( denormal_operand() < 0 )
274 return;
275 goto denormal_arg;
276 case TW_Infinity:
277 if ( signnegative(st0_ptr) )
278 {
279 /* -infinity gives -1 (p16-10) */
280 FPU_copy_to_reg0(&CONST_1, TAG_Valid);
281 setnegative(st0_ptr);
282 }
283 return;
284 default:
285 single_arg_error(st0_ptr, tag);
286 }
287}
288
289
290static void fptan(FPU_REG *st0_ptr, u_char st0_tag)
291{
292 FPU_REG *st_new_ptr;
293 int q;
294 u_char arg_sign = getsign(st0_ptr);
295
296 /* Stack underflow has higher priority */
297 if ( st0_tag == TAG_Empty )
298 {
299 FPU_stack_underflow(); /* Puts a QNaN in st(0) */
300 if ( control_word & CW_Invalid )
301 {
302 st_new_ptr = &st(-1);
303 push();
304 FPU_stack_underflow(); /* Puts a QNaN in the new st(0) */
305 }
306 return;
307 }
308
309 if ( STACK_OVERFLOW )
310 { FPU_stack_overflow(); return; }
311
312 if ( st0_tag == TAG_Valid )
313 {
314 if ( exponent(st0_ptr) > -40 )
315 {
316 if ( (q = trig_arg(st0_ptr, 0)) == -1 )
317 {
318 /* Operand is out of range */
319 return;
320 }
321
322 poly_tan(st0_ptr);
323 setsign(st0_ptr, (q & 1) ^ (arg_sign != 0));
324 set_precision_flag_up(); /* We do not really know if up or down */
325 }
326 else
327 {
328 /* For a small arg, the result == the argument */
329 /* Underflow may happen */
330
331 denormal_arg:
332
333 FPU_to_exp16(st0_ptr, st0_ptr);
334
335 st0_tag = FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign);
336 FPU_settag0(st0_tag);
337 }
338 push();
339 FPU_copy_to_reg0(&CONST_1, TAG_Valid);
340 return;
341 }
342
343 if ( st0_tag == TAG_Zero )
344 {
345 push();
346 FPU_copy_to_reg0(&CONST_1, TAG_Valid);
347 setcc(0);
348 return;
349 }
350
351 if ( st0_tag == TAG_Special )
352 st0_tag = FPU_Special(st0_ptr);
353
354 if ( st0_tag == TW_Denormal )
355 {
356 if ( denormal_operand() < 0 )
357 return;
358
359 goto denormal_arg;
360 }
361
362 if ( st0_tag == TW_Infinity )
363 {
364 /* The 80486 treats infinity as an invalid operand */
365 if ( arith_invalid(0) >= 0 )
366 {
367 st_new_ptr = &st(-1);
368 push();
369 arith_invalid(0);
370 }
371 return;
372 }
373
374 single_arg_2_error(st0_ptr, st0_tag);
375}
376
377
378static void fxtract(FPU_REG *st0_ptr, u_char st0_tag)
379{
380 FPU_REG *st_new_ptr;
381 u_char sign;
382 register FPU_REG *st1_ptr = st0_ptr; /* anticipate */
383
384 if ( STACK_OVERFLOW )
385 { FPU_stack_overflow(); return; }
386
387 clear_C1();
388
389 if ( st0_tag == TAG_Valid )
390 {
391 long e;
392
393 push();
394 sign = getsign(st1_ptr);
395 reg_copy(st1_ptr, st_new_ptr);
396 setexponent16(st_new_ptr, exponent(st_new_ptr));
397
398 denormal_arg:
399
400 e = exponent16(st_new_ptr);
401 convert_l2reg(&e, 1);
402 setexponentpos(st_new_ptr, 0);
403 setsign(st_new_ptr, sign);
404 FPU_settag0(TAG_Valid); /* Needed if arg was a denormal */
405 return;
406 }
407 else if ( st0_tag == TAG_Zero )
408 {
409 sign = getsign(st0_ptr);
410
411 if ( FPU_divide_by_zero(0, SIGN_NEG) < 0 )
412 return;
413
414 push();
415 FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
416 setsign(st_new_ptr, sign);
417 return;
418 }
419
420 if ( st0_tag == TAG_Special )
421 st0_tag = FPU_Special(st0_ptr);
422
423 if ( st0_tag == TW_Denormal )
424 {
425 if (denormal_operand() < 0 )
426 return;
427
428 push();
429 sign = getsign(st1_ptr);
430 FPU_to_exp16(st1_ptr, st_new_ptr);
431 goto denormal_arg;
432 }
433 else if ( st0_tag == TW_Infinity )
434 {
435 sign = getsign(st0_ptr);
436 setpositive(st0_ptr);
437 push();
438 FPU_copy_to_reg0(&CONST_INF, TAG_Special);
439 setsign(st_new_ptr, sign);
440 return;
441 }
442 else if ( st0_tag == TW_NaN )
443 {
444 if ( real_1op_NaN(st0_ptr) < 0 )
445 return;
446
447 push();
448 FPU_copy_to_reg0(st0_ptr, TAG_Special);
449 return;
450 }
451 else if ( st0_tag == TAG_Empty )
452 {
453 /* Is this the correct behaviour? */
454 if ( control_word & EX_Invalid )
455 {
456 FPU_stack_underflow();
457 push();
458 FPU_stack_underflow();
459 }
460 else
461 EXCEPTION(EX_StackUnder);
462 }
463#ifdef PARANOID
464 else
465 EXCEPTION(EX_INTERNAL | 0x119);
466#endif /* PARANOID */
467}
468
469
470static void fdecstp(void)
471{
472 clear_C1();
473 top--;
474}
475
476static void fincstp(void)
477{
478 clear_C1();
479 top++;
480}
481
482
483static void fsqrt_(FPU_REG *st0_ptr, u_char st0_tag)
484{
485 int expon;
486
487 clear_C1();
488
489 if ( st0_tag == TAG_Valid )
490 {
491 u_char tag;
492
493 if (signnegative(st0_ptr))
494 {
495 arith_invalid(0); /* sqrt(negative) is invalid */
496 return;
497 }
498
499 /* make st(0) in [1.0 .. 4.0) */
500 expon = exponent(st0_ptr);
501
502 denormal_arg:
503
504 setexponent16(st0_ptr, (expon & 1));
505
506 /* Do the computation, the sign of the result will be positive. */
507 tag = wm_sqrt(st0_ptr, 0, 0, control_word, SIGN_POS);
508 addexponent(st0_ptr, expon >> 1);
509 FPU_settag0(tag);
510 return;
511 }
512
513 if ( st0_tag == TAG_Zero )
514 return;
515
516 if ( st0_tag == TAG_Special )
517 st0_tag = FPU_Special(st0_ptr);
518
519 if ( st0_tag == TW_Infinity )
520 {
521 if ( signnegative(st0_ptr) )
522 arith_invalid(0); /* sqrt(-Infinity) is invalid */
523 return;
524 }
525 else if ( st0_tag == TW_Denormal )
526 {
527 if (signnegative(st0_ptr))
528 {
529 arith_invalid(0); /* sqrt(negative) is invalid */
530 return;
531 }
532
533 if ( denormal_operand() < 0 )
534 return;
535
536 FPU_to_exp16(st0_ptr, st0_ptr);
537
538 expon = exponent16(st0_ptr);
539
540 goto denormal_arg;
541 }
542
543 single_arg_error(st0_ptr, st0_tag);
544
545}
546
547
548static void frndint_(FPU_REG *st0_ptr, u_char st0_tag)
549{
550 int flags, tag;
551
552 if ( st0_tag == TAG_Valid )
553 {
554 u_char sign;
555
556 denormal_arg:
557
558 sign = getsign(st0_ptr);
559
560 if (exponent(st0_ptr) > 63)
561 return;
562
563 if ( st0_tag == TW_Denormal )
564 {
565 if (denormal_operand() < 0 )
566 return;
567 }
568
569 /* Fortunately, this can't overflow to 2^64 */
570 if ( (flags = FPU_round_to_int(st0_ptr, st0_tag)) )
571 set_precision_flag(flags);
572
573 setexponent16(st0_ptr, 63);
574 tag = FPU_normalize(st0_ptr);
575 setsign(st0_ptr, sign);
576 FPU_settag0(tag);
577 return;
578 }
579
580 if ( st0_tag == TAG_Zero )
581 return;
582
583 if ( st0_tag == TAG_Special )
584 st0_tag = FPU_Special(st0_ptr);
585
586 if ( st0_tag == TW_Denormal )
587 goto denormal_arg;
588 else if ( st0_tag == TW_Infinity )
589 return;
590 else
591 single_arg_error(st0_ptr, st0_tag);
592}
593
594
595static int fsin(FPU_REG *st0_ptr, u_char tag)
596{
597 u_char arg_sign = getsign(st0_ptr);
598
599 if ( tag == TAG_Valid )
600 {
601 int q;
602
603 if ( exponent(st0_ptr) > -40 )
604 {
605 if ( (q = trig_arg(st0_ptr, 0)) == -1 )
606 {
607 /* Operand is out of range */
608 return 1;
609 }
610
611 poly_sine(st0_ptr);
612
613 if (q & 2)
614 changesign(st0_ptr);
615
616 setsign(st0_ptr, getsign(st0_ptr) ^ arg_sign);
617
618 /* We do not really know if up or down */
619 set_precision_flag_up();
620 return 0;
621 }
622 else
623 {
624 /* For a small arg, the result == the argument */
625 set_precision_flag_up(); /* Must be up. */
626 return 0;
627 }
628 }
629
630 if ( tag == TAG_Zero )
631 {
632 setcc(0);
633 return 0;
634 }
635
636 if ( tag == TAG_Special )
637 tag = FPU_Special(st0_ptr);
638
639 if ( tag == TW_Denormal )
640 {
641 if ( denormal_operand() < 0 )
642 return 1;
643
644 /* For a small arg, the result == the argument */
645 /* Underflow may happen */
646 FPU_to_exp16(st0_ptr, st0_ptr);
647
648 tag = FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign);
649
650 FPU_settag0(tag);
651
652 return 0;
653 }
654 else if ( tag == TW_Infinity )
655 {
656 /* The 80486 treats infinity as an invalid operand */
657 arith_invalid(0);
658 return 1;
659 }
660 else
661 {
662 single_arg_error(st0_ptr, tag);
663 return 1;
664 }
665}
666
667
668static int f_cos(FPU_REG *st0_ptr, u_char tag)
669{
670 u_char st0_sign;
671
672 st0_sign = getsign(st0_ptr);
673
674 if ( tag == TAG_Valid )
675 {
676 int q;
677
678 if ( exponent(st0_ptr) > -40 )
679 {
680 if ( (exponent(st0_ptr) < 0)
681 || ((exponent(st0_ptr) == 0)
682 && (significand(st0_ptr) <= 0xc90fdaa22168c234LL)) )
683 {
684 poly_cos(st0_ptr);
685
686 /* We do not really know if up or down */
687 set_precision_flag_down();
688
689 return 0;
690 }
691 else if ( (q = trig_arg(st0_ptr, FCOS)) != -1 )
692 {
693 poly_sine(st0_ptr);
694
695 if ((q+1) & 2)
696 changesign(st0_ptr);
697
698 /* We do not really know if up or down */
699 set_precision_flag_down();
700
701 return 0;
702 }
703 else
704 {
705 /* Operand is out of range */
706 return 1;
707 }
708 }
709 else
710 {
711 denormal_arg:
712
713 setcc(0);
714 FPU_copy_to_reg0(&CONST_1, TAG_Valid);
715#ifdef PECULIAR_486
716 set_precision_flag_down(); /* 80486 appears to do this. */
717#else
718 set_precision_flag_up(); /* Must be up. */
719#endif /* PECULIAR_486 */
720 return 0;
721 }
722 }
723 else if ( tag == TAG_Zero )
724 {
725 FPU_copy_to_reg0(&CONST_1, TAG_Valid);
726 setcc(0);
727 return 0;
728 }
729
730 if ( tag == TAG_Special )
731 tag = FPU_Special(st0_ptr);
732
733 if ( tag == TW_Denormal )
734 {
735 if ( denormal_operand() < 0 )
736 return 1;
737
738 goto denormal_arg;
739 }
740 else if ( tag == TW_Infinity )
741 {
742 /* The 80486 treats infinity as an invalid operand */
743 arith_invalid(0);
744 return 1;
745 }
746 else
747 {
748 single_arg_error(st0_ptr, tag); /* requires st0_ptr == &st(0) */
749 return 1;
750 }
751}
752
753
754static void fcos(FPU_REG *st0_ptr, u_char st0_tag)
755{
756 f_cos(st0_ptr, st0_tag);
757}
758
759
760static void fsincos(FPU_REG *st0_ptr, u_char st0_tag)
761{
762 FPU_REG *st_new_ptr;
763 FPU_REG arg;
764 u_char tag;
765
766 /* Stack underflow has higher priority */
767 if ( st0_tag == TAG_Empty )
768 {
769 FPU_stack_underflow(); /* Puts a QNaN in st(0) */
770 if ( control_word & CW_Invalid )
771 {
772 st_new_ptr = &st(-1);
773 push();
774 FPU_stack_underflow(); /* Puts a QNaN in the new st(0) */
775 }
776 return;
777 }
778
779 if ( STACK_OVERFLOW )
780 { FPU_stack_overflow(); return; }
781
782 if ( st0_tag == TAG_Special )
783 tag = FPU_Special(st0_ptr);
784 else
785 tag = st0_tag;
786
787 if ( tag == TW_NaN )
788 {
789 single_arg_2_error(st0_ptr, TW_NaN);
790 return;
791 }
792 else if ( tag == TW_Infinity )
793 {
794 /* The 80486 treats infinity as an invalid operand */
795 if ( arith_invalid(0) >= 0 )
796 {
797 /* Masked response */
798 push();
799 arith_invalid(0);
800 }
801 return;
802 }
803
804 reg_copy(st0_ptr, &arg);
805 if ( !fsin(st0_ptr, st0_tag) )
806 {
807 push();
808 FPU_copy_to_reg0(&arg, st0_tag);
809 f_cos(&st(0), st0_tag);
810 }
811 else
812 {
813 /* An error, so restore st(0) */
814 FPU_copy_to_reg0(&arg, st0_tag);
815 }
816}
817
818
819/*---------------------------------------------------------------------------*/
820/* The following all require two arguments: st(0) and st(1) */
821
822/* A lean, mean kernel for the fprem instructions. This relies upon
823 the division and rounding to an integer in do_fprem giving an
824 exact result. Because of this, rem_kernel() needs to deal only with
825 the least significant 64 bits, the more significant bits of the
826 result must be zero.
827 */
/* Computes *y = (st0 << n) - st1 * q, keeping only the low 64 bits.
   st0, st1: 64-bit significands of the dividend and the divisor.
   q:        the integer quotient.
   n:        left shift applied to st0 first; the callers in this file
             pass an exponent difference (presumably always in 0..63 --
             TODO confirm, a shift outside that range is undefined).
   The product is built from three 32x32 multiplies; the msw*msw term
   is omitted because it cannot affect the low 64 bits. */
828static void rem_kernel(unsigned long long st0, unsigned long long *y,
829 unsigned long long st1,
830 unsigned long long q, int n)
831{
832 int dummy;
833 unsigned long long x;
834
835 x = st0 << n;
836
837 /* Do the required multiplication and subtraction in the one operation */
838
/* Full 64-bit product of the two low words: low half subtracted from the
   low word of x, high half (with borrow) from the high word of x. */
839 /* lsw x -= lsw st1 * lsw q */
840 asm volatile ("mull %4; subl %%eax,%0; sbbl %%edx,%1"
841 :"=m" (((unsigned *)&x)[0]), "=m" (((unsigned *)&x)[1]),
842 "=a" (dummy)
843 :"2" (((unsigned *)&st1)[0]), "m" (((unsigned *)&q)[0])
844 :"%dx");
/* For the two cross terms only the low 32 bits of each product (eax)
   reach the high word of x; the upper halves would land above bit 63. */
845 /* msw x -= msw st1 * lsw q */
846 asm volatile ("mull %3; subl %%eax,%0"
847 :"=m" (((unsigned *)&x)[1]), "=a" (dummy)
848 :"1" (((unsigned *)&st1)[1]), "m" (((unsigned *)&q)[0])
849 :"%dx");
850 /* msw x -= lsw st1 * msw q */
851 asm volatile ("mull %3; subl %%eax,%0"
852 :"=m" (((unsigned *)&x)[1]), "=a" (dummy)
853 :"1" (((unsigned *)&st1)[0]), "m" (((unsigned *)&q)[1])
854 :"%dx");
855
856 *y = x;
857}
858
859
860/* Remainder of st(0) / st(1) */
861/* This routine produces exact results, i.e. there is never any
862 rounding or truncation, etc of the result. */
863static void do_fprem(FPU_REG *st0_ptr, u_char st0_tag, int round)
864{
865 FPU_REG *st1_ptr = &st(1);
866 u_char st1_tag = FPU_gettagi(1);
867
868 if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) )
869 {
870 FPU_REG tmp, st0, st1;
871 u_char st0_sign, st1_sign;
872 u_char tmptag;
873 int tag;
874 int old_cw;
875 int expdif;
876 long long q;
877 unsigned short saved_status;
878 int cc;
879
880 fprem_valid:
881 /* Convert registers for internal use. */
882 st0_sign = FPU_to_exp16(st0_ptr, &st0);
883 st1_sign = FPU_to_exp16(st1_ptr, &st1);
884 expdif = exponent16(&st0) - exponent16(&st1);
885
886 old_cw = control_word;
887 cc = 0;
888
889 /* We want the status following the denorm tests, but don't want
890 the status changed by the arithmetic operations. */
891 saved_status = partial_status;
892 control_word &= ~CW_RC;
893 control_word |= RC_CHOP;
894
895 if ( expdif < 64 )
896 {
897 /* This should be the most common case */
898
899 if ( expdif > -2 )
900 {
901 u_char sign = st0_sign ^ st1_sign;
902 tag = FPU_u_div(&st0, &st1, &tmp,
903 PR_64_BITS | RC_CHOP | 0x3f,
904 sign);
905 setsign(&tmp, sign);
906
907 if ( exponent(&tmp) >= 0 )
908 {
909 FPU_round_to_int(&tmp, tag); /* Fortunately, this can't
910 overflow to 2^64 */
911 q = significand(&tmp);
912
913 rem_kernel(significand(&st0),
914 &significand(&tmp),
915 significand(&st1),
916 q, expdif);
917
918 setexponent16(&tmp, exponent16(&st1));
919 }
920 else
921 {
922 reg_copy(&st0, &tmp);
923 q = 0;
924 }
925
926 if ( (round == RC_RND) && (tmp.sigh & 0xc0000000) )
927 {
928 /* We may need to subtract st(1) once more,
929 to get a result <= 1/2 of st(1). */
930 unsigned long long x;
931 expdif = exponent16(&st1) - exponent16(&tmp);
932 if ( expdif <= 1 )
933 {
934 if ( expdif == 0 )
935 x = significand(&st1) - significand(&tmp);
936 else /* expdif is 1 */
937 x = (significand(&st1) << 1) - significand(&tmp);
938 if ( (x < significand(&tmp)) ||
939 /* or equi-distant (from 0 & st(1)) and q is odd */
940 ((x == significand(&tmp)) && (q & 1) ) )
941 {
942 st0_sign = ! st0_sign;
943 significand(&tmp) = x;
944 q++;
945 }
946 }
947 }
948
949 if (q & 4) cc |= SW_C0;
950 if (q & 2) cc |= SW_C3;
951 if (q & 1) cc |= SW_C1;
952 }
953 else
954 {
955 control_word = old_cw;
956 setcc(0);
957 return;
958 }
959 }
960 else
961 {
962 /* There is a large exponent difference ( >= 64 ) */
963 /* To make much sense, the code in this section should
964 be done at high precision. */
965 int exp_1, N;
966 u_char sign;
967
968 /* prevent overflow here */
969 /* N is 'a number between 32 and 63' (p26-113) */
970 reg_copy(&st0, &tmp);
971 tmptag = st0_tag;
972 N = (expdif & 0x0000001f) + 32; /* This choice gives results
973 identical to an AMD 486 */
974 setexponent16(&tmp, N);
975 exp_1 = exponent16(&st1);
976 setexponent16(&st1, 0);
977 expdif -= N;
978
979 sign = getsign(&tmp) ^ st1_sign;
980 tag = FPU_u_div(&tmp, &st1, &tmp, PR_64_BITS | RC_CHOP | 0x3f,
981 sign);
982 setsign(&tmp, sign);
983
984 FPU_round_to_int(&tmp, tag); /* Fortunately, this can't
985 overflow to 2^64 */
986
987 rem_kernel(significand(&st0),
988 &significand(&tmp),
989 significand(&st1),
990 significand(&tmp),
991 exponent(&tmp)
992 );
993 setexponent16(&tmp, exp_1 + expdif);
994
995 /* It is possible for the operation to be complete here.
996 What does the IEEE standard say? The Intel 80486 manual
997 implies that the operation will never be completed at this
998 point, and the behaviour of a real 80486 confirms this.
999 */
1000 if ( !(tmp.sigh | tmp.sigl) )
1001 {
1002 /* The result is zero */
1003 control_word = old_cw;
1004 partial_status = saved_status;
1005 FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
1006 setsign(&st0, st0_sign);
1007#ifdef PECULIAR_486
1008 setcc(SW_C2);
1009#else
1010 setcc(0);
1011#endif /* PECULIAR_486 */
1012 return;
1013 }
1014 cc = SW_C2;
1015 }
1016
1017 control_word = old_cw;
1018 partial_status = saved_status;
1019 tag = FPU_normalize_nuo(&tmp);
1020 reg_copy(&tmp, st0_ptr);
1021
1022 /* The only condition to be looked for is underflow,
1023 and it can occur here only if underflow is unmasked. */
1024 if ( (exponent16(&tmp) <= EXP_UNDER) && (tag != TAG_Zero)
1025 && !(control_word & CW_Underflow) )
1026 {
1027 setcc(cc);
1028 tag = arith_underflow(st0_ptr);
1029 setsign(st0_ptr, st0_sign);
1030 FPU_settag0(tag);
1031 return;
1032 }
1033 else if ( (exponent16(&tmp) > EXP_UNDER) || (tag == TAG_Zero) )
1034 {
1035 stdexp(st0_ptr);
1036 setsign(st0_ptr, st0_sign);
1037 }
1038 else
1039 {
1040 tag = FPU_round(st0_ptr, 0, 0, FULL_PRECISION, st0_sign);
1041 }
1042 FPU_settag0(tag);
1043 setcc(cc);
1044
1045 return;
1046 }
1047
1048 if ( st0_tag == TAG_Special )
1049 st0_tag = FPU_Special(st0_ptr);
1050 if ( st1_tag == TAG_Special )
1051 st1_tag = FPU_Special(st1_ptr);
1052
1053 if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal))
1054 || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid))
1055 || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) )
1056 {
1057 if ( denormal_operand() < 0 )
1058 return;
1059 goto fprem_valid;
1060 }
1061 else if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) )
1062 {
1063 FPU_stack_underflow();
1064 return;
1065 }
1066 else if ( st0_tag == TAG_Zero )
1067 {
1068 if ( st1_tag == TAG_Valid )
1069 {
1070 setcc(0); return;
1071 }
1072 else if ( st1_tag == TW_Denormal )
1073 {
1074 if ( denormal_operand() < 0 )
1075 return;
1076 setcc(0); return;
1077 }
1078 else if ( st1_tag == TAG_Zero )
1079 { arith_invalid(0); return; } /* fprem(?,0) always invalid */
1080 else if ( st1_tag == TW_Infinity )
1081 { setcc(0); return; }
1082 }
1083 else if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) )
1084 {
1085 if ( st1_tag == TAG_Zero )
1086 {
1087 arith_invalid(0); /* fprem(Valid,Zero) is invalid */
1088 return;
1089 }
1090 else if ( st1_tag != TW_NaN )
1091 {
1092 if ( ((st0_tag == TW_Denormal) || (st1_tag == TW_Denormal))
1093 && (denormal_operand() < 0) )
1094 return;
1095
1096 if ( st1_tag == TW_Infinity )
1097 {
1098 /* fprem(Valid,Infinity) is o.k. */
1099 setcc(0); return;
1100 }
1101 }
1102 }
1103 else if ( st0_tag == TW_Infinity )
1104 {
1105 if ( st1_tag != TW_NaN )
1106 {
1107 arith_invalid(0); /* fprem(Infinity,?) is invalid */
1108 return;
1109 }
1110 }
1111
1112 /* One of the registers must contain a NaN if we got here. */
1113
1114#ifdef PARANOID
1115 if ( (st0_tag != TW_NaN) && (st1_tag != TW_NaN) )
1116 EXCEPTION(EX_INTERNAL | 0x118);
1117#endif /* PARANOID */
1118
1119 real_2op_NaN(st1_ptr, st1_tag, 0, st1_ptr);
1120
1121}
1122
1123
1124/* ST(1) <- ST(1) * log ST; pop ST */
/*
 * fyl2x: replace st(1) with st(1) * log2(st(0)) and pop the stack.
 *
 *   st0_ptr, st0_tag   st(0) and its tag; st(1) is fetched here.
 *
 * Fast path: both tags are TAG_Valid (denormals rejoin it at 'both_valid'
 * once denormal_operand() has accepted them).  A positive st(0) that is an
 * exact power of two takes the "precise" branch, where the logarithm is
 * simply the exponent; everything else goes through poly_l2().  A negative
 * st(0) raises an invalid-operation exception.
 *
 * The rest of the function is a ladder over the remaining tag
 * combinations (empty, zero, NaN, infinity).  Every path that does not
 * return early falls out to the final FPU_pop().
 */
1125static void fyl2x(FPU_REG *st0_ptr, u_char st0_tag)
1126{
/* 'exponent' is a scratch register holding the integer exponent of st(0)
   in the power-of-two special case. */
1127 FPU_REG *st1_ptr = &st(1), exponent;
1128 u_char st1_tag = FPU_gettagi(1);
1129 u_char sign;
1130 int e, tag;
1131
1132 clear_C1();
1133
1134 if ( (st0_tag == TAG_Valid) && (st1_tag == TAG_Valid) )
1135 {
1136 both_valid:
1137 /* Both regs are Valid or Denormal */
1138 if ( signpositive(st0_ptr) )
1139 {
1140 if ( st0_tag == TW_Denormal )
1141 FPU_to_exp16(st0_ptr, st0_ptr);
1142 else
1143 /* Convert st(0) for internal use. */
1144 setexponent16(st0_ptr, exponent(st0_ptr));
1145
/* Significand is exactly 1.0, i.e. st(0) is a power of two. */
1146 if ( (st0_ptr->sigh == 0x80000000) && (st0_ptr->sigl == 0) )
1147 {
1148 /* Special case. The result can be precise. */
1149 u_char esign;
1150 e = exponent16(st0_ptr);
1151 if ( e >= 0 )
1152 {
1153 exponent.sigh = e;
1154 esign = SIGN_POS;
1155 }
1156 else
1157 {
1158 exponent.sigh = -e;
1159 esign = SIGN_NEG;
1160 }
/* Build |e| as a register value, normalize it, then multiply it into
   st(1). */
1161 exponent.sigl = 0;
1162 setexponent16(&exponent, 31);
1163 tag = FPU_normalize_nuo(&exponent);
1164 stdexp(&exponent);
1165 setsign(&exponent, esign);
1166 tag = FPU_mul(&exponent, tag, 1, FULL_PRECISION);
1167 if ( tag >= 0 )
1168 FPU_settagi(1, tag);
1169 }
1170 else
1171 {
1172 /* The usual case */
1173 sign = getsign(st1_ptr);
1174 if ( st1_tag == TW_Denormal )
1175 FPU_to_exp16(st1_ptr, st1_ptr);
1176 else
1177 /* Convert st(1) for internal use. */
1178 setexponent16(st1_ptr, exponent(st1_ptr));
1179 poly_l2(st0_ptr, st1_ptr, sign);
1180 }
1181 }
1182 else
1183 {
1184 /* negative */
1185 if ( arith_invalid(1) < 0 )
1186 return;
1187 }
1188
1189 FPU_pop();
1190
1191 return;
1192 }
1193
/* Slow path: resolve TAG_Special into the TW_* classes first. */
1194 if ( st0_tag == TAG_Special )
1195 st0_tag = FPU_Special(st0_ptr);
1196 if ( st1_tag == TAG_Special )
1197 st1_tag = FPU_Special(st1_ptr);
1198
1199 if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) )
1200 {
1201 FPU_stack_underflow_pop(1);
1202 return;
1203 }
1204 else if ( (st0_tag <= TW_Denormal) && (st1_tag <= TW_Denormal) )
1205 {
1206 if ( st0_tag == TAG_Zero )
1207 {
1208 if ( st1_tag == TAG_Zero )
1209 {
1210 /* Both args zero is invalid */
1211 if ( arith_invalid(1) < 0 )
1212 return;
1213 }
1214 else
1215 {
/* This local 'sign' shadows the function-level one. */
1216 u_char sign;
1217 sign = getsign(st1_ptr)^SIGN_NEG;
1218 if ( FPU_divide_by_zero(1, sign) < 0 )
1219 return;
1220
1221 setsign(st1_ptr, sign);
1222 }
1223 }
1224 else if ( st1_tag == TAG_Zero )
1225 {
1226 /* st(1) contains zero, st(0) valid <> 0 */
1227 /* Zero is the valid answer */
1228 sign = getsign(st1_ptr);
1229
1230 if ( signnegative(st0_ptr) )
1231 {
1232 /* log(negative) */
1233 if ( arith_invalid(1) < 0 )
1234 return;
1235 }
1236 else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
1237 return;
1238 else
1239 {
/* st(0) < 1.0 has a negative logarithm, so the sign of the zero result
   is flipped. */
1240 if ( exponent(st0_ptr) < 0 )
1241 sign ^= SIGN_NEG;
1242
1243 FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
1244 setsign(st1_ptr, sign);
1245 }
1246 }
1247 else
1248 {
1249 /* One or both operands are denormals. */
1250 if ( denormal_operand() < 0 )
1251 return;
1252 goto both_valid;
1253 }
1254 }
1255 else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) )
1256 {
1257 if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
1258 return;
1259 }
1260 /* One or both arg must be an infinity */
1261 else if ( st0_tag == TW_Infinity )
1262 {
1263 if ( (signnegative(st0_ptr)) || (st1_tag == TAG_Zero) )
1264 {
1265 /* log(-infinity) or 0*log(infinity) */
1266 if ( arith_invalid(1) < 0 )
1267 return;
1268 }
1269 else
1270 {
1271 u_char sign = getsign(st1_ptr);
1272
1273 if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
1274 return;
1275
1276 FPU_copy_to_reg1(&CONST_INF, TAG_Special);
1277 setsign(st1_ptr, sign);
1278 }
1279 }
1280 /* st(1) must be infinity here */
1281 else if ( ((st0_tag == TAG_Valid) || (st0_tag == TW_Denormal))
1282 && ( signpositive(st0_ptr) ) )
1283 {
1284 if ( exponent(st0_ptr) >= 0 )
1285 {
1286 if ( (exponent(st0_ptr) == 0) &&
1287 (st0_ptr->sigh == 0x80000000) &&
1288 (st0_ptr->sigl == 0) )
1289 {
1290 /* st(0) holds 1.0 */
1291 /* infinity*log(1) */
1292 if ( arith_invalid(1) < 0 )
1293 return;
1294 }
1295 /* else st(0) is positive and > 1.0 */
1296 }
1297 else
1298 {
1299 /* st(0) is positive and < 1.0 */
1300
1301 if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
1302 return;
1303
1304 changesign(st1_ptr);
1305 }
1306 }
1307 else
1308 {
1309 /* st(0) must be zero or negative */
1310 if ( st0_tag == TAG_Zero )
1311 {
1312 /* This should be invalid, but a real 80486 is happy with it. */
1313
1314#ifndef PECULIAR_486
1315 sign = getsign(st1_ptr);
1316 if ( FPU_divide_by_zero(1, sign) < 0 )
1317 return;
1318#endif /* PECULIAR_486 */
1319
1320 changesign(st1_ptr);
1321 }
1322 else if ( arith_invalid(1) < 0 ) /* log(negative) */
1323 return;
1324 }
1325
1326 FPU_pop();
1327}
1328
1329
/*
 * fpatan: two-operand arctangent.  The result replaces st(1) and st(0) is
 * popped.
 *
 *   st0_ptr, st0_tag   st(0) and its tag; st(1) is fetched here.
 *
 * Valid operands (and denormals accepted by denormal_operand()) are handed
 * to poly_atan().  Every other tag combination is answered here with one
 * of the constants 0, pi/4, 3*pi/4, pi/2 or pi; in each of those cases the
 * result keeps the sign st(1) had on entry.
 *
 * Paths that fall out of the ladder pop the stack and flag the result as
 * inexact; the paths that return early after their own FPU_pop() do not
 * set the precision flag.
 */
1330static void fpatan(FPU_REG *st0_ptr, u_char st0_tag)
1331{
1332 FPU_REG *st1_ptr = &st(1);
1333 u_char st1_tag = FPU_gettagi(1);
1334 int tag;
1335
1336 clear_C1();
1337 if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) )
1338 {
1339 valid_atan:
1340
1341 poly_atan(st0_ptr, st0_tag, st1_ptr, st1_tag);
1342
1343 FPU_pop();
1344
1345 return;
1346 }
1347
/* Slow path: resolve TAG_Special into the TW_* classes first. */
1348 if ( st0_tag == TAG_Special )
1349 st0_tag = FPU_Special(st0_ptr);
1350 if ( st1_tag == TAG_Special )
1351 st1_tag = FPU_Special(st1_ptr);
1352
1353 if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal))
1354 || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid))
1355 || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) )
1356 {
1357 if ( denormal_operand() < 0 )
1358 return;
1359
1360 goto valid_atan;
1361 }
1362 else if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) )
1363 {
1364 FPU_stack_underflow_pop(1);
1365 return;
1366 }
1367 else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) )
1368 {
1369 if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) >= 0 )
1370 FPU_pop();
1371 return;
1372 }
1373 else if ( (st0_tag == TW_Infinity) || (st1_tag == TW_Infinity) )
1374 {
/* Remember st(1)'s sign: it is re-applied to whichever constant is
   loaded below. */
1375 u_char sign = getsign(st1_ptr);
1376 if ( st0_tag == TW_Infinity )
1377 {
1378 if ( st1_tag == TW_Infinity )
1379 {
1380 if ( signpositive(st0_ptr) )
1381 {
1382 FPU_copy_to_reg1(&CONST_PI4, TAG_Valid);
1383 }
1384 else
1385 {
/* st(0) is -infinity: the magnitude is pi/4 + pi/2. */
1386 setpositive(st1_ptr);
1387 tag = FPU_u_add(&CONST_PI4, &CONST_PI2, st1_ptr,
1388 FULL_PRECISION, SIGN_POS,
1389 exponent(&CONST_PI4), exponent(&CONST_PI2));
1390 if ( tag >= 0 )
1391 FPU_settagi(1, tag);
1392 }
1393 }
1394 else
1395 {
1396 if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
1397 return;
1398
1399 if ( signpositive(st0_ptr) )
1400 {
1401 FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
1402 setsign(st1_ptr, sign); /* An 80486 preserves the sign */
1403 FPU_pop();
1404 return;
1405 }
1406 else
1407 {
1408 FPU_copy_to_reg1(&CONST_PI, TAG_Valid);
1409 }
1410 }
1411 }
1412 else
1413 {
1414 /* st(1) is infinity, st(0) not infinity */
1415 if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
1416 return;
1417
1418 FPU_copy_to_reg1(&CONST_PI2, TAG_Valid);
1419 }
1420 setsign(st1_ptr, sign);
1421 }
1422 else if ( st1_tag == TAG_Zero )
1423 {
1424 /* st(0) must be valid or zero */
1425 u_char sign = getsign(st1_ptr);
1426
1427 if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
1428 return;
1429
1430 if ( signpositive(st0_ptr) )
1431 {
/* The result is the zero already held in st(1), so just pop. */
1432 /* An 80486 preserves the sign */
1433 FPU_pop();
1434 return;
1435 }
1436
1437 FPU_copy_to_reg1(&CONST_PI, TAG_Valid);
1438 setsign(st1_ptr, sign);
1439 }
1440 else if ( st0_tag == TAG_Zero )
1441 {
1442 /* st(1) must be TAG_Valid here */
1443 u_char sign = getsign(st1_ptr);
1444
1445 if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
1446 return;
1447
1448 FPU_copy_to_reg1(&CONST_PI2, TAG_Valid);
1449 setsign(st1_ptr, sign);
1450 }
1451#ifdef PARANOID
1452 else
1453 EXCEPTION(EX_INTERNAL | 0x125);
1454#endif /* PARANOID */
1455
1456 FPU_pop();
1457 set_precision_flag_up(); /* We do not really know if up or down */
1458}
1459
1460
/* fprem: remainder of st(0) / st(1) via do_fprem() with RC_CHOP, i.e.
   without the extra round-to-nearest subtraction step. */
1461static void fprem(FPU_REG *st0_ptr, u_char st0_tag)
1462{
1463 do_fprem(st0_ptr, st0_tag, RC_CHOP);
1464}
1465
1466
/* fprem1: remainder of st(0) / st(1) via do_fprem() with RC_RND, which
   lets do_fprem() subtract st(1) once more so that the result is at most
   half of st(1) in magnitude. */
1467static void fprem1(FPU_REG *st0_ptr, u_char st0_tag)
1468{
1469 do_fprem(st0_ptr, st0_tag, RC_RND);
1470}
1471
1472
/*
 * fyl2xp1: replace st(1) with st(1) * log(st(0) + 1) and pop the stack.
 *
 *   st0_ptr, st0_tag   st(0) and its tag; st(1) is fetched here.
 *
 * Valid operands (and denormals accepted by denormal_operand()) are
 * converted to the internal 16-bit-exponent form and evaluated by
 * poly_l2p1(); a non-zero return from poly_l2p1() leaves the stack
 * unpopped.  All other tag combinations are decided by the ladder below;
 * every path that does not return early falls out to the final FPU_pop().
 */
1473static void fyl2xp1(FPU_REG *st0_ptr, u_char st0_tag)
1474{
1475 u_char sign, sign1;
1476 FPU_REG *st1_ptr = &st(1), a, b;
1477 u_char st1_tag = FPU_gettagi(1);
1478
1479 clear_C1();
1480 if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) )
1481 {
1482 valid_yl2xp1:
1483
1484 sign = getsign(st0_ptr);
1485 sign1 = getsign(st1_ptr);
1486
1487 FPU_to_exp16(st0_ptr, &a);
1488 FPU_to_exp16(st1_ptr, &b);
1489
1490 if ( poly_l2p1(sign, sign1, &a, &b, st1_ptr) )
1491 return;
1492
1493 FPU_pop();
1494 return;
1495 }
1496
/* Slow path: resolve TAG_Special into the TW_* classes first. */
1497 if ( st0_tag == TAG_Special )
1498 st0_tag = FPU_Special(st0_ptr);
1499 if ( st1_tag == TAG_Special )
1500 st1_tag = FPU_Special(st1_ptr);
1501
1502 if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal))
1503 || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid))
1504 || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) )
1505 {
1506 if ( denormal_operand() < 0 )
1507 return;
1508
1509 goto valid_yl2xp1;
1510 }
/* Note the bitwise '|' here; both operands are 0/1 comparison results, so
   the outcome equals that of the '||' used by the sibling functions. */
1511 else if ( (st0_tag == TAG_Empty) | (st1_tag == TAG_Empty) )
1512 {
1513 FPU_stack_underflow_pop(1);
1514 return;
1515 }
1516 else if ( st0_tag == TAG_Zero )
1517 {
1518 switch ( st1_tag )
1519 {
1520 case TW_Denormal:
1521 if ( denormal_operand() < 0 )
1522 return;
1523
/* fall through: an accepted denormal is treated like a valid st(1) */
1524 case TAG_Zero:
1525 case TAG_Valid:
/* The result is a zero whose sign is the product of both signs. */
1526 setsign(st0_ptr, getsign(st0_ptr) ^ getsign(st1_ptr));
1527 FPU_copy_to_reg1(st0_ptr, st0_tag);
1528 break;
1529
1530 case TW_Infinity:
1531 /* Infinity*log(1) */
1532 if ( arith_invalid(1) < 0 )
1533 return;
1534 break;
1535
1536 case TW_NaN:
1537 if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
1538 return;
1539 break;
1540
1541 default:
1542#ifdef PARANOID
1543 EXCEPTION(EX_INTERNAL | 0x116);
1544 return;
1545#endif /* PARANOID */
1546 break;
1547 }
1548 }
1549 else if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) )
1550 {
/* st(1) cannot be valid or denormal here: those pairs were taken by the
   fast path or by the denormal test above. */
1551 switch ( st1_tag )
1552 {
1553 case TAG_Zero:
1554 if ( signnegative(st0_ptr) )
1555 {
1556 if ( exponent(st0_ptr) >= 0 )
1557 {
1558 /* st(0) holds <= -1.0 */
1559#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */
1560 changesign(st1_ptr);
1561#else
1562 if ( arith_invalid(1) < 0 )
1563 return;
1564#endif /* PECULIAR_486 */
1565 }
1566 else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
1567 return;
1568 else
1569 changesign(st1_ptr);
1570 }
1571 else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
1572 return;
1573 break;
1574
1575 case TW_Infinity:
1576 if ( signnegative(st0_ptr) )
1577 {
1578 if ( (exponent(st0_ptr) >= 0) &&
1579 !((st0_ptr->sigh == 0x80000000) &&
1580 (st0_ptr->sigl == 0)) )
1581 {
1582 /* st(0) holds < -1.0 */
1583#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */
1584 changesign(st1_ptr);
1585#else
1586 if ( arith_invalid(1) < 0 ) return;
1587#endif /* PECULIAR_486 */
1588 }
1589 else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
1590 return;
1591 else
1592 changesign(st1_ptr);
1593 }
1594 else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
1595 return;
1596 break;
1597
1598 case TW_NaN:
1599 if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
1600 return;
/* last case: control leaves the switch without a break */
1601 }
1602
1603 }
1604 else if ( st0_tag == TW_NaN )
1605 {
1606 if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
1607 return;
1608 }
1609 else if ( st0_tag == TW_Infinity )
1610 {
1611 if ( st1_tag == TW_NaN )
1612 {
1613 if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
1614 return;
1615 }
1616 else if ( signnegative(st0_ptr) )
1617 {
/* The two PECULIAR_486 blocks below only change whether the invalid
   exception is raised before or after the denormal check. */
1618#ifndef PECULIAR_486
1619 /* This should have higher priority than denormals, but... */
1620 if ( arith_invalid(1) < 0 ) /* log(-infinity) */
1621 return;
1622#endif /* PECULIAR_486 */
1623 if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
1624 return;
1625#ifdef PECULIAR_486
1626 /* Denormal operands actually get higher priority */
1627 if ( arith_invalid(1) < 0 ) /* log(-infinity) */
1628 return;
1629#endif /* PECULIAR_486 */
1630 }
1631 else if ( st1_tag == TAG_Zero )
1632 {
1633 /* log(infinity) */
1634 if ( arith_invalid(1) < 0 )
1635 return;
1636 }
1637
1638 /* st(1) must be valid here. */
1639
1640 else if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
1641 return;
1642
1643 /* The Manual says that log(Infinity) is invalid, but a real
1644 80486 sensibly says that it is o.k. */
1645 else
1646 {
1647 u_char sign = getsign(st1_ptr);
1648 FPU_copy_to_reg1(&CONST_INF, TAG_Special);
1649 setsign(st1_ptr, sign);
1650 }
1651 }
1652#ifdef PARANOID
1653 else
1654 {
1655 EXCEPTION(EX_INTERNAL | 0x117);
1656 return;
1657 }
1658#endif /* PARANOID */
1659
1660 FPU_pop();
1661 return;
1662
1663}
1664
1665
/*
 * fscale: scale st(0) by a power of two taken from st(1).
 *
 *   st0_ptr, st0_tag   st(0) and its tag; st(1) is fetched here.
 *
 * For valid operands st(1) is truncated to an integer (the rounding mode
 * is forced to chop for the conversion and then restored) and that
 * integer is added to the exponent of st(0).  FPU_round() then detects
 * overflow/underflow.  The stack is not popped.
 *
 * The remaining tag combinations are handled by the switches below; if
 * none of them returns, at least one register is empty and a stack
 * underflow is reported.
 */
1666static void fscale(FPU_REG *st0_ptr, u_char st0_tag)
1667{
1668 FPU_REG *st1_ptr = &st(1);
1669 u_char st1_tag = FPU_gettagi(1);
1670 int old_cw = control_word;
1671 u_char sign = getsign(st0_ptr);
1672
1673 clear_C1();
1674 if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) )
1675 {
1676 long scale;
1677 FPU_REG tmp;
1678
1679 /* Convert register for internal use. */
1680 setexponent16(st0_ptr, exponent(st0_ptr));
1681
/* Entered via goto for a denormal st(0), which has already been
   converted with FPU_to_exp16() at the jump site. */
1682 valid_scale:
1683
1684 if ( exponent(st1_ptr) > 30 )
1685 {
1686 /* 2^31 is far too large, would require 2^(2^30) or 2^(-2^30) */
1687
1688 if ( signpositive(st1_ptr) )
1689 {
1690 EXCEPTION(EX_Overflow);
1691 FPU_copy_to_reg0(&CONST_INF, TAG_Special);
1692 }
1693 else
1694 {
1695 EXCEPTION(EX_Underflow);
1696 FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
1697 }
1698 setsign(st0_ptr, sign);
1699 return;
1700 }
1701
/* Truncate a copy of st(1) to an integer with the rounding mode forced to
   chop, then restore the caller's control word. */
1702 control_word &= ~CW_RC;
1703 control_word |= RC_CHOP;
1704 reg_copy(st1_ptr, &tmp);
1705 FPU_round_to_int(&tmp, st1_tag); /* This can never overflow here */
1706 control_word = old_cw;
1707 scale = signnegative(st1_ptr) ? -tmp.sigl : tmp.sigl;
1708 scale += exponent16(st0_ptr);
1709
1710 setexponent16(st0_ptr, scale);
1711
1712 /* Use FPU_round() to properly detect under/overflow etc */
1713 FPU_round(st0_ptr, 0, 0, control_word, sign);
1714
1715 return;
1716 }
1717
/* Slow path: resolve TAG_Special into the TW_* classes first. */
1718 if ( st0_tag == TAG_Special )
1719 st0_tag = FPU_Special(st0_ptr);
1720 if ( st1_tag == TAG_Special )
1721 st1_tag = FPU_Special(st1_ptr);
1722
1723 if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) )
1724 {
1725 switch ( st1_tag )
1726 {
1727 case TAG_Valid:
1728 /* st(0) must be a denormal */
1729 if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
1730 return;
1731
1732 FPU_to_exp16(st0_ptr, st0_ptr); /* Will not be left on stack */
1733 goto valid_scale;
1734
1735 case TAG_Zero:
/* Scaling by zero leaves st(0) unchanged; only the denormal exception
   may need raising. */
1736 if ( st0_tag == TW_Denormal )
1737 denormal_operand();
1738 return;
1739
1740 case TW_Denormal:
1741 denormal_operand();
1742 return;
1743
1744 case TW_Infinity:
1745 if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
1746 return;
1747
1748 if ( signpositive(st1_ptr) )
1749 FPU_copy_to_reg0(&CONST_INF, TAG_Special);
1750 else
1751 FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
1752 setsign(st0_ptr, sign);
1753 return;
1754
1755 case TW_NaN:
1756 real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
1757 return;
1758 }
1759 }
1760 else if ( st0_tag == TAG_Zero )
1761 {
1762 switch ( st1_tag )
1763 {
1764 case TAG_Valid:
1765 case TAG_Zero:
1766 return;
1767
1768 case TW_Denormal:
1769 denormal_operand();
1770 return;
1771
1772 case TW_Infinity:
1773 if ( signpositive(st1_ptr) )
1774 arith_invalid(0); /* Zero scaled by +Infinity */
1775 return;
1776
1777 case TW_NaN:
1778 real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
1779 return;
1780 }
1781 }
1782 else if ( st0_tag == TW_Infinity )
1783 {
1784 switch ( st1_tag )
1785 {
1786 case TAG_Valid:
1787 case TAG_Zero:
1788 return;
1789
1790 case TW_Denormal:
1791 denormal_operand();
1792 return;
1793
1794 case TW_Infinity:
1795 if ( signnegative(st1_ptr) )
1796 arith_invalid(0); /* Infinity scaled by -Infinity */
1797 return;
1798
1799 case TW_NaN:
1800 real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
1801 return;
1802 }
1803 }
1804 else if ( st0_tag == TW_NaN )
1805 {
1806 if ( st1_tag != TAG_Empty )
1807 { real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); return; }
1808 }
1809
1810#ifdef PARANOID
1811 if ( !((st0_tag == TAG_Empty) || (st1_tag == TAG_Empty)) )
1812 {
1813 EXCEPTION(EX_INTERNAL | 0x115);
1814 return;
1815 }
1816#endif
1817
1818 /* At least one of st(0), st(1) must be empty */
1819 FPU_stack_underflow();
1820
1821}
1822
1823
1824/*---------------------------------------------------------------------------*/
1825
1826static FUNC_ST0 const trig_table_a[] = {
1827 f2xm1, fyl2x, fptan, fpatan,
1828 fxtract, fprem1, (FUNC_ST0)fdecstp, (FUNC_ST0)fincstp
1829};
1830
1831void FPU_triga(void)
1832{
1833 (trig_table_a[FPU_rm])(&st(0), FPU_gettag0());
1834}
1835
1836
1837static FUNC_ST0 const trig_table_b[] =
1838 {
1839 fprem, fyl2xp1, fsqrt_, fsincos, frndint_, fscale, (FUNC_ST0)fsin, fcos
1840 };
1841
1842void FPU_trigb(void)
1843{
1844 (trig_table_b[FPU_rm])(&st(0), FPU_gettag0());
1845}
diff --git a/arch/i386/math-emu/get_address.c b/arch/i386/math-emu/get_address.c
new file mode 100644
index 000000000000..91175738e948
--- /dev/null
+++ b/arch/i386/math-emu/get_address.c
@@ -0,0 +1,449 @@
1/*---------------------------------------------------------------------------+
2 | get_address.c |
3 | |
4 | Get the effective address from an FPU instruction. |
5 | |
6 | Copyright (C) 1992,1993,1994,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
8 | Australia. E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13/*---------------------------------------------------------------------------+
14 | Note: |
15 | The file contains code which accesses user memory. |
16 | Emulator static data may change when user memory is accessed, due to |
17 | other processes using the emulator while swapping is in progress. |
18 +---------------------------------------------------------------------------*/
19
20
21#include <linux/stddef.h>
22
23#include <asm/uaccess.h>
24#include <asm/desc.h>
25
26#include "fpu_system.h"
27#include "exception.h"
28#include "fpu_emu.h"
29
30
/* Bit of the FPU_modrm value that the address decoders below test to
   decide whether the instruction writes to memory. */
31#define FPU_WRITE_BIT 0x10
32
/* Byte offsets, inside struct info, of the saved general registers.  The
   table is indexed with the 3-bit register numbers taken from the ModR/M
   and SIB bytes. */
33static int reg_offset[] = {
34 offsetof(struct info,___eax),
35 offsetof(struct info,___ecx),
36 offsetof(struct info,___edx),
37 offsetof(struct info,___ebx),
38 offsetof(struct info,___esp),
39 offsetof(struct info,___ebp),
40 offsetof(struct info,___esi),
41 offsetof(struct info,___edi)
42};
43
/* Saved value of general register number x, as a long lvalue. */
44#define REG_(x) (*(long *)(reg_offset[(x)]+(u_char *) FPU_info))
45
/* Offsets of the saved segment registers for vm86 mode.  Indexed by the
   segment override value minus one (see vm86_segment()); the final entry
   repeats ds -- presumably the slot used when no override prefix is
   present, TODO confirm against the PREFIX_* definitions. */
46static int reg_offset_vm86[] = {
47 offsetof(struct info,___cs),
48 offsetof(struct info,___vm86_ds),
49 offsetof(struct info,___vm86_es),
50 offsetof(struct info,___vm86_fs),
51 offsetof(struct info,___vm86_gs),
52 offsetof(struct info,___ss),
53 offsetof(struct info,___vm86_ds)
54 };
55
/* Saved vm86 segment register selected by x, as an unsigned short. */
56#define VM86_REG_(x) (*(unsigned short *) \
57 (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info))
58
59/* These are dummy, fs and gs are not saved on the stack. */
60#define ___FS ___ds
61#define ___GS ___ds
62
/* Protected-mode counterpart of reg_offset_vm86.  The FS and GS slots
   alias ds through the dummy macros above, so reading them with PM_REG_()
   does not yield the real fs/gs selectors. */
63static int reg_offset_pm[] = {
64 offsetof(struct info,___cs),
65 offsetof(struct info,___ds),
66 offsetof(struct info,___es),
67 offsetof(struct info,___FS),
68 offsetof(struct info,___GS),
69 offsetof(struct info,___ss),
70 offsetof(struct info,___ds)
71 };
72
/* Saved protected-mode segment register selected by x, as an unsigned
   short. */
73#define PM_REG_(x) (*(unsigned short *) \
74 (reg_offset_pm[((unsigned)x)]+(u_char *) FPU_info))
75
76
77/* Decode the SIB byte. This function assumes mod != 0 */
78static int sib(int mod, unsigned long *fpu_eip)
79{
80 u_char ss,index,base;
81 long offset;
82
83 RE_ENTRANT_CHECK_OFF;
84 FPU_code_access_ok(1);
85 FPU_get_user(base, (u_char __user *) (*fpu_eip)); /* The SIB byte */
86 RE_ENTRANT_CHECK_ON;
87 (*fpu_eip)++;
88 ss = base >> 6;
89 index = (base >> 3) & 7;
90 base &= 7;
91
92 if ((mod == 0) && (base == 5))
93 offset = 0; /* No base register */
94 else
95 offset = REG_(base);
96
97 if (index == 4)
98 {
99 /* No index register */
100 /* A non-zero ss is illegal */
101 if ( ss )
102 EXCEPTION(EX_Invalid);
103 }
104 else
105 {
106 offset += (REG_(index)) << ss;
107 }
108
109 if (mod == 1)
110 {
111 /* 8 bit signed displacement */
112 long displacement;
113 RE_ENTRANT_CHECK_OFF;
114 FPU_code_access_ok(1);
115 FPU_get_user(displacement, (signed char __user *) (*fpu_eip));
116 offset += displacement;
117 RE_ENTRANT_CHECK_ON;
118 (*fpu_eip)++;
119 }
120 else if (mod == 2 || base == 5) /* The second condition also has mod==0 */
121 {
122 /* 32 bit displacement */
123 long displacement;
124 RE_ENTRANT_CHECK_OFF;
125 FPU_code_access_ok(4);
126 FPU_get_user(displacement, (long __user *) (*fpu_eip));
127 offset += displacement;
128 RE_ENTRANT_CHECK_ON;
129 (*fpu_eip) += 4;
130 }
131
132 return offset;
133}
134
135
136static unsigned long vm86_segment(u_char segment,
137 struct address *addr)
138{
139 segment--;
140#ifdef PARANOID
141 if ( segment > PREFIX_SS_ )
142 {
143 EXCEPTION(EX_INTERNAL|0x130);
144 math_abort(FPU_info,SIGSEGV);
145 }
146#endif /* PARANOID */
147 addr->selector = VM86_REG_(segment);
148 return (unsigned long)VM86_REG_(segment) << 4;
149}
150
151
152/* This should work for 16 and 32 bit protected mode. */
153static long pm_address(u_char FPU_modrm, u_char segment,
154 struct address *addr, long offset)
155{
156 struct desc_struct descriptor;
157 unsigned long base_address, limit, address, seg_top;
158 unsigned short selector;
159
160 segment--;
161
162#ifdef PARANOID
163 /* segment is unsigned, so this also detects if segment was 0: */
164 if ( segment > PREFIX_SS_ )
165 {
166 EXCEPTION(EX_INTERNAL|0x132);
167 math_abort(FPU_info,SIGSEGV);
168 }
169#endif /* PARANOID */
170
171 switch ( segment )
172 {
173 /* fs and gs aren't used by the kernel, so they still have their
174 user-space values. */
175 case PREFIX_FS_-1:
176 /* The cast is needed here to get gcc 2.8.0 to use a 16 bit register
177 in the assembler statement. */
178
179 __asm__("mov %%fs,%0":"=r" (selector));
180 addr->selector = selector;
181 break;
182 case PREFIX_GS_-1:
183 /* The cast is needed here to get gcc 2.8.0 to use a 16 bit register
184 in the assembler statement. */
185 __asm__("mov %%gs,%0":"=r" (selector));
186 addr->selector = selector;
187 break;
188 default:
189 addr->selector = PM_REG_(segment);
190 }
191
192 descriptor = LDT_DESCRIPTOR(PM_REG_(segment));
193 base_address = SEG_BASE_ADDR(descriptor);
194 address = base_address + offset;
195 limit = base_address
196 + (SEG_LIMIT(descriptor)+1) * SEG_GRANULARITY(descriptor) - 1;
197 if ( limit < base_address ) limit = 0xffffffff;
198
199 if ( SEG_EXPAND_DOWN(descriptor) )
200 {
201 if ( SEG_G_BIT(descriptor) )
202 seg_top = 0xffffffff;
203 else
204 {
205 seg_top = base_address + (1 << 20);
206 if ( seg_top < base_address ) seg_top = 0xffffffff;
207 }
208 access_limit =
209 (address <= limit) || (address >= seg_top) ? 0 :
210 ((seg_top-address) >= 255 ? 255 : seg_top-address);
211 }
212 else
213 {
214 access_limit =
215 (address > limit) || (address < base_address) ? 0 :
216 ((limit-address) >= 254 ? 255 : limit-address+1);
217 }
218 if ( SEG_EXECUTE_ONLY(descriptor) ||
219 (!SEG_WRITE_PERM(descriptor) && (FPU_modrm & FPU_WRITE_BIT)) )
220 {
221 access_limit = 0;
222 }
223 return address;
224}
225
226
227/*
228 MOD R/M byte: MOD == 3 has a special use for the FPU
229 SIB byte used iff R/M = 100b
230
231 7 6 5 4 3 2 1 0
232 ..... ......... .........
233 MOD OPCODE(2) R/M
234
235
236 SIB byte
237
238 7 6 5 4 3 2 1 0
239 ..... ......... .........
240 SS INDEX BASE
241
242*/
243
244void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip,
245 struct address *addr,
246 fpu_addr_modes addr_modes)
247{
248 u_char mod;
249 unsigned rm = FPU_modrm & 7;
250 long *cpu_reg_ptr;
251 int address = 0; /* Initialized just to stop compiler warnings. */
252
253 /* Memory accessed via the cs selector is write protected
254 in `non-segmented' 32 bit protected mode. */
255 if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
256 && (addr_modes.override.segment == PREFIX_CS_) )
257 {
258 math_abort(FPU_info,SIGSEGV);
259 }
260
261 addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */
262
263 mod = (FPU_modrm >> 6) & 3;
264
265 if (rm == 4 && mod != 3)
266 {
267 address = sib(mod, fpu_eip);
268 }
269 else
270 {
271 cpu_reg_ptr = & REG_(rm);
272 switch (mod)
273 {
274 case 0:
275 if (rm == 5)
276 {
277 /* Special case: disp32 */
278 RE_ENTRANT_CHECK_OFF;
279 FPU_code_access_ok(4);
280 FPU_get_user(address, (unsigned long __user *) (*fpu_eip));
281 (*fpu_eip) += 4;
282 RE_ENTRANT_CHECK_ON;
283 addr->offset = address;
284 return (void __user *) address;
285 }
286 else
287 {
288 address = *cpu_reg_ptr; /* Just return the contents
289 of the cpu register */
290 addr->offset = address;
291 return (void __user *) address;
292 }
293 case 1:
294 /* 8 bit signed displacement */
295 RE_ENTRANT_CHECK_OFF;
296 FPU_code_access_ok(1);
297 FPU_get_user(address, (signed char __user *) (*fpu_eip));
298 RE_ENTRANT_CHECK_ON;
299 (*fpu_eip)++;
300 break;
301 case 2:
302 /* 32 bit displacement */
303 RE_ENTRANT_CHECK_OFF;
304 FPU_code_access_ok(4);
305 FPU_get_user(address, (long __user *) (*fpu_eip));
306 (*fpu_eip) += 4;
307 RE_ENTRANT_CHECK_ON;
308 break;
309 case 3:
310 /* Not legal for the FPU */
311 EXCEPTION(EX_Invalid);
312 }
313 address += *cpu_reg_ptr;
314 }
315
316 addr->offset = address;
317
318 switch ( addr_modes.default_mode )
319 {
320 case 0:
321 break;
322 case VM86:
323 address += vm86_segment(addr_modes.override.segment, addr);
324 break;
325 case PM16:
326 case SEG32:
327 address = pm_address(FPU_modrm, addr_modes.override.segment,
328 addr, address);
329 break;
330 default:
331 EXCEPTION(EX_INTERNAL|0x133);
332 }
333
334 return (void __user *)address;
335}
336
337
338void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip,
339 struct address *addr,
340 fpu_addr_modes addr_modes)
341{
342 u_char mod;
343 unsigned rm = FPU_modrm & 7;
344 int address = 0; /* Default used for mod == 0 */
345
346 /* Memory accessed via the cs selector is write protected
347 in `non-segmented' 32 bit protected mode. */
348 if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
349 && (addr_modes.override.segment == PREFIX_CS_) )
350 {
351 math_abort(FPU_info,SIGSEGV);
352 }
353
354 addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */
355
356 mod = (FPU_modrm >> 6) & 3;
357
358 switch (mod)
359 {
360 case 0:
361 if (rm == 6)
362 {
363 /* Special case: disp16 */
364 RE_ENTRANT_CHECK_OFF;
365 FPU_code_access_ok(2);
366 FPU_get_user(address, (unsigned short __user *) (*fpu_eip));
367 (*fpu_eip) += 2;
368 RE_ENTRANT_CHECK_ON;
369 goto add_segment;
370 }
371 break;
372 case 1:
373 /* 8 bit signed displacement */
374 RE_ENTRANT_CHECK_OFF;
375 FPU_code_access_ok(1);
376 FPU_get_user(address, (signed char __user *) (*fpu_eip));
377 RE_ENTRANT_CHECK_ON;
378 (*fpu_eip)++;
379 break;
380 case 2:
381 /* 16 bit displacement */
382 RE_ENTRANT_CHECK_OFF;
383 FPU_code_access_ok(2);
384 FPU_get_user(address, (unsigned short __user *) (*fpu_eip));
385 (*fpu_eip) += 2;
386 RE_ENTRANT_CHECK_ON;
387 break;
388 case 3:
389 /* Not legal for the FPU */
390 EXCEPTION(EX_Invalid);
391 break;
392 }
393 switch ( rm )
394 {
395 case 0:
396 address += FPU_info->___ebx + FPU_info->___esi;
397 break;
398 case 1:
399 address += FPU_info->___ebx + FPU_info->___edi;
400 break;
401 case 2:
402 address += FPU_info->___ebp + FPU_info->___esi;
403 if ( addr_modes.override.segment == PREFIX_DEFAULT )
404 addr_modes.override.segment = PREFIX_SS_;
405 break;
406 case 3:
407 address += FPU_info->___ebp + FPU_info->___edi;
408 if ( addr_modes.override.segment == PREFIX_DEFAULT )
409 addr_modes.override.segment = PREFIX_SS_;
410 break;
411 case 4:
412 address += FPU_info->___esi;
413 break;
414 case 5:
415 address += FPU_info->___edi;
416 break;
417 case 6:
418 address += FPU_info->___ebp;
419 if ( addr_modes.override.segment == PREFIX_DEFAULT )
420 addr_modes.override.segment = PREFIX_SS_;
421 break;
422 case 7:
423 address += FPU_info->___ebx;
424 break;
425 }
426
427 add_segment:
428 address &= 0xffff;
429
430 addr->offset = address;
431
432 switch ( addr_modes.default_mode )
433 {
434 case 0:
435 break;
436 case VM86:
437 address += vm86_segment(addr_modes.override.segment, addr);
438 break;
439 case PM16:
440 case SEG32:
441 address = pm_address(FPU_modrm, addr_modes.override.segment,
442 addr, address);
443 break;
444 default:
445 EXCEPTION(EX_INTERNAL|0x131);
446 }
447
448 return (void __user *)address ;
449}
diff --git a/arch/i386/math-emu/load_store.c b/arch/i386/math-emu/load_store.c
new file mode 100644
index 000000000000..85314be2fef8
--- /dev/null
+++ b/arch/i386/math-emu/load_store.c
@@ -0,0 +1,270 @@
1/*---------------------------------------------------------------------------+
2 | load_store.c |
3 | |
4 | This file contains most of the code to interpret the FPU instructions |
5 | which load and store from user memory. |
6 | |
7 | Copyright (C) 1992,1993,1994,1997 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
9 | Australia. E-mail billm@suburbia.net |
10 | |
11 | |
12 +---------------------------------------------------------------------------*/
13
14/*---------------------------------------------------------------------------+
15 | Note: |
16 | The file contains code which accesses user memory. |
17 | Emulator static data may change when user memory is accessed, due to |
18 | other processes using the emulator while swapping is in progress. |
19 +---------------------------------------------------------------------------*/
20
21#include <asm/uaccess.h>
22
23#include "fpu_system.h"
24#include "exception.h"
25#include "fpu_emu.h"
26#include "status_w.h"
27#include "control_w.h"
28
29
30#define _NONE_ 0 /* st0_ptr etc not needed */
31#define _REG0_ 1 /* Will be storing st(0) */
32#define _PUSH_ 3 /* Need to check for space to push onto stack */
33#define _null_ 4 /* Function illegal or not implemented */
34
35#define pop_0() { FPU_settag0(TAG_Empty); top++; }
36
37
38static u_char const type_table[32] = {
39 _PUSH_, _PUSH_, _PUSH_, _PUSH_,
40 _null_, _null_, _null_, _null_,
41 _REG0_, _REG0_, _REG0_, _REG0_,
42 _REG0_, _REG0_, _REG0_, _REG0_,
43 _NONE_, _null_, _NONE_, _PUSH_,
44 _NONE_, _PUSH_, _null_, _PUSH_,
45 _NONE_, _null_, _NONE_, _REG0_,
46 _NONE_, _REG0_, _NONE_, _REG0_
47 };
48
49u_char const data_sizes_16[32] = {
50 4, 4, 8, 2, 0, 0, 0, 0,
51 4, 4, 8, 2, 4, 4, 8, 2,
52 14, 0, 94, 10, 2, 10, 0, 8,
53 14, 0, 94, 10, 2, 10, 2, 8
54};
55
56static u_char const data_sizes_32[32] = {
57 4, 4, 8, 2, 0, 0, 0, 0,
58 4, 4, 8, 2, 4, 4, 8, 2,
59 28, 0,108, 10, 2, 10, 0, 8,
60 28, 0,108, 10, 2, 10, 2, 8
61};
62
/*
 * FPU_load_store() - emulate one FPU load, store or control instruction
 * that takes a memory operand.
 *
 *   type          index (0..31) into type_table[] and data_sizes_*[]; the
 *                 octal case labels below name the instruction for each value.
 *   addr_modes    addressing-mode information; default_mode selects which
 *                 size table is compared against access_limit.
 *   data_address  user-space address of the memory operand.
 *
 * Returns 1 for fldenv, frstor, fldcw, fstenv, fsave, fstcw and fstsw, and
 * 0 in every other case (including the stack-overflow and illegal-opcode
 * early exits).
 * NOTE(review): what the caller does with the return value is not visible
 * here; the comments on fldenv/frstor suggest 1 means "skip the normal
 * post-instruction fix-ups" - confirm against the caller.
 */
63int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
64 void __user *data_address)
65{
66 FPU_REG loaded_data;
67 FPU_REG *st0_ptr;
68 u_char st0_tag = TAG_Empty; /* This is just to stop a gcc warning. */
69 u_char loaded_tag;
70
71 st0_ptr = NULL; /* Initialized just to stop compiler warnings. */
72
/* In the protected modes, abort with SIGSEGV if the operand is larger than
   access_limit allows. */
73 if ( addr_modes.default_mode & PROTECTED )
74 {
75 if ( addr_modes.default_mode == SEG32 )
76 {
77 if ( access_limit < data_sizes_32[type] )
78 math_abort(FPU_info,SIGSEGV);
79 }
80 else if ( addr_modes.default_mode == PM16 )
81 {
82 if ( access_limit < data_sizes_16[type] )
83 math_abort(FPU_info,SIGSEGV);
84 }
85#ifdef PARANOID
86 else
87 EXCEPTION(EX_INTERNAL|0x140);
88#endif /* PARANOID */
89 }
90
/* Prepare the register stack according to the class of the instruction. */
91 switch ( type_table[type] )
92 {
93 case _NONE_:
94 break;
95 case _REG0_:
96 st0_ptr = &st(0); /* Some of these instructions pop after
97 storing */
98 st0_tag = FPU_gettag0();
99 break;
100 case _PUSH_:
101 {
102 if ( FPU_gettagi(-1) != TAG_Empty )
103 { FPU_stack_overflow(); return 0; }
104 top--; /* the slot just checked becomes the new st(0) */
105 st0_ptr = &st(0);
106 }
107 break;
108 case _null_:
109 FPU_illegal();
110 return 0;
111#ifdef PARANOID
112 default:
113 EXCEPTION(EX_INTERNAL|0x141);
114 return 0;
115#endif /* PARANOID */
116 }
117
/* Execute the instruction itself. */
118 switch ( type )
119 {
120 case 000: /* fld m32real */
121 clear_C1();
122 loaded_tag = FPU_load_single((float __user *)data_address, &loaded_data);
123 if ( (loaded_tag == TAG_Special)
124 && isNaN(&loaded_data)
125 && (real_1op_NaN(&loaded_data) < 0) )
126 {
127 top++; /* undo the push done for _PUSH_ above */
128 break;
129 }
130 FPU_copy_to_reg0(&loaded_data, loaded_tag);
131 break;
132 case 001: /* fild m32int */
133 clear_C1();
134 loaded_tag = FPU_load_int32((long __user *)data_address, &loaded_data);
135 FPU_copy_to_reg0(&loaded_data, loaded_tag);
136 break;
137 case 002: /* fld m64real */
138 clear_C1();
139 loaded_tag = FPU_load_double((double __user *)data_address, &loaded_data);
140 if ( (loaded_tag == TAG_Special)
141 && isNaN(&loaded_data)
142 && (real_1op_NaN(&loaded_data) < 0) )
143 {
144 top++; /* undo the push done for _PUSH_ above */
145 break;
146 }
147 FPU_copy_to_reg0(&loaded_data, loaded_tag);
148 break;
149 case 003: /* fild m16int */
150 clear_C1();
151 loaded_tag = FPU_load_int16((short __user *)data_address, &loaded_data);
152 FPU_copy_to_reg0(&loaded_data, loaded_tag);
153 break;
154 case 010: /* fst m32real */
155 clear_C1();
156 FPU_store_single(st0_ptr, st0_tag, (float __user *)data_address);
157 break;
158 case 011: /* fist m32int */
159 clear_C1();
160 FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address);
161 break;
162 case 012: /* fst m64real */
163 clear_C1();
164 FPU_store_double(st0_ptr, st0_tag, (double __user *)data_address);
165 break;
166 case 013: /* fist m16int */
167 clear_C1();
168 FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address);
169 break;
170 case 014: /* fstp m32real */
171 clear_C1();
172 if ( FPU_store_single(st0_ptr, st0_tag, (float __user *)data_address) )
173 pop_0(); /* pop only if the number was actually stored
174 (see the 80486 manual p16-28) */
175 break;
176 case 015: /* fistp m32int */
177 clear_C1();
178 if ( FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address) )
179 pop_0(); /* pop only if the number was actually stored
180 (see the 80486 manual p16-28) */
181 break;
182 case 016: /* fstp m64real */
183 clear_C1();
184 if ( FPU_store_double(st0_ptr, st0_tag, (double __user *)data_address) )
185 pop_0(); /* pop only if the number was actually stored
186 (see the 80486 manual p16-28) */
187 break;
188 case 017: /* fistp m16int */
189 clear_C1();
190 if ( FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address) )
191 pop_0(); /* pop only if the number was actually stored
192 (see the 80486 manual p16-28) */
193 break;
194 case 020: /* fldenv m14/28byte */
195 fldenv(addr_modes, (u_char __user *)data_address);
196 /* Ensure that the values just loaded are not changed by
197 fix-up operations. */
198 return 1;
199 case 022: /* frstor m94/108byte */
200 frstor(addr_modes, (u_char __user *)data_address);
201 /* Ensure that the values just loaded are not changed by
202 fix-up operations. */
203 return 1;
204 case 023: /* fbld m80dec */
205 clear_C1();
206 loaded_tag = FPU_load_bcd((u_char __user *)data_address);
207 FPU_settag0(loaded_tag);
208 break;
209 case 024: /* fldcw */
210 RE_ENTRANT_CHECK_OFF;
211 FPU_access_ok(VERIFY_READ, data_address, 2);
212 FPU_get_user(control_word, (unsigned short __user *) data_address);
213 RE_ENTRANT_CHECK_ON;
/* Recompute the summary bits: set them if any exception flagged in
   partial_status has its bit clear in the new control word. */
214 if ( partial_status & ~control_word & CW_Exceptions )
215 partial_status |= (SW_Summary | SW_Backward);
216 else
217 partial_status &= ~(SW_Summary | SW_Backward);
218#ifdef PECULIAR_486
219 control_word |= 0x40; /* An 80486 appears to always set this bit */
220#endif /* PECULIAR_486 */
221 return 1;
222 case 025: /* fld m80real */
223 clear_C1();
224 loaded_tag = FPU_load_extended((long double __user *)data_address, 0);
225 FPU_settag0(loaded_tag);
226 break;
227 case 027: /* fild m64int */
228 clear_C1();
229 loaded_tag = FPU_load_int64((long long __user *)data_address);
230 FPU_settag0(loaded_tag);
231 break;
232 case 030: /* fstenv m14/28byte */
233 fstenv(addr_modes, (u_char __user *)data_address);
234 return 1;
235 case 032: /* fsave */
236 fsave(addr_modes, (u_char __user *)data_address);
237 return 1;
238 case 033: /* fbstp m80dec */
239 clear_C1();
240 if ( FPU_store_bcd(st0_ptr, st0_tag, (u_char __user *)data_address) )
241 pop_0(); /* pop only if the number was actually stored
242 (see the 80486 manual p16-28) */
243 break;
244 case 034: /* fstcw m16int */
245 RE_ENTRANT_CHECK_OFF;
246 FPU_access_ok(VERIFY_WRITE,data_address,2);
247 FPU_put_user(control_word, (unsigned short __user *) data_address);
248 RE_ENTRANT_CHECK_ON;
249 return 1;
250 case 035: /* fstp m80real */
251 clear_C1();
252 if ( FPU_store_extended(st0_ptr, st0_tag, (long double __user *)data_address) )
253 pop_0(); /* pop only if the number was actually stored
254 (see the 80486 manual p16-28) */
255 break;
256 case 036: /* fstsw m2byte */
257 RE_ENTRANT_CHECK_OFF;
258 FPU_access_ok(VERIFY_WRITE,data_address,2);
259 FPU_put_user(status_word(),(unsigned short __user *) data_address);
260 RE_ENTRANT_CHECK_ON;
261 return 1;
262 case 037: /* fistp m64int */
263 clear_C1();
264 if ( FPU_store_int64(st0_ptr, st0_tag, (long long __user *)data_address) )
265 pop_0(); /* pop only if the number was actually stored
266 (see the 80486 manual p16-28) */
267 break;
268 }
269 return 0;
270}
diff --git a/arch/i386/math-emu/mul_Xsig.S b/arch/i386/math-emu/mul_Xsig.S
new file mode 100644
index 000000000000..717785a53eb4
--- /dev/null
+++ b/arch/i386/math-emu/mul_Xsig.S
@@ -0,0 +1,176 @@
1/*---------------------------------------------------------------------------+
2 | mul_Xsig.S |
3 | |
4 | Multiply a 12 byte fixed point number by another fixed point number. |
5 | |
6 | Copyright (C) 1992,1994,1995 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
8 | Australia. E-mail billm@jacobi.maths.monash.edu.au |
9 | |
10 | Call from C as: |
11 | void mul32_Xsig(Xsig *x, unsigned b) |
12 | |
13 | void mul64_Xsig(Xsig *x, unsigned long long *b) |
14 | |
15 | void mul_Xsig_Xsig(Xsig *x, const Xsig *b) |
16 | |
17 | The result is neither rounded nor normalized, and the ls bit or so may |
18 | be wrong. |
19 | |
20 +---------------------------------------------------------------------------*/
21 .file "mul_Xsig.S"
22
23
24#include "fpu_emu.h"
25
26.text
/*
 * void mul32_Xsig(Xsig *x, unsigned b)
 *
 * Replaces the 96-bit value at *x with bits 32..127 of the 128-bit product
 * x * b (the low 32 bits of the product are dropped).
 *
 * Locals: -12(%ebp) low word, -8(%ebp) middle word, -4(%ebp) high word of
 * the result being accumulated.
 * Scratch: %eax, %ecx, %edx.  %esi is saved and restored.
 * PARAM1/PARAM2 come from fpu_emu.h (not shown here); presumably the first
 * and second C stack arguments - confirm there.
 */
27ENTRY(mul32_Xsig)
28 pushl %ebp
29 movl %esp,%ebp
30 subl $16,%esp
31 pushl %esi
32
33 movl PARAM1,%esi /* esi = x */
34 movl PARAM2,%ecx /* ecx = b */
35
36 xor %eax,%eax
37 movl %eax,-4(%ebp) /* clear the two upper result words */
38 movl %eax,-8(%ebp)
39
40 movl (%esi),%eax /* lsl of Xsig */
41 mull %ecx /* b */
42 movl %edx,-12(%ebp) /* keep only the high half; eax is discarded */
43
44 movl 4(%esi),%eax /* midl of Xsig */
45 mull %ecx /* b */
46 addl %eax,-12(%ebp)
47 adcl %edx,-8(%ebp)
48 adcl $0,-4(%ebp)
49
50 movl 8(%esi),%eax /* msl of Xsig */
51 mull %ecx /* b */
52 addl %eax,-8(%ebp)
53 adcl %edx,-4(%ebp)
54
55 movl -12(%ebp),%eax /* copy the 96-bit result back to *x */
56 movl %eax,(%esi)
57 movl -8(%ebp),%eax
58 movl %eax,4(%esi)
59 movl -4(%ebp),%eax
60 movl %eax,8(%esi)
61
62 popl %esi
63 leave
64 ret
65
66
/*
 * void mul64_Xsig(Xsig *x, unsigned long long *b)
 *
 * Replaces the 96-bit value at *x with (approximately) bits 64..159 of the
 * product x * (*b).  The partial product lsl(x)*lsl(b) is not computed at
 * all, and only the high halves of lsl(x)*msl(b) and midl(x)*lsl(b) are
 * added in, so carries out of the dropped low part are lost (this is the
 * "ls bit or so may be wrong" caveat in the file header).
 *
 * Locals: -12(%ebp) low word, -8(%ebp) middle word, -4(%ebp) high word of
 * the result being accumulated.
 * Scratch: %eax, %ecx, %edx.  %esi is saved and restored.
 */
67ENTRY(mul64_Xsig)
68 pushl %ebp
69 movl %esp,%ebp
70 subl $16,%esp
71 pushl %esi
72
73 movl PARAM1,%esi /* esi = x */
74 movl PARAM2,%ecx /* ecx = b (pointer) */
75
76 xor %eax,%eax
77 movl %eax,-4(%ebp) /* clear the two upper result words */
78 movl %eax,-8(%ebp)
79
80 movl (%esi),%eax /* lsl of Xsig */
81 mull 4(%ecx) /* msl of b */
82 movl %edx,-12(%ebp) /* high half only */
83
84 movl 4(%esi),%eax /* midl of Xsig */
85 mull (%ecx) /* lsl of b */
86 addl %edx,-12(%ebp) /* high half only */
87 adcl $0,-8(%ebp)
88 adcl $0,-4(%ebp)
89
90 movl 4(%esi),%eax /* midl of Xsig */
91 mull 4(%ecx) /* msl of b */
92 addl %eax,-12(%ebp)
93 adcl %edx,-8(%ebp)
94 adcl $0,-4(%ebp)
95
96 movl 8(%esi),%eax /* msl of Xsig */
97 mull (%ecx) /* lsl of b */
98 addl %eax,-12(%ebp)
99 adcl %edx,-8(%ebp)
100 adcl $0,-4(%ebp)
101
102 movl 8(%esi),%eax /* msl of Xsig */
103 mull 4(%ecx) /* msl of b */
104 addl %eax,-8(%ebp)
105 adcl %edx,-4(%ebp)
106
107 movl -12(%ebp),%eax /* copy the 96-bit result back to *x */
108 movl %eax,(%esi)
109 movl -8(%ebp),%eax
110 movl %eax,4(%esi)
111 movl -4(%ebp),%eax
112 movl %eax,8(%esi)
113
114 popl %esi
115 leave
116 ret
117
118
119
/*
 * void mul_Xsig_Xsig(Xsig *x, const Xsig *b)
 *
 * Replaces the 96-bit value at *x with (approximately) bits 96..191 of the
 * 192-bit product x * (*b).  Only six of the nine 32x32 partial products
 * are formed: lsl*lsl, lsl*midl and midl*lsl are skipped, and just the high
 * halves of the three products that straddle the cut are added, so the
 * least significant bit or so of the result may be wrong.
 *
 * Locals: -12(%ebp) low word, -8(%ebp) middle word, -4(%ebp) high word of
 * the result being accumulated.
 * Scratch: %eax, %ecx, %edx.  %esi is saved and restored.
 */
120ENTRY(mul_Xsig_Xsig)
121 pushl %ebp
122 movl %esp,%ebp
123 subl $16,%esp
124 pushl %esi
125
126 movl PARAM1,%esi /* esi = x */
127 movl PARAM2,%ecx /* ecx = b (pointer) */
128
129 xor %eax,%eax
130 movl %eax,-4(%ebp) /* clear the two upper result words */
131 movl %eax,-8(%ebp)
132
133 movl (%esi),%eax /* lsl of Xsig */
134 mull 8(%ecx) /* msl of b */
135 movl %edx,-12(%ebp) /* high half only */
136
137 movl 4(%esi),%eax /* midl of Xsig */
138 mull 4(%ecx) /* midl of b */
139 addl %edx,-12(%ebp) /* high half only */
140 adcl $0,-8(%ebp)
141 adcl $0,-4(%ebp)
142
143 movl 8(%esi),%eax /* msl of Xsig */
144 mull (%ecx) /* lsl of b */
145 addl %edx,-12(%ebp) /* high half only */
146 adcl $0,-8(%ebp)
147 adcl $0,-4(%ebp)
148
149 movl 4(%esi),%eax /* midl of Xsig */
150 mull 8(%ecx) /* msl of b */
151 addl %eax,-12(%ebp)
152 adcl %edx,-8(%ebp)
153 adcl $0,-4(%ebp)
154
155 movl 8(%esi),%eax /* msl of Xsig */
156 mull 4(%ecx) /* midl of b */
157 addl %eax,-12(%ebp)
158 adcl %edx,-8(%ebp)
159 adcl $0,-4(%ebp)
160
161 movl 8(%esi),%eax /* msl of Xsig */
162 mull 8(%ecx) /* msl of b */
163 addl %eax,-8(%ebp)
164 adcl %edx,-4(%ebp)
165
166 movl -12(%ebp),%edx /* copy the 96-bit result back to *x */
167 movl %edx,(%esi)
168 movl -8(%ebp),%edx
169 movl %edx,4(%esi)
170 movl -4(%ebp),%edx
171 movl %edx,8(%esi)
172
173 popl %esi
174 leave
175 ret
176
diff --git a/arch/i386/math-emu/poly.h b/arch/i386/math-emu/poly.h
new file mode 100644
index 000000000000..4db798114923
--- /dev/null
+++ b/arch/i386/math-emu/poly.h
@@ -0,0 +1,121 @@
1/*---------------------------------------------------------------------------+
2 | poly.h |
3 | |
4 | Header file for the FPU-emu poly*.c source files. |
5 | |
6 | Copyright (C) 1994,1999 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
8 | Australia. E-mail billm@melbpc.org.au |
9 | |
10 | Declarations and definitions for functions operating on Xsig (12-byte |
11 | extended-significand) quantities. |
12 | |
13 +---------------------------------------------------------------------------*/
14
15#ifndef _POLY_H
16#define _POLY_H
17
18/* This 12-byte structure is used to improve the accuracy of computation
19 of transcendental functions.
20 Intended to be used to get results better than 8-byte computation
21 allows. 9-byte would probably be sufficient.
22 */
/* Field order is relied on by the assembly helpers: mul_Xsig.S reads the
   words at byte offsets 0, 4 and 8 as "lsl", "midl" and "msl". */
23typedef struct {
24 unsigned long lsw; /* least significant word */
25 unsigned long midw; /* middle word */
26 unsigned long msw; /* most significant word */
27} Xsig;
28
29asmlinkage void mul64(unsigned long long const *a, unsigned long long const *b,
30 unsigned long long *result);
31asmlinkage void polynomial_Xsig(Xsig *, const unsigned long long *x,
32 const unsigned long long terms[], const int n);
33
34asmlinkage void mul32_Xsig(Xsig *, const unsigned long mult);
35asmlinkage void mul64_Xsig(Xsig *, const unsigned long long *mult);
36asmlinkage void mul_Xsig_Xsig(Xsig *dest, const Xsig *mult);
37
38asmlinkage void shr_Xsig(Xsig *, const int n);
39asmlinkage int round_Xsig(Xsig *);
40asmlinkage int norm_Xsig(Xsig *);
41asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest);
42
/* Macro to extract the most significant 32 bits from a long long.
   The argument must be an lvalue; it is parenthesized so that expressions
   such as *p or a[i] expand correctly. */
#define LL_MSW(x)     (((unsigned long *)&(x))[1])

/* Macro to initialize an Xsig struct.  Arguments are given most significant
   word first; the struct itself is stored least significant word first. */
#define MK_XSIG(a,b,c)     { c, b, a }

/* Macro to access the 8 ms bytes of an Xsig as a long long.
   The argument must be an Xsig lvalue; it is parenthesized so that
   XSIG_LL(*p) means (*p).midw rather than *(p.midw). */
#define XSIG_LL(x)         (*(unsigned long long *)&(x).midw)
51
52
53/*
54 Need to run gcc with optimizations on to get these to
55 actually be in-line.
56 */
57
58/* Multiply two fixed-point 32 bit numbers, producing a 32 bit result.
59 The answer is the ms word of the product. */
60/* Some versions of gcc make it difficult to stop eax from being clobbered.
61 Merely specifying that it is used doesn't work...
62 */
63static inline unsigned long mul_32_32(const unsigned long arg1,
64 const unsigned long arg2)
65{
66 int retval;
67 asm volatile ("mull %2; movl %%edx,%%eax" \
68 :"=a" (retval) \
69 :"0" (arg1), "g" (arg2) \
70 :"dx");
71 return retval;
72}
73
74
75/* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */
76static inline void add_Xsig_Xsig(Xsig *dest, const Xsig *x2)
77{
78 asm volatile ("movl %1,%%edi; movl %2,%%esi;\n"
79 "movl (%%esi),%%eax; addl %%eax,(%%edi);\n"
80 "movl 4(%%esi),%%eax; adcl %%eax,4(%%edi);\n"
81 "movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);\n"
82 :"=g" (*dest):"g" (dest), "g" (x2)
83 :"ax","si","di");
84}
85
86
87/* Add the 12 byte Xsig x2 to Xsig dest, adjust exp if overflow occurs. */
88/* Note: the constraints in the asm statement didn't always work properly
89 with gcc 2.5.8. Changing from using edi to using ecx got around the
90 problem, but keep fingers crossed! */
91static inline void add_two_Xsig(Xsig *dest, const Xsig *x2, long int *exp)
92{
93 asm volatile ("movl %2,%%ecx; movl %3,%%esi;\n"
94 "movl (%%esi),%%eax; addl %%eax,(%%ecx);\n"
95 "movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx);\n"
96 "movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx);\n"
97 "jnc 0f;\n"
98 "rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx)\n"
99 "movl %4,%%ecx; incl (%%ecx)\n"
100 "movl $1,%%eax; jmp 1f;\n"
101 "0: xorl %%eax,%%eax;\n"
102 "1:\n"
103 :"=g" (*exp), "=g" (*dest)
104 :"g" (dest), "g" (x2), "g" (exp)
105 :"cx","si","ax");
106}
107
108
109/* Negate (subtract from 1.0) the 12 byte Xsig */
110/* This is faster in a loop on my 386 than using the "neg" instruction. */
111static inline void negate_Xsig(Xsig *x)
112{
113 asm volatile("movl %1,%%esi;\n"
114 "xorl %%ecx,%%ecx;\n"
115 "movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi);\n"
116 "movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi);\n"
117 "movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi);\n"
118 :"=g" (*x):"g" (x):"si","ax","cx");
119}
120
121#endif /* _POLY_H */
diff --git a/arch/i386/math-emu/poly_2xm1.c b/arch/i386/math-emu/poly_2xm1.c
new file mode 100644
index 000000000000..9766ad5e9743
--- /dev/null
+++ b/arch/i386/math-emu/poly_2xm1.c
@@ -0,0 +1,156 @@
1/*---------------------------------------------------------------------------+
2 | poly_2xm1.c |
3 | |
4 | Function to compute 2^x-1 by a polynomial approximation. |
5 | |
6 | Copyright (C) 1992,1993,1994,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13#include "exception.h"
14#include "reg_constant.h"
15#include "fpu_emu.h"
16#include "fpu_system.h"
17#include "control_w.h"
18#include "poly.h"
19
20
/* Number of entries in lterms[]; poly_2xm1() passes HIPOWER-1 to
   polynomial_Xsig() as the index of the highest term. */
#define	HIPOWER	11

/* 64-bit fixed-point polynomial coefficients, lowest order first. */
static const unsigned long long lterms[HIPOWER] =
{
  /* [0]  */ 0x0000000000000000ULL,  /* This term done separately as 12 bytes */
  /* [1]  */ 0xf5fdeffc162c7543ULL,
  /* [2]  */ 0x1c6b08d704a0bfa6ULL,
  /* [3]  */ 0x0276556df749cc21ULL,
  /* [4]  */ 0x002bb0ffcf14f6b8ULL,
  /* [5]  */ 0x0002861225ef751cULL,
  /* [6]  */ 0x00001ffcbfcd5422ULL,
  /* [7]  */ 0x00000162c005d5f1ULL,
  /* [8]  */ 0x0000000da96ccb1bULL,
  /* [9]  */ 0x0000000078d1b897ULL,
  /* [10] */ 0x000000000422b029ULL
};
36
37static const Xsig hiterm = MK_XSIG(0xb17217f7, 0xd1cf79ab, 0xc8a39194);
38
39/* Four slices: 0.0 : 0.25 : 0.50 : 0.75 : 1.0,
40 These numbers are 2^(1/4), 2^(1/2), and 2^(3/4)
41 */
42static const Xsig shiftterm0 = MK_XSIG(0, 0, 0);
43static const Xsig shiftterm1 = MK_XSIG(0x9837f051, 0x8db8a96f, 0x46ad2318);
44static const Xsig shiftterm2 = MK_XSIG(0xb504f333, 0xf9de6484, 0x597d89b3);
45static const Xsig shiftterm3 = MK_XSIG(0xd744fcca, 0xd69d6af4, 0x39a68bb9);
46
47static const Xsig *shiftterm[] = { &shiftterm0, &shiftterm1,
48 &shiftterm2, &shiftterm3 };
49
50
51/*--- poly_2xm1() -----------------------------------------------------------+
52 | Requires st(0) which is TAG_Valid and < 1. |
53 +---------------------------------------------------------------------------*/
/*
 * poly_2xm1() - compute 2^x - 1 by polynomial approximation and store the
 * rounded result in st(0).
 *
 *   sign    sign of the argument; any value other than SIGN_POS selects the
 *           f(-x) = -f(x) / (1 + f(x)) path, and the sign is applied to the
 *           result with setsign().
 *   arg     the argument; its exponent16() must be negative (|x| < 1).
 *   result  NOTE(review): reassigned to &st(0) before it is first used, so
 *           the pointer passed by the caller is ignored.
 *
 * Returns 0 on success.  With PARANOID defined, raises EX_INTERNAL|0x127
 * and returns 1 if the exponent is not negative.
 */
54int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result)
55{
56 long int exponent, shift;
57 unsigned long long Xll;
58 Xsig accumulator, Denom, argSignif;
59 u_char tag;
60
61 exponent = exponent16(arg);
62
63#ifdef PARANOID
64 if ( exponent >= 0 ) /* Don't want a |number| >= 1.0 */
65 {
66 /* Number negative, too large, or not Valid. */
67 EXCEPTION(EX_INTERNAL|0x127);
68 return 1;
69 }
70#endif /* PARANOID */
71
72 argSignif.lsw = 0;
73 XSIG_LL(argSignif) = Xll = significand(arg);
74
/* Range reduction: pick one of four slices (shift = 0..3) and drop the
   leading bits of the argument that the slice accounts for. */
75 if ( exponent == -1 )
76 {
77 shift = (argSignif.msw & 0x40000000) ? 3 : 2;
78 /* subtract 0.5 or 0.75 */
79 exponent -= 2;
80 XSIG_LL(argSignif) <<= 2;
81 Xll <<= 2;
82 }
83 else if ( exponent == -2 )
84 {
85 shift = 1;
86 /* subtract 0.25 */
87 exponent--;
88 XSIG_LL(argSignif) <<= 1;
89 Xll <<= 1;
90 }
91 else
92 shift = 0;
93
94 if ( exponent < -2 )
95 {
96 /* Shift the argument right by the required places. */
97 if ( FPU_shrx(&Xll, -2-exponent) >= 0x80000000U )
98 Xll++; /* round up */
99 }
100
/* Evaluate the polynomial in Xll, then multiply by the argument. */
101 accumulator.lsw = accumulator.midw = accumulator.msw = 0;
102 polynomial_Xsig(&accumulator, &Xll, lterms, HIPOWER-1);
103 mul_Xsig_Xsig(&accumulator, &argSignif);
104 shr_Xsig(&accumulator, 3);
105
106 mul_Xsig_Xsig(&argSignif, &hiterm); /* The leading term */
107 add_two_Xsig(&accumulator, &argSignif, &exponent);
108
109 if ( shift )
110 {
111 /* The argument is large, use the identity:
112 f(x+a) = f(a) * (f(x) + 1) - 1;
113 */
114 shr_Xsig(&accumulator, - exponent);
115 accumulator.msw |= 0x80000000; /* add 1.0 */
116 mul_Xsig_Xsig(&accumulator, shiftterm[shift]);
117 accumulator.msw &= 0x3fffffff; /* subtract 1.0 */
118 exponent = 1;
119 }
120
121 if ( sign != SIGN_POS )
122 {
123 /* The argument is negative, use the identity:
124 f(-x) = -f(x) / (1 + f(x))
125 */
126 Denom.lsw = accumulator.lsw;
127 XSIG_LL(Denom) = XSIG_LL(accumulator);
128 if ( exponent < 0 )
129 shr_Xsig(&Denom, - exponent);
130 else if ( exponent > 0 )
131 {
132 /* exponent must be 1 here */
/* 96-bit left shift by one: the top bit of lsw is carried into the
   upper 64 bits by hand. */
133 XSIG_LL(Denom) <<= 1;
134 if ( Denom.lsw & 0x80000000 )
135 XSIG_LL(Denom) |= 1;
136 (Denom.lsw) <<= 1;
137 }
138 Denom.msw |= 0x80000000; /* add 1.0 */
139 div_Xsig(&accumulator, &Denom, &accumulator);
140 }
141
142 /* Convert to 64 bit signed-compatible */
143 exponent += round_Xsig(&accumulator);
144
145 result = &st(0); /* the result always goes to st(0) */
146 significand(result) = XSIG_LL(accumulator);
147 setexponent16(result, exponent);
148
149 tag = FPU_round(result, 1, 0, FULL_PRECISION, sign);
150
151 setsign(result, sign);
152 FPU_settag0(tag);
153
154 return 0;
155
156}
diff --git a/arch/i386/math-emu/poly_atan.c b/arch/i386/math-emu/poly_atan.c
new file mode 100644
index 000000000000..82f702952f69
--- /dev/null
+++ b/arch/i386/math-emu/poly_atan.c
@@ -0,0 +1,229 @@
1/*---------------------------------------------------------------------------+
2 | poly_atan.c |
3 | |
4 | Compute the arctan of a FPU_REG, using a polynomial approximation. |
5 | |
6 | Copyright (C) 1992,1993,1994,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13#include "exception.h"
14#include "reg_constant.h"
15#include "fpu_emu.h"
16#include "fpu_system.h"
17#include "status_w.h"
18#include "control_w.h"
19#include "poly.h"
20
21
/* Coefficient tables for poly_atan(), lowest order first.  Both tables are
   passed to polynomial_Xsig() with (table size - 1) as the highest index. */

#define	HIPOWERon	6	/* odd poly, negative terms */
static const unsigned long long oddnegterms[HIPOWERon] =
{
  /* [0] */ 0x0000000000000000ULL,  /* Dummy (not for - 1.0) */
  /* [1] */ 0x015328437f756467ULL,
  /* [2] */ 0x0005dda27b73dec6ULL,
  /* [3] */ 0x0000226bf2bfb91aULL,
  /* [4] */ 0x000000ccc439c5f7ULL,
  /* [5] */ 0x0000000355438407ULL
};

#define	HIPOWERop	6	/* odd poly, positive terms */
static const unsigned long long oddplterms[HIPOWERop] =
{
  /*        0xaaaaaaaaaaaaaaabLL,      transferred to fixedpterm[] */
  /* [0] */ 0x0db55a71875c9ac2ULL,
  /* [1] */ 0x0029fce2d67880b0ULL,
  /* [2] */ 0x0000dfd3908b4596ULL,
  /* [3] */ 0x00000550fd61dab4ULL,
  /* [4] */ 0x0000001c9422b3f9ULL,
  /* [5] */ 0x000000003e3301e1ULL
};

/* 64-bit multiplier applied to the squared argument when poly_atan()
   builds the denominator. */
static const unsigned long long denomterm = 0xebd9b842c5c53a0eULL;
46
47static const Xsig fixedpterm = MK_XSIG(0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa);
48
49static const Xsig pi_signif = MK_XSIG(0xc90fdaa2, 0x2168c234, 0xc4c6628b);
50
51
52/*--- poly_atan() -----------------------------------------------------------+
53 | |
54 +---------------------------------------------------------------------------*/
/*
 * poly_atan() - arctangent of the ratio of the magnitudes of st(1) and
 * st(0); the result is written to st(1) (significand, exponent and the tag
 * of register 1).
 *
 * The smaller magnitude is divided by the larger, so the quotient is at
 * most 1 (`inverted' records that the quotient is st(0)/st(1)).  A quotient
 * above sqrt(2)-1 is reduced further with an atan identity (`transformed').
 * After the polynomial evaluation the reductions are undone by subtracting
 * from pi/4 and pi/2, and from pi when st(0) is negative.
 *
 *   st0_ptr, st0_tag   st(0) and its tag.
 *   st1_ptr, st1_tag   st(1) and its tag; st(1) receives the result and its
 *                      sign (sign2) is given to FPU_round().
 *
 * Operands whose tag is not TAG_Valid are converted in place with
 * FPU_to_exp16(), which changes the register contents.
 */
55void poly_atan(FPU_REG *st0_ptr, u_char st0_tag,
56 FPU_REG *st1_ptr, u_char st1_tag)
57{
58 u_char transformed, inverted,
59 sign1, sign2;
60 int exponent;
61 long int dummy_exp; /* NOTE(review): never initialised; only passed to add_two_Xsig() and never read here */
62 Xsig accumulator, Numer, Denom, accumulatore, argSignif,
63 argSq, argSqSq;
64 u_char tag;
65
66 sign1 = getsign(st0_ptr);
67 sign2 = getsign(st1_ptr);
68 if ( st0_tag == TAG_Valid )
69 {
70 exponent = exponent(st0_ptr);
71 }
72 else
73 {
74 /* This gives non-compatible stack contents... */
75 FPU_to_exp16(st0_ptr, st0_ptr);
76 exponent = exponent16(st0_ptr);
77 }
78 if ( st1_tag == TAG_Valid )
79 {
80 exponent -= exponent(st1_ptr);
81 }
82 else
83 {
84 /* This gives non-compatible stack contents... */
85 FPU_to_exp16(st1_ptr, st1_ptr);
86 exponent -= exponent16(st1_ptr);
87 }
88
/* exponent is now exp(st0) - exp(st1).  Choose numerator and denominator
   so that the smaller magnitude is divided by the larger. */
89 if ( (exponent < 0) || ((exponent == 0) &&
90 ((st0_ptr->sigh < st1_ptr->sigh) ||
91 ((st0_ptr->sigh == st1_ptr->sigh) &&
92 (st0_ptr->sigl < st1_ptr->sigl))) ) )
93 {
94 inverted = 1;
95 Numer.lsw = Denom.lsw = 0;
96 XSIG_LL(Numer) = significand(st0_ptr);
97 XSIG_LL(Denom) = significand(st1_ptr);
98 }
99 else
100 {
101 inverted = 0;
102 exponent = -exponent;
103 Numer.lsw = Denom.lsw = 0;
104 XSIG_LL(Numer) = significand(st1_ptr);
105 XSIG_LL(Denom) = significand(st0_ptr);
106 }
107 div_Xsig(&Numer, &Denom, &argSignif);
108 exponent += norm_Xsig(&argSignif);
109
110 if ( (exponent >= -1)
111 || ((exponent == -2) && (argSignif.msw > 0xd413ccd0)) )
112 {
113 /* The argument is greater than sqrt(2)-1 (=0.414213562...) */
114 /* Convert the argument by an identity for atan */
115 transformed = 1;
116
117 if ( exponent >= 0 )
118 {
119#ifdef PARANOID
120 if ( !( (exponent == 0) &&
121 (argSignif.lsw == 0) && (argSignif.midw == 0) &&
122 (argSignif.msw == 0x80000000) ) )
123 {
124 EXCEPTION(EX_INTERNAL|0x104); /* There must be a logic error */
125 return;
126 }
127#endif /* PARANOID */
128 argSignif.msw = 0; /* Make the transformed arg -> 0.0 */
129 }
130 else
131 {
/* New argument = Numer / Denom, where Numer is the negated (shifted)
   argument and Denom is the shifted argument with the top bit set. */
132 Numer.lsw = Denom.lsw = argSignif.lsw;
133 XSIG_LL(Numer) = XSIG_LL(Denom) = XSIG_LL(argSignif);
134
135 if ( exponent < -1 )
136 shr_Xsig(&Numer, -1-exponent);
137 negate_Xsig(&Numer);
138
139 shr_Xsig(&Denom, -exponent);
140 Denom.msw |= 0x80000000;
141
142 div_Xsig(&Numer, &Denom, &argSignif);
143
144 exponent = -1 + norm_Xsig(&argSignif);
145 }
146 }
147 else
148 {
149 transformed = 0;
150 }
151
/* argSq = arg^2 and argSqSq = arg^4 (significands only). */
152 argSq.lsw = argSignif.lsw; argSq.midw = argSignif.midw;
153 argSq.msw = argSignif.msw;
154 mul_Xsig_Xsig(&argSq, &argSq);
155
156 argSqSq.lsw = argSq.lsw; argSqSq.midw = argSq.midw; argSqSq.msw = argSq.msw;
157 mul_Xsig_Xsig(&argSqSq, &argSqSq);
158
159 accumulatore.lsw = argSq.lsw;
160 XSIG_LL(accumulatore) = XSIG_LL(argSq);
161
162 shr_Xsig(&argSq, 2*(-1-exponent-1));
163 shr_Xsig(&argSqSq, 4*(-1-exponent-1));
164
165 /* Now have argSq etc with binary point at the left
166 .1xxxxxxxx */
167
168 /* Do the basic fixed point polynomial evaluation */
169 accumulator.msw = accumulator.midw = accumulator.lsw = 0;
170 polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq),
171 oddplterms, HIPOWERop-1);
172 mul64_Xsig(&accumulator, &XSIG_LL(argSq));
173 negate_Xsig(&accumulator);
174 polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), oddnegterms, HIPOWERon-1);
175 negate_Xsig(&accumulator);
176 add_two_Xsig(&accumulator, &fixedpterm, &dummy_exp);
177
/* Build the denominator from arg^2 * denomterm with the top bit set, then
   divide the polynomial by it. */
178 mul64_Xsig(&accumulatore, &denomterm);
179 shr_Xsig(&accumulatore, 1 + 2*(-1-exponent));
180 accumulatore.msw |= 0x80000000;
181
182 div_Xsig(&accumulator, &accumulatore, &accumulator);
183
184 mul_Xsig_Xsig(&accumulator, &argSignif);
185 mul_Xsig_Xsig(&accumulator, &argSq);
186
187 shr_Xsig(&accumulator, 3);
188 negate_Xsig(&accumulator);
189 add_Xsig_Xsig(&accumulator, &argSignif);
190
191 if ( transformed )
192 {
193 /* compute pi/4 - accumulator */
194 shr_Xsig(&accumulator, -1-exponent);
195 negate_Xsig(&accumulator);
196 add_Xsig_Xsig(&accumulator, &pi_signif);
197 exponent = -1;
198 }
199
200 if ( inverted )
201 {
202 /* compute pi/2 - accumulator */
203 shr_Xsig(&accumulator, -exponent);
204 negate_Xsig(&accumulator);
205 add_Xsig_Xsig(&accumulator, &pi_signif);
206 exponent = 0;
207 }
208
209 if ( sign1 )
210 {
211 /* compute pi - accumulator */
212 shr_Xsig(&accumulator, 1 - exponent);
213 negate_Xsig(&accumulator);
214 add_Xsig_Xsig(&accumulator, &pi_signif);
215 exponent = 1;
216 }
217
218 exponent += round_Xsig(&accumulator);
219
220 significand(st1_ptr) = XSIG_LL(accumulator);
221 setexponent16(st1_ptr, exponent);
222
223 tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign2);
224 FPU_settagi(1, tag);
225
226 set_precision_flag_up(); /* We do not really know if up or down,
227 use this as the default. */
228
229}
diff --git a/arch/i386/math-emu/poly_l2.c b/arch/i386/math-emu/poly_l2.c
new file mode 100644
index 000000000000..dd00e1d5b074
--- /dev/null
+++ b/arch/i386/math-emu/poly_l2.c
@@ -0,0 +1,272 @@
1/*---------------------------------------------------------------------------+
2 | poly_l2.c |
3 | |
4 | Compute the base 2 log of a FPU_REG, using a polynomial approximation. |
5 | |
6 | Copyright (C) 1992,1993,1994,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13
14#include "exception.h"
15#include "reg_constant.h"
16#include "fpu_emu.h"
17#include "fpu_system.h"
18#include "control_w.h"
19#include "poly.h"
20
21
22static void log2_kernel(FPU_REG const *arg, u_char argsign,
23 Xsig *accum_result, long int *expon);
24
25
26/*--- poly_l2() -------------------------------------------------------------+
27 | Base 2 logarithm by a polynomial approximation. |
28 +---------------------------------------------------------------------------*/
/*
 * poly_l2() - compute st(1) * log2(st(0)) and store the result in st(1).
 *
 * The significand of st(0) is first mapped into the range sqrt(2)/2 ..
 * sqrt(2) (adjusting the integer exponent to match), log2_kernel() supplies
 * the logarithm of that fraction, and the integer exponent is then added in
 * before multiplying by the significand of st(1).
 *
 *   st0_ptr   the argument of the logarithm.
 *   st1_ptr   the multiplier; it is overwritten with the result and the tag
 *             of register 1 is updated.
 *   st1_sign  sign of st(1), combined with the sign of the logarithm for
 *             FPU_round().
 *
 * If the rounded product has a zero most significant word, CONST_Z is
 * copied to register 1 instead and the function returns early.
 */
29void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign)
30{
31 long int exponent, expon, expon_expon;
32 Xsig accumulator, expon_accum, yaccum;
33 u_char sign, argsign;
34 FPU_REG x;
35 int tag;
36
37 exponent = exponent16(st0_ptr);
38
39 /* From st0_ptr, make a number > sqrt(2)/2 and < sqrt(2) */
40 if ( st0_ptr->sigh > (unsigned)0xb504f334 )
41 {
42 /* Treat as sqrt(2)/2 < st0_ptr < 1 */
43 significand(&x) = - significand(st0_ptr);
44 setexponent16(&x, -1);
45 exponent++;
46 argsign = SIGN_NEG;
47 }
48 else
49 {
50 /* Treat as 1 <= st0_ptr < sqrt(2) */
51 x.sigh = st0_ptr->sigh - 0x80000000;
52 x.sigl = st0_ptr->sigl;
53 setexponent16(&x, 0);
54 argsign = SIGN_POS;
55 }
56 tag = FPU_normalize_nuo(&x);
57
58 if ( tag == TAG_Zero )
59 {
60 expon = 0;
61 accumulator.msw = accumulator.midw = accumulator.lsw = 0;
62 }
63 else
64 {
65 log2_kernel(&x, argsign, &accumulator, &expon);
66 }
67
68 if ( exponent < 0 )
69 {
70 sign = SIGN_NEG;
71 exponent = -exponent;
72 }
73 else
74 sign = SIGN_POS;
/* Put |exponent| into an Xsig and, if it is non-zero, combine it with the
   fractional logarithm returned by log2_kernel(). */
75 expon_accum.msw = exponent; expon_accum.midw = expon_accum.lsw = 0;
76 if ( exponent )
77 {
78 expon_expon = 31 + norm_Xsig(&expon_accum);
79 shr_Xsig(&accumulator, expon_expon - expon);
80
81 if ( sign ^ argsign )
82 negate_Xsig(&accumulator);
83 add_Xsig_Xsig(&accumulator, &expon_accum);
84 }
85 else
86 {
87 expon_expon = expon;
88 sign = argsign;
89 }
90
/* Multiply by the significand of st(1). */
91 yaccum.lsw = 0; XSIG_LL(yaccum) = significand(st1_ptr);
92 mul_Xsig_Xsig(&accumulator, &yaccum);
93
94 expon_expon += round_Xsig(&accumulator);
95
96 if ( accumulator.msw == 0 )
97 {
98 FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
99 return;
100 }
101
102 significand(st1_ptr) = XSIG_LL(accumulator);
103 setexponent16(st1_ptr, expon_expon + exponent16(st1_ptr) + 1);
104
105 tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign ^ st1_sign);
106 FPU_settagi(1, tag);
107
108 set_precision_flag_up(); /* 80486 appears to always do this */
109
110 return;
111
112}
113
114
115/*--- poly_l2p1() -----------------------------------------------------------+
116 | Base 2 logarithm by a polynomial approximation. |
117 | log2(x+1) |
118 +---------------------------------------------------------------------------*/
/*
 * poly_l2p1() - compute st(1) * log2(st(0) + 1).
 *
 *   sign0, sign1  signs of st(0) and st(1).
 *   st0_ptr       the argument; the computation is done only when its
 *                 exponent16() is negative.
 *   st1_ptr       the multiplier.
 *   dest          receives the result significand and exponent; the tag is
 *                 written to register 1.
 *
 * Returns 0 normally.  Returns 1 only when PECULIAR_486 is not defined, the
 * argument is too large and negative, and arith_invalid(1) reports failure.
 *
 * NOTE(review): when the argument is too large `dest' is not written by
 * this function, yet exponent(dest) is still tested for underflow at the
 * end - confirm that callers pass an initialised register.
 */
119int poly_l2p1(u_char sign0, u_char sign1,
120 FPU_REG *st0_ptr, FPU_REG *st1_ptr, FPU_REG *dest)
121{
122 u_char tag;
123 long int exponent;
124 Xsig accumulator, yaccum;
125
126 if ( exponent16(st0_ptr) < 0 )
127 {
128 log2_kernel(st0_ptr, sign0, &accumulator, &exponent);
129
/* Multiply the logarithm by the significand of st(1). */
130 yaccum.lsw = 0;
131 XSIG_LL(yaccum) = significand(st1_ptr);
132 mul_Xsig_Xsig(&accumulator, &yaccum);
133
134 exponent += round_Xsig(&accumulator);
135
136 exponent += exponent16(st1_ptr) + 1;
137 if ( exponent < EXP_WAY_UNDER ) exponent = EXP_WAY_UNDER; /* clamp from below */
138
139 significand(dest) = XSIG_LL(accumulator);
140 setexponent16(dest, exponent);
141
142 tag = FPU_round(dest, 1, 0, FULL_PRECISION, sign0 ^ sign1);
143 FPU_settagi(1, tag);
144
145 if ( tag == TAG_Valid )
146 set_precision_flag_up(); /* 80486 appears to always do this */
147 }
148 else
149 {
150 /* The magnitude of st0_ptr is far too large. */
151
152 if ( sign0 != SIGN_POS )
153 {
154 /* Trying to get the log of a negative number. */
155#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */
156 changesign(st1_ptr);
157#else
158 if ( arith_invalid(1) < 0 )
159 return 1;
160#endif /* PECULIAR_486 */
161 }
162
163 /* 80486 appears to do this */
164 if ( sign0 == SIGN_NEG )
165 set_precision_flag_down();
166 else
167 set_precision_flag_up();
168 }
169
170 if ( exponent(dest) <= EXP_UNDER )
171 EXCEPTION(EX_Underflow);
172
173 return 0;
174
175}
176
177
178
179
/* Number of entries in logterms[]; log2_kernel() passes HIPOWER-1 to
   polynomial_Xsig() as the index of the highest term. */
#undef HIPOWER
#define	HIPOWER	10

/* 64-bit fixed-point polynomial coefficients, lowest order first. */
static const unsigned long long logterms[HIPOWER] =
{
  /* [0] */ 0x2a8eca5705fc2ef0ULL,
  /* [1] */ 0xf6384ee1d01febceULL,
  /* [2] */ 0x093bb62877cdf642ULL,
  /* [3] */ 0x006985d8a9ec439bULL,
  /* [4] */ 0x0005212c4f55a9c8ULL,
  /* [5] */ 0x00004326a16927f0ULL,
  /* [6] */ 0x0000038d1d80a0e7ULL,
  /* [7] */ 0x0000003141cc80c6ULL,
  /* [8] */ 0x00000002b1668c9fULL,
  /* [9] */ 0x000000002c7a46aaULL
};

/* 32-bit multiplier that log2_kernel() applies to the argument itself
   with mul32_Xsig(). */
static const unsigned long leadterm = 0xb8000000;
197
198
199/*--- log2_kernel() ---------------------------------------------------------+
200 | Base 2 logarithm by a polynomial approximation. |
201 | log2(x+1) |
202 +---------------------------------------------------------------------------*/
/*
 * log2_kernel() - shared core of poly_l2() and poly_l2p1().
 *
 *   arg           the argument; only exponent16() and the significand are
 *                 read.
 *   argsign       SIGN_POS or another value; selects how the denominator of
 *                 the transformed argument is formed.
 *   accum_result  receives the 12-byte result significand.
 *   expon         receives the matching exponent (internal exponent + 1).
 *
 * The argument is first replaced by the quotient Numer/Denom, where Numer
 * is the argument significand and Denom is the shifted significand with the
 * top bit set (positive case) or negated (negative case).  A polynomial in
 * the square of that quotient is then evaluated, multiplied by the
 * quotient, and added to the quotient times leadterm.
 */
203static void log2_kernel(FPU_REG const *arg, u_char argsign, Xsig *accum_result,
204 long int *expon)
205{
206 long int exponent, adj;
207 unsigned long long Xsq;
208 Xsig accumulator, Numer, Denom, argSignif, arg_signif;
209
210 exponent = exponent16(arg);
211 Numer.lsw = Denom.lsw = 0;
212 XSIG_LL(Numer) = XSIG_LL(Denom) = significand(arg);
213 if ( argsign == SIGN_POS )
214 {
215 shr_Xsig(&Denom, 2 - (1 + exponent));
216 Denom.msw |= 0x80000000;
217 div_Xsig(&Numer, &Denom, &argSignif);
218 }
219 else
220 {
221 shr_Xsig(&Denom, 1 - (1 + exponent));
222 negate_Xsig(&Denom);
223 if ( Denom.msw & 0x80000000 )
224 {
225 div_Xsig(&Numer, &Denom, &argSignif);
226 exponent ++;
227 }
228 else
229 {
230 /* Denom must be 1.0 */
231 argSignif.lsw = Numer.lsw; argSignif.midw = Numer.midw;
232 argSignif.msw = Numer.msw;
233 }
234 }
235
236#ifndef PECULIAR_486
237 /* Should check here that |local_arg| is within the valid range */
/* NOTE: the range test below has an empty body, so it has no effect. */
238 if ( exponent >= -2 )
239 {
240 if ( (exponent > -2) ||
241 (argSignif.msw > (unsigned)0xafb0ccc0) )
242 {
243 /* The argument is too large */
244 }
245 }
246#endif /* PECULIAR_486 */
247
/* Keep an un-normalized copy for the leading term, then square the
   normalized quotient to get the polynomial variable Xsq. */
248 arg_signif.lsw = argSignif.lsw; XSIG_LL(arg_signif) = XSIG_LL(argSignif);
249 adj = norm_Xsig(&argSignif);
250 accumulator.lsw = argSignif.lsw; XSIG_LL(accumulator) = XSIG_LL(argSignif);
251 mul_Xsig_Xsig(&accumulator, &accumulator);
252 shr_Xsig(&accumulator, 2*(-1 - (1 + exponent + adj)));
253 Xsq = XSIG_LL(accumulator);
254 if ( accumulator.lsw & 0x80000000 )
255 Xsq++; /* round to nearest using the discarded low word */
256
257 accumulator.msw = accumulator.midw = accumulator.lsw = 0;
258 /* Do the basic fixed point polynomial evaluation */
259 polynomial_Xsig(&accumulator, &Xsq, logterms, HIPOWER-1);
260
261 mul_Xsig_Xsig(&accumulator, &argSignif);
262 shr_Xsig(&accumulator, 6 - adj);
263
264 mul32_Xsig(&arg_signif, leadterm);
265 add_two_Xsig(&accumulator, &arg_signif, &exponent);
266
267 *expon = exponent + 1;
268 accum_result->lsw = accumulator.lsw;
269 accum_result->midw = accumulator.midw;
270 accum_result->msw = accumulator.msw;
271
272}
diff --git a/arch/i386/math-emu/poly_sin.c b/arch/i386/math-emu/poly_sin.c
new file mode 100644
index 000000000000..a36313fb06f1
--- /dev/null
+++ b/arch/i386/math-emu/poly_sin.c
@@ -0,0 +1,397 @@
1/*---------------------------------------------------------------------------+
2 | poly_sin.c |
3 | |
4 | Computation of an approximation of the sin function and the cosine |
5 | function by a polynomial. |
6 | |
7 | Copyright (C) 1992,1993,1994,1997,1999 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
9 | E-mail billm@melbpc.org.au |
10 | |
11 | |
12 +---------------------------------------------------------------------------*/
13
14
15#include "exception.h"
16#include "reg_constant.h"
17#include "fpu_emu.h"
18#include "fpu_system.h"
19#include "control_w.h"
20#include "poly.h"
21
22
/* Number of entries in pos_terms_l[] and neg_terms_l[] respectively. */
23#define N_COEFF_P 4
24#define N_COEFF_N 4
25
/* 64-bit fixed-point coefficients handed to polynomial_Xsig().
   The "_l" pair is used by poly_sine() for its lower argument range and
   by poly_cos() for its upper range (where pi/2 - arg is evaluated).
   In both places the neg_terms_l[] polynomial is evaluated first,
   multiplied by the squared argument and negated, and the
   pos_terms_l[] polynomial is then added on top. */
26static const unsigned long long pos_terms_l[N_COEFF_P] =
27{
28 0xaaaaaaaaaaaaaaabLL,
29 0x00d00d00d00cf906LL,
30 0x000006b99159a8bbLL,
31 0x000000000d7392e6LL
32};
33
/* Coefficients of the subtracted part of the "_l" polynomial. */
34static const unsigned long long neg_terms_l[N_COEFF_N] =
35{
36 0x2222222222222167LL,
37 0x0002e3bc74aab624LL,
38 0x0000000b09229062LL,
39 0x00000000000c7973LL
40};
41
42
43
/* Number of entries in pos_terms_h[] and neg_terms_h[] respectively. */
44#define N_COEFF_PH 4
45#define N_COEFF_NH 4
/* The "_h" pair is used by poly_sine() for its upper argument range
   (where pi/2 - arg is evaluated) and by poly_cos() for its lower range.
   The evaluation order is the same as for the "_l" pair, but the
   combined result is negated once more afterwards. */
46static const unsigned long long pos_terms_h[N_COEFF_PH] =
47{
48 0x0000000000000000LL,
49 0x05b05b05b05b0406LL,
50 0x000049f93edd91a9LL,
51 0x00000000c9c9ed62LL
52};
53
/* Coefficients of the subtracted part of the "_h" polynomial. */
54static const unsigned long long neg_terms_h[N_COEFF_NH] =
55{
56 0xaaaaaaaaaaaaaa98LL,
57 0x001a01a01a019064LL,
58 0x0000008f76c68a77LL,
59 0x0000000000d58f5eLL
60};
61
62
63/*--- poly_sine() -----------------------------------------------------------+
64 | |
65 +---------------------------------------------------------------------------*/
/*
 * poly_sine() - replace st(0) with a polynomial approximation of its sine.
 *
 * st0_ptr: the st(0) register.  Its exponent, significand and sign are
 *          read; the result is built in a local FPU_REG, given the sign
 *          of the argument, and stored with FPU_copy_to_reg0() tagged
 *          TAG_Valid.
 *
 * Two argument ranges are handled, split at about 0.88309101259:
 *   - at or below the split the pos_terms_l[]/neg_terms_l[] polynomial
 *     is evaluated and the scaled result is subtracted from the
 *     argument's significand;
 *   - above the split, pi/2 - arg is formed in 64-bit fixed point, the
 *     pos_terms_h[]/neg_terms_h[] polynomial is evaluated on it, and the
 *     answer is corrected for the truncated value used for pi/2.
 *
 * NOTE(review): no range check on the argument is done here; the caller
 * presumably guarantees 0 <= |arg| <= pi/2 -- confirm.
 */
66void poly_sine(FPU_REG *st0_ptr)
67{
68 int exponent, echange;
69 Xsig accumulator, argSqrd, argTo4;
70 unsigned long fix_up, adj;
71 unsigned long long fixed_arg;
72 FPU_REG result;
73
74 exponent = exponent(st0_ptr);
75
76 accumulator.lsw = accumulator.midw = accumulator.msw = 0;
77
78 /* Split into two ranges, for smaller and for larger arguments */
79 /* The boundary between upper and lower is approx 0.88309101259 */
80 if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa)) )
81 {
82 /* The argument is <= 0.88309101259 */
83
/* argSqrd: the significand multiplied by itself, then shifted right to
   account for the argument's exponent.  argTo4 is argSqrd squared. */
84 argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; argSqrd.lsw = 0;
85 mul64_Xsig(&argSqrd, &significand(st0_ptr));
86 shr_Xsig(&argSqrd, 2*(-1-exponent));
87 argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
88 argTo4.lsw = argSqrd.lsw;
89 mul_Xsig_Xsig(&argTo4, &argTo4);
90
/* accumulator = pos(argTo4) - argSqrd * neg(argTo4) */
91 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
92 N_COEFF_N-1);
93 mul_Xsig_Xsig(&accumulator, &argSqrd);
94 negate_Xsig(&accumulator);
95
96 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
97 N_COEFF_P-1);
98
99 shr_Xsig(&accumulator, 2); /* Divide by four */
100 accumulator.msw |= 0x80000000; /* Add 1.0 */
101
/* Multiply by the significand three times (the unshifted cube). */
102 mul64_Xsig(&accumulator, &significand(st0_ptr));
103 mul64_Xsig(&accumulator, &significand(st0_ptr));
104 mul64_Xsig(&accumulator, &significand(st0_ptr));
105
106 /* Divide by four, FPU_REG compatible, etc */
107 exponent = 3*exponent;
108
109 /* The minimum exponent difference is 3 */
110 shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
111
/* result significand = significand(arg) - accumulator */
112 negate_Xsig(&accumulator);
113 XSIG_LL(accumulator) += significand(st0_ptr);
114
115 echange = round_Xsig(&accumulator);
116
117 setexponentpos(&result, exponent(st0_ptr) + echange);
118 }
119 else
120 {
121 /* The argument is > 0.88309101259 */
122 /* We use sin(st(0)) = cos(pi/2-st(0)) */
123
124 fixed_arg = significand(st0_ptr);
125
126 if ( exponent == 0 )
127 {
128 /* The argument is >= 1.0 */
129
130 /* Put the binary point at the left. */
131 fixed_arg <<= 1;
132 }
133 /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
134 fixed_arg = 0x921fb54442d18469LL - fixed_arg;
135 /* There is a special case which arises due to rounding, to fix here. */
136 if ( fixed_arg == 0xffffffffffffffffLL )
137 fixed_arg = 0;
138
/* argSqrd = fixed_arg squared, argTo4 = argSqrd squared. */
139 XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
140 mul64_Xsig(&argSqrd, &fixed_arg);
141
142 XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw;
143 mul_Xsig_Xsig(&argTo4, &argTo4);
144
145 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
146 N_COEFF_NH-1);
147 mul_Xsig_Xsig(&accumulator, &argSqrd);
148 negate_Xsig(&accumulator);
149
150 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
151 N_COEFF_PH-1);
152 negate_Xsig(&accumulator);
153
154 mul64_Xsig(&accumulator, &fixed_arg);
155 mul64_Xsig(&accumulator, &fixed_arg);
156
157 shr_Xsig(&accumulator, 3);
158 negate_Xsig(&accumulator);
159
160 add_Xsig_Xsig(&accumulator, &argSqrd);
161
162 shr_Xsig(&accumulator, 1);
163
164 accumulator.lsw |= 1; /* A zero accumulator here would cause problems */
165 negate_Xsig(&accumulator);
166
167 /* The basic computation is complete. Now fix the answer to
168 compensate for the error due to the approximation used for
169 pi/2
170 */
171
172 /* This has an exponent of -65 */
/* 0x898cc517 is the first 32 bits of pi/2 that lie beyond the 64-bit
   constant subtracted from above (see the hex expansion of pi/2). */
173 fix_up = 0x898cc517;
174 /* The fix-up needs to be improved for larger args */
175 if ( argSqrd.msw & 0xffc00000 )
176 {
177 /* Get about 32 bit precision in these: */
178 fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
179 }
180 fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
181
/* Subtract fix_up from the least significant word and propagate a
   borrow (detected by unsigned wrap-around) into the upper 64 bits. */
182 adj = accumulator.lsw; /* temp save */
183 accumulator.lsw -= fix_up;
184 if ( accumulator.lsw > adj )
185 XSIG_LL(accumulator) --;
186
187 echange = round_Xsig(&accumulator);
188
189 setexponentpos(&result, echange - 1);
190 }
191
/* Common exit: install the significand, copy the argument's sign and
   store the result in st(0). */
192 significand(&result) = XSIG_LL(accumulator);
193 setsign(&result, getsign(st0_ptr));
194 FPU_copy_to_reg0(&result, TAG_Valid);
195
196#ifdef PARANOID
/* Sanity check, performed after the result has already been stored. */
197 if ( (exponent(&result) >= 0)
198 && (significand(&result) > 0x8000000000000000LL) )
199 {
200 EXCEPTION(EX_INTERNAL|0x150);
201 }
202#endif /* PARANOID */
203
204}
205
206
207
208/*--- poly_cos() ------------------------------------------------------------+
209 | |
210 +---------------------------------------------------------------------------*/
/*
 * poly_cos() - replace st(0) with a polynomial approximation of its cosine.
 *
 * st0_ptr: the st(0) register.  Only its exponent and significand are
 *          read here (the sign is not consulted).  The result is stored
 *          with FPU_copy_to_reg0() tagged TAG_Valid.
 *
 * With PARANOID defined, an argument whose exponent is > 0, or whose
 * exponent is 0 and whose significand exceeds 0xc90fdaa22168c234, raises
 * EX_Invalid and loads CONST_QNaN instead.
 *
 * Two ranges are handled.  The lower one (exponent < -1, or exponent -1
 * with sigh <= 0xb00d6f54) evaluates the pos_terms_h[]/neg_terms_h[]
 * polynomial directly.  The upper one forms pi/2 - arg in fixed point,
 * evaluates the pos_terms_l[]/neg_terms_l[] polynomial on it, and then
 * corrects for the truncated value used for pi/2.
 */
211void poly_cos(FPU_REG *st0_ptr)
212{
213 FPU_REG result;
214 long int exponent, exp2, echange;
215 Xsig accumulator, argSqrd, fix_up, argTo4;
216 unsigned long long fixed_arg;
217
218#ifdef PARANOID
219 if ( (exponent(st0_ptr) > 0)
220 || ((exponent(st0_ptr) == 0)
221 && (significand(st0_ptr) > 0xc90fdaa22168c234LL)) )
222 {
223 EXCEPTION(EX_Invalid);
224 FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
225 return;
226 }
227#endif /* PARANOID */
228
229 exponent = exponent(st0_ptr);
230
231 accumulator.lsw = accumulator.midw = accumulator.msw = 0;
232
233 if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54)) )
234 {
235 /* arg is < 0.687705 */
236
/* argSqrd: the significand multiplied by itself, shifted right when
   the exponent is below -1.  argTo4 is argSqrd squared. */
237 argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl;
238 argSqrd.lsw = 0;
239 mul64_Xsig(&argSqrd, &significand(st0_ptr));
240
241 if ( exponent < -1 )
242 {
243 /* shift the argument right by the required places */
244 shr_Xsig(&argSqrd, 2*(-1-exponent));
245 }
246
247 argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
248 argTo4.lsw = argSqrd.lsw;
249 mul_Xsig_Xsig(&argTo4, &argTo4);
250
251 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
252 N_COEFF_NH-1);
253 mul_Xsig_Xsig(&accumulator, &argSqrd);
254 negate_Xsig(&accumulator);
255
256 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
257 N_COEFF_PH-1);
258 negate_Xsig(&accumulator);
259
/* Multiply by the significand twice, then shift to account for the
   exponent (the shift count is 0 when exponent == -1). */
260 mul64_Xsig(&accumulator, &significand(st0_ptr));
261 mul64_Xsig(&accumulator, &significand(st0_ptr));
262 shr_Xsig(&accumulator, -2*(1+exponent));
263
264 shr_Xsig(&accumulator, 3);
265 negate_Xsig(&accumulator);
266
267 add_Xsig_Xsig(&accumulator, &argSqrd);
268
269 shr_Xsig(&accumulator, 1);
270
271 /* It doesn't matter if accumulator is all zero here, the
272 following code will work ok */
273 negate_Xsig(&accumulator);
274
/* Round the upper 64 bits up when the top bit of lsw is set. */
275 if ( accumulator.lsw & 0x80000000 )
276 XSIG_LL(accumulator) ++;
277 if ( accumulator.msw == 0 )
278 {
279 /* The result is 1.0 */
280 FPU_copy_to_reg0(&CONST_1, TAG_Valid);
281 return;
282 }
283 else
284 {
285 significand(&result) = XSIG_LL(accumulator);
286
287 /* will be a valid positive nr with expon = -1 */
288 setexponentpos(&result, -1);
289 }
290 }
291 else
292 {
293 fixed_arg = significand(st0_ptr);
294
295 if ( exponent == 0 )
296 {
297 /* The argument is >= 1.0 */
298
299 /* Put the binary point at the left. */
300 fixed_arg <<= 1;
301 }
302 /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
303 fixed_arg = 0x921fb54442d18469LL - fixed_arg;
304 /* There is a special case which arises due to rounding, to fix here. */
305 if ( fixed_arg == 0xffffffffffffffffLL )
306 fixed_arg = 0;
307
/* From here on "exponent" and "exp2" describe fixed_arg (pi/2 - arg),
   not the original argument. */
308 exponent = -1;
309 exp2 = -1;
310
311 /* A shift is needed here only for a narrow range of arguments,
312 i.e. for fixed_arg approx 2^-32, but we pick up more... */
313 if ( !(LL_MSW(fixed_arg) & 0xffff0000) )
314 {
315 fixed_arg <<= 16;
316 exponent -= 16;
317 exp2 -= 16;
318 }
319
320 XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
321 mul64_Xsig(&argSqrd, &fixed_arg);
322
323 if ( exponent < -1 )
324 {
325 /* shift the argument right by the required places */
326 shr_Xsig(&argSqrd, 2*(-1-exponent));
327 }
328
329 argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
330 argTo4.lsw = argSqrd.lsw;
331 mul_Xsig_Xsig(&argTo4, &argTo4);
332
333 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
334 N_COEFF_N-1);
335 mul_Xsig_Xsig(&accumulator, &argSqrd);
336 negate_Xsig(&accumulator);
337
338 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
339 N_COEFF_P-1);
340
341 shr_Xsig(&accumulator, 2); /* Divide by four */
342 accumulator.msw |= 0x80000000; /* Add 1.0 */
343
344 mul64_Xsig(&accumulator, &fixed_arg);
345 mul64_Xsig(&accumulator, &fixed_arg);
346 mul64_Xsig(&accumulator, &fixed_arg);
347
348 /* Divide by four, FPU_REG compatible, etc */
349 exponent = 3*exponent;
350
351 /* The minimum exponent difference is 3 */
352 shr_Xsig(&accumulator, exp2 - exponent);
353
/* accumulator = fixed_arg - accumulator */
354 negate_Xsig(&accumulator);
355 XSIG_LL(accumulator) += fixed_arg;
356
357 /* The basic computation is complete. Now fix the answer to
358 compensate for the error due to the approximation used for
359 pi/2
360 */
361
362 /* This has an exponent of -65 */
/* 0x898cc51701b839a2 is the next 64 bits of pi/2 after the 64-bit
   constant subtracted from above (see the hex expansion of pi/2). */
363 XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
364 fix_up.lsw = 0;
365
366 /* The fix-up needs to be improved for larger args */
367 if ( argSqrd.msw & 0xffc00000 )
368 {
369 /* Get about 32 bit precision in these: */
370 fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
371 fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
372 }
373
/* Normalise the accumulator (tracking the change in exp2), shift
   fix_up to the same scale, and add the two. */
374 exp2 += norm_Xsig(&accumulator);
375 shr_Xsig(&accumulator, 1); /* Prevent overflow */
376 exp2++;
377 shr_Xsig(&fix_up, 65 + exp2);
378
379 add_Xsig_Xsig(&accumulator, &fix_up);
380
381 echange = round_Xsig(&accumulator);
382
383 setexponentpos(&result, exp2 + echange);
384 significand(&result) = XSIG_LL(accumulator);
385 }
386
387 FPU_copy_to_reg0(&result, TAG_Valid);
388
389#ifdef PARANOID
/* Sanity check, performed after the result has already been stored. */
390 if ( (exponent(&result) >= 0)
391 && (significand(&result) > 0x8000000000000000LL) )
392 {
393 EXCEPTION(EX_INTERNAL|0x151);
394 }
395#endif /* PARANOID */
396
397}
diff --git a/arch/i386/math-emu/poly_tan.c b/arch/i386/math-emu/poly_tan.c
new file mode 100644
index 000000000000..8df3e03b6e6f
--- /dev/null
+++ b/arch/i386/math-emu/poly_tan.c
@@ -0,0 +1,222 @@
1/*---------------------------------------------------------------------------+
2 | poly_tan.c |
3 | |
4 | Compute the tan of a FPU_REG, using a polynomial approximation. |
5 | |
6 | Copyright (C) 1992,1993,1994,1997,1999 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
8 | Australia. E-mail billm@melbpc.org.au |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13#include "exception.h"
14#include "reg_constant.h"
15#include "fpu_emu.h"
16#include "fpu_system.h"
17#include "control_w.h"
18#include "poly.h"
19
20
/* 64-bit fixed-point coefficient tables for poly_tan().  Each HiPOWER*
   macro is the number of entries in the table that follows it;
   poly_tan() passes HiPOWER* - 1 to polynomial_Xsig().  The "odd" tables
   feed the numerator accumulator (accumulatoro) and the "even" tables
   feed the denominator accumulator (accumulatore) in poly_tan(). */
21#define HiPOWERop 3 /* odd poly, positive terms */
22static const unsigned long long oddplterm[HiPOWERop] =
23{
24 0x0000000000000000LL,
25 0x0051a1cf08fca228LL,
26 0x0000000071284ff7LL
27};
28
29#define HiPOWERon 2 /* odd poly, negative terms */
30static const unsigned long long oddnegterm[HiPOWERon] =
31{
32 0x1291a9a184244e80LL,
33 0x0000583245819c21LL
34};
35
36#define HiPOWERep 2 /* even poly, positive terms */
37static const unsigned long long evenplterm[HiPOWERep] =
38{
39 0x0e848884b539e888LL,
40 0x00003c7f18b887daLL
41};
42
43#define HiPOWERen 2 /* even poly, negative terms */
44static const unsigned long long evennegterm[HiPOWERen] =
45{
46 0xf1f0200fd51569ccLL,
47 0x003afb46105c4432LL
48};
49
/* Multiplier applied (via mul64_Xsig) to the arg^3 * ratio term in
   poly_tan(), just before that term is shifted and added to the
   argument. */
50static const unsigned long long twothirds = 0xaaaaaaaaaaaaaaabLL;
51
52
53/*--- poly_tan() ------------------------------------------------------------+
54 | |
55 +---------------------------------------------------------------------------*/
/*
 * poly_tan() - replace st(0) with an approximation of its tangent.
 *
 * st0_ptr: the st(0) register.  Its exponent and significand are read,
 *          and the result's significand and exponent are written back
 *          into it directly; the tag is set with FPU_settag0(TAG_Valid).
 *
 * With PARANOID defined, a negative argument is rejected with
 * arith_invalid(0).
 *
 * For arguments at or below about pi/4 the rational approximation
 * (odd polynomial / even polynomial) is applied to the argument itself.
 * For larger arguments it is applied to pi/2 - arg, the answer is
 * corrected for the truncated value used for pi/2, and the reciprocal is
 * taken (invert == 1).
 */
56void poly_tan(FPU_REG *st0_ptr)
57{
58 long int exponent;
59 int invert;
60 Xsig argSq, argSqSq, accumulatoro, accumulatore, accum,
61 argSignif, fix_up;
62 unsigned long adj;
63
64 exponent = exponent(st0_ptr);
65
66#ifdef PARANOID
67 if ( signnegative(st0_ptr) ) /* Can't hack a number < 0.0 */
68 { arith_invalid(0); return; } /* Need a positive number */
69#endif /* PARANOID */
70
71 /* Split the problem into two domains, smaller and larger than pi/4 */
72 if ( (exponent == 0) || ((exponent == -1) && (st0_ptr->sigh > 0xc90fdaa2)) )
73 {
74 /* The argument is greater than (approx) pi/4 */
75 invert = 1;
76 accum.lsw = 0;
77 XSIG_LL(accum) = significand(st0_ptr);
78
79 if ( exponent == 0 )
80 {
81 /* The argument is >= 1.0 */
82 /* Put the binary point at the left. */
83 XSIG_LL(accum) <<= 1;
84 }
85 /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
86 XSIG_LL(accum) = 0x921fb54442d18469LL - XSIG_LL(accum);
87 /* This is a special case which arises due to rounding. */
88 if ( XSIG_LL(accum) == 0xffffffffffffffffLL )
89 {
/* Store a fixed result directly and return.
   NOTE(review): the exponent word is OR-ed with SIGN_Negative, so this
   path appears to yield a negative value -- confirm against callers. */
90 FPU_settag0(TAG_Valid);
91 significand(st0_ptr) = 0x8a51e04daabda360LL;
92 setexponent16(st0_ptr, (0x41 + EXTENDED_Ebias) | SIGN_Negative);
93 return;
94 }
95
/* argSignif: normalised copy of pi/2 - arg; "exponent" now describes
   that value rather than the original argument. */
96 argSignif.lsw = accum.lsw;
97 XSIG_LL(argSignif) = XSIG_LL(accum);
98 exponent = -1 + norm_Xsig(&argSignif);
99 }
100 else
101 {
102 invert = 0;
103 argSignif.lsw = 0;
104 XSIG_LL(accum) = XSIG_LL(argSignif) = significand(st0_ptr);
105
106 if ( exponent < -1 )
107 {
108 /* shift the argument right by the required places */
109 if ( FPU_shrx(&XSIG_LL(accum), -1-exponent) >= 0x80000000U )
110 XSIG_LL(accum) ++; /* round up */
111 }
112 }
113
/* argSq = accum squared, argSqSq = argSq squared. */
114 XSIG_LL(argSq) = XSIG_LL(accum); argSq.lsw = accum.lsw;
115 mul_Xsig_Xsig(&argSq, &argSq);
116 XSIG_LL(argSqSq) = XSIG_LL(argSq); argSqSq.lsw = argSq.lsw;
117 mul_Xsig_Xsig(&argSqSq, &argSqSq);
118
119 /* Compute the negative terms for the numerator polynomial */
120 accumulatoro.msw = accumulatoro.midw = accumulatoro.lsw = 0;
121 polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddnegterm, HiPOWERon-1);
122 mul_Xsig_Xsig(&accumulatoro, &argSq);
123 negate_Xsig(&accumulatoro);
124 /* Add the positive terms */
125 polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddplterm, HiPOWERop-1);
126
127
128 /* Compute the positive terms for the denominator polynomial */
129 accumulatore.msw = accumulatore.midw = accumulatore.lsw = 0;
130 polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evenplterm, HiPOWERep-1);
131 mul_Xsig_Xsig(&accumulatore, &argSq);
132 negate_Xsig(&accumulatore);
133 /* Add the negative terms */
134 polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evennegterm, HiPOWERen-1);
135 /* Multiply by arg^2 */
136 mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
137 mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
138 /* de-normalize and divide by 2 */
139 shr_Xsig(&accumulatore, -2*(1+exponent) + 1);
140 negate_Xsig(&accumulatore); /* This does 1 - accumulator */
141
142 /* Now find the ratio. */
143 if ( accumulatore.msw == 0 )
144 {
145 /* accumulatoro must contain 1.0 here, (actually, 0) but it
146 really doesn't matter what value we use because it will
147 have negligible effect in later calculations
148 */
149 XSIG_LL(accum) = 0x8000000000000000LL;
150 accum.lsw = 0;
151 }
152 else
153 {
154 div_Xsig(&accumulatoro, &accumulatore, &accum);
155 }
156
157 /* Multiply by 1/3 * arg^3 */
158 mul64_Xsig(&accum, &XSIG_LL(argSignif));
159 mul64_Xsig(&accum, &XSIG_LL(argSignif));
160 mul64_Xsig(&accum, &XSIG_LL(argSignif));
161 mul64_Xsig(&accum, &twothirds);
162 shr_Xsig(&accum, -2*(exponent+1));
163
164 /* tan(arg) = arg + accum */
165 add_two_Xsig(&accum, &argSignif, &exponent);
166
167 if ( invert )
168 {
169 /* We now have the value of tan(pi_2 - arg) where pi_2 is an
170 approximation for pi/2
171 */
172 /* The next step is to fix the answer to compensate for the
173 error due to the approximation used for pi/2
174 */
175
176 /* This is (approx) delta, the error in our approx for pi/2
177 (see above). It has an exponent of -65
178 */
179 XSIG_LL(fix_up) = 0x898cc51701b839a2LL;
180 fix_up.lsw = 0;
181
/* adj approximates tan^2 as a 32-bit fraction; it is then scaled by
   delta and added to fix_up. */
182 if ( exponent == 0 )
183 adj = 0xffffffff; /* We want approx 1.0 here, but
184 this is close enough. */
185 else if ( exponent > -30 )
186 {
187 adj = accum.msw >> -(exponent+1); /* tan */
188 adj = mul_32_32(adj, adj); /* tan^2 */
189 }
190 else
191 adj = 0;
192 adj = mul_32_32(0x898cc517, adj); /* delta * tan^2 */
193
194 fix_up.msw += adj;
195 if ( !(fix_up.msw & 0x80000000) ) /* did fix_up overflow ? */
196 {
197 /* Yes, we need to add an msb */
198 shr_Xsig(&fix_up, 1);
199 fix_up.msw |= 0x80000000;
200 shr_Xsig(&fix_up, 64 + exponent);
201 }
202 else
203 shr_Xsig(&fix_up, 65 + exponent);
204
205 add_two_Xsig(&accum, &fix_up, &exponent);
206
207 /* accum now contains tan(pi/2 - arg).
208 Use tan(arg) = 1.0 / tan(pi/2 - arg)
209 */
210 accumulatoro.lsw = accumulatoro.midw = 0;
211 accumulatoro.msw = 0x80000000;
212 div_Xsig(&accumulatoro, &accum, &accum);
213 exponent = - exponent - 1;
214 }
215
216 /* Transfer the result */
/* NOTE(review): the value returned by round_Xsig() is ignored here,
   whereas poly_sine()/poly_cos() add it to the exponent -- confirm that
   no exponent adjustment can be needed at this point. */
217 round_Xsig(&accum);
218 FPU_settag0(TAG_Valid);
219 significand(st0_ptr) = XSIG_LL(accum);
220 setexponent16(st0_ptr, exponent + EXTENDED_Ebias); /* Result is positive. */
221
222}
diff --git a/arch/i386/math-emu/polynom_Xsig.S b/arch/i386/math-emu/polynom_Xsig.S
new file mode 100644
index 000000000000..17315c89ff3d
--- /dev/null
+++ b/arch/i386/math-emu/polynom_Xsig.S
@@ -0,0 +1,135 @@
1/*---------------------------------------------------------------------------+
2 | polynomial_Xsig.S |
3 | |
4 | Fixed point arithmetic polynomial evaluation. |
5 | |
6 | Copyright (C) 1992,1993,1994,1995 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
8 | Australia. E-mail billm@jacobi.maths.monash.edu.au |
9 | |
10 | Call from C as: |
11 | void polynomial_Xsig(Xsig *accum, const unsigned long long *x, |
12 | const unsigned long long terms[], int n) |
13 | (x is passed by reference; n is the index of the highest-order term) |
14 | Computes: |
15 | terms[0] + (terms[1] + (terms[2] + ... + (terms[n]*x)*x ... )*x)*x |
16 | and adds the result to the 12 byte Xsig. |
17 | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
18 | precision. |
19 | |
20 | This function must be used carefully: most overflow of intermediate |
21 | results is controlled, but overflow of the result is not. |
22 | |
23 +---------------------------------------------------------------------------*/
24 .file "polynomial_Xsig.S"
25
26#include "fpu_emu.h"
27
28
/*
 * Stack frame used below (all offsets relative to %ebp):
 *   SUM_*     96-bit running sum (the Horner accumulator).
 *   ACCUM_*   96-bit product of the running sum and x for this step.
 *   OVERFLOWED  non-zero when the previous addition carried out of SUM_MS.
 * TERM_SIZE is the size in bytes of one terms[] entry.
 * PARAM1..PARAM4 are defined outside this file (fpu_emu.h is included);
 * they are used here as accum, x (a pointer), terms and n respectively.
 */
29#define TERM_SIZE $8
30#define SUM_MS -20(%ebp) /* sum ms long */
31#define SUM_MIDDLE -24(%ebp) /* sum middle long */
32#define SUM_LS -28(%ebp) /* sum ls long */
33#define ACCUM_MS -4(%ebp) /* accum ms long */
34#define ACCUM_MIDDLE -8(%ebp) /* accum middle long */
35#define ACCUM_LS -12(%ebp) /* accum ls long */
36#define OVERFLOWED -16(%ebp) /* addition overflow flag */
37
38.text
39ENTRY(polynomial_Xsig)
/* Prologue: frame pointer, 32 bytes of locals, save %esi/%edi/%ebx. */
40 pushl %ebp
41 movl %esp,%ebp
42 subl $32,%esp
43 pushl %esi
44 pushl %edi
45 pushl %ebx
46
47 movl PARAM2,%esi /* x */
48 movl PARAM3,%edi /* terms */
49
/* %edi = &terms[n]: start from the highest-order coefficient. */
50 movl TERM_SIZE,%eax
51 mull PARAM4 /* n */
52 addl %eax,%edi
53
/* sum = terms[n], placed in the upper 64 bits; low long and overflow
   flag cleared. */
54 movl 4(%edi),%edx /* terms[n] */
55 movl %edx,SUM_MS
56 movl (%edi),%edx /* terms[n] */
57 movl %edx,SUM_MIDDLE
58 xor %eax,%eax
59 movl %eax,SUM_LS
60 movb %al,OVERFLOWED
61
/* Step to terms[n-1]; when n was 0 there is nothing left to multiply. */
62 subl TERM_SIZE,%edi
63 decl PARAM4
64 js L_accum_done
65
/* Each pass: accum = (SUM_MS:SUM_MIDDLE) * x, keeping the upper 96 bits
   of the 128-bit product, then sum = accum + next lower term. */
66L_accum_loop:
67 xor %eax,%eax
68 movl %eax,ACCUM_MS
69 movl %eax,ACCUM_MIDDLE
70
/* Only the high half of (sum middle * x ls) is kept. */
71 movl SUM_MIDDLE,%eax
72 mull (%esi) /* x ls long */
73 movl %edx,ACCUM_LS
74
75 movl SUM_MIDDLE,%eax
76 mull 4(%esi) /* x ms long */
77 addl %eax,ACCUM_LS
78 adcl %edx,ACCUM_MIDDLE
79 adcl $0,ACCUM_MS
80
81 movl SUM_MS,%eax
82 mull (%esi) /* x ls long */
83 addl %eax,ACCUM_LS
84 adcl %edx,ACCUM_MIDDLE
85 adcl $0,ACCUM_MS
86
87 movl SUM_MS,%eax
88 mull 4(%esi) /* x ms long */
89 addl %eax,ACCUM_MIDDLE
90 adcl %edx,ACCUM_MS
91
/* If the previous addition carried out of the sum, that lost top bit
   times x is added back in by adding x to the upper 64 bits. */
92 testb $0xff,OVERFLOWED
93 jz L_no_overflow
94
95 movl (%esi),%eax
96 addl %eax,ACCUM_MIDDLE
97 movl 4(%esi),%eax
98 adcl %eax,ACCUM_MS /* This could overflow too */
99
100L_no_overflow:
101
102/*
103 * Now put the sum of next term and the accumulator
104 * into the sum register
105 */
/* NOTE(review): the term's ls long is added to ACCUM_LS here and again
   to ACCUM_MIDDLE below, whereas the initial load above places a term in
   SUM_MS:SUM_MIDDLE only.  Confirm whether the extra low-word addition
   is intentional before changing it. */
106 movl ACCUM_LS,%eax
107 addl (%edi),%eax /* term ls long */
108 movl %eax,SUM_LS
109 movl ACCUM_MIDDLE,%eax
110 adcl (%edi),%eax /* term ls long */
111 movl %eax,SUM_MIDDLE
112 movl ACCUM_MS,%eax
113 adcl 4(%edi),%eax /* term ms long */
114 movl %eax,SUM_MS
/* %al = 0xff when the addition carried out of the ms long, else 0. */
115 sbbb %al,%al
116 movb %al,OVERFLOWED /* Used in the next iteration */
117
118 subl TERM_SIZE,%edi
119 decl PARAM4
120 jns L_accum_loop
121
/* Add the 96-bit sum into *accum (ls long at offset 0, ms long at 8).
   A carry out of the final adcl is not checked. */
122L_accum_done:
123 movl PARAM1,%edi /* accum */
124 movl SUM_LS,%eax
125 addl %eax,(%edi)
126 movl SUM_MIDDLE,%eax
127 adcl %eax,4(%edi)
128 movl SUM_MS,%eax
129 adcl %eax,8(%edi)
130
131 popl %ebx
132 popl %edi
133 popl %esi
134 leave
135 ret
diff --git a/arch/i386/math-emu/reg_add_sub.c b/arch/i386/math-emu/reg_add_sub.c
new file mode 100644
index 000000000000..7cd3b37ac084
--- /dev/null
+++ b/arch/i386/math-emu/reg_add_sub.c
@@ -0,0 +1,374 @@
1/*---------------------------------------------------------------------------+
2 | reg_add_sub.c |
3 | |
4 | Functions to add or subtract two registers and put the result in a third. |
5 | |
6 | Copyright (C) 1992,1993,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13/*---------------------------------------------------------------------------+
14 | For each function, the destination may be any FPU_REG, including one of |
15 | the source FPU_REGs. |
16 | Each function returns 0 if the answer is o.k., otherwise a non-zero |
17 | value is returned, indicating either an exception condition or an |
18 | internal error. |
19 +---------------------------------------------------------------------------*/
20
21#include "exception.h"
22#include "reg_constant.h"
23#include "fpu_emu.h"
24#include "control_w.h"
25#include "fpu_system.h"
26
/* Forward declaration.  FPU_add() and FPU_sub() hand over to this
   function for the operand combinations they do not process themselves;
   the definition is at the end of this file. */
27static
28int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa,
29 FPU_REG const *b, u_char tagb, u_char signb,
30 FPU_REG *dest, int deststnr, int control_w);
31
32/*
33 Operates on st(0) and st(n), or on st(0) and temporary data.
34 The destination must be one of the source st(x).
35 */
36int FPU_add(FPU_REG const *b, u_char tagb, int deststnr, int control_w)
37{
38 FPU_REG *a = &st(0);
39 FPU_REG *dest = &st(deststnr);
40 u_char signb = getsign(b);
41 u_char taga = FPU_gettag0();
42 u_char signa = getsign(a);
43 u_char saved_sign = getsign(dest);
44 int diff, tag, expa, expb;
45
46 if ( !(taga | tagb) )
47 {
48 expa = exponent(a);
49 expb = exponent(b);
50
51 valid_add:
52 /* Both registers are valid */
53 if (!(signa ^ signb))
54 {
55 /* signs are the same */
56 tag = FPU_u_add(a, b, dest, control_w, signa, expa, expb);
57 }
58 else
59 {
60 /* The signs are different, so do a subtraction */
61 diff = expa - expb;
62 if (!diff)
63 {
64 diff = a->sigh - b->sigh; /* This works only if the ms bits
65 are identical. */
66 if (!diff)
67 {
68 diff = a->sigl > b->sigl;
69 if (!diff)
70 diff = -(a->sigl < b->sigl);
71 }
72 }
73
74 if (diff > 0)
75 {
76 tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb);
77 }
78 else if ( diff < 0 )
79 {
80 tag = FPU_u_sub(b, a, dest, control_w, signb, expb, expa);
81 }
82 else
83 {
84 FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
85 /* sign depends upon rounding mode */
86 setsign(dest, ((control_w & CW_RC) != RC_DOWN)
87 ? SIGN_POS : SIGN_NEG);
88 return TAG_Zero;
89 }
90 }
91
92 if ( tag < 0 )
93 {
94 setsign(dest, saved_sign);
95 return tag;
96 }
97 FPU_settagi(deststnr, tag);
98 return tag;
99 }
100
101 if ( taga == TAG_Special )
102 taga = FPU_Special(a);
103 if ( tagb == TAG_Special )
104 tagb = FPU_Special(b);
105
106 if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
107 || ((taga == TW_Denormal) && (tagb == TAG_Valid))
108 || ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
109 {
110 FPU_REG x, y;
111
112 if ( denormal_operand() < 0 )
113 return FPU_Exception;
114
115 FPU_to_exp16(a, &x);
116 FPU_to_exp16(b, &y);
117 a = &x;
118 b = &y;
119 expa = exponent16(a);
120 expb = exponent16(b);
121 goto valid_add;
122 }
123
124 if ( (taga == TW_NaN) || (tagb == TW_NaN) )
125 {
126 if ( deststnr == 0 )
127 return real_2op_NaN(b, tagb, deststnr, a);
128 else
129 return real_2op_NaN(a, taga, deststnr, a);
130 }
131
132 return add_sub_specials(a, taga, signa, b, tagb, signb,
133 dest, deststnr, control_w);
134}
135
136
137/* Subtract b from a. (a-b) -> dest */
138int FPU_sub(int flags, int rm, int control_w)
139{
140 FPU_REG const *a, *b;
141 FPU_REG *dest;
142 u_char taga, tagb, signa, signb, saved_sign, sign;
143 int diff, tag = 0, expa, expb, deststnr;
144
145 a = &st(0);
146 taga = FPU_gettag0();
147
148 deststnr = 0;
149 if ( flags & LOADED )
150 {
151 b = (FPU_REG *)rm;
152 tagb = flags & 0x0f;
153 }
154 else
155 {
156 b = &st(rm);
157 tagb = FPU_gettagi(rm);
158
159 if ( flags & DEST_RM )
160 deststnr = rm;
161 }
162
163 signa = getsign(a);
164 signb = getsign(b);
165
166 if ( flags & REV )
167 {
168 signa ^= SIGN_NEG;
169 signb ^= SIGN_NEG;
170 }
171
172 dest = &st(deststnr);
173 saved_sign = getsign(dest);
174
175 if ( !(taga | tagb) )
176 {
177 expa = exponent(a);
178 expb = exponent(b);
179
180 valid_subtract:
181 /* Both registers are valid */
182
183 diff = expa - expb;
184
185 if (!diff)
186 {
187 diff = a->sigh - b->sigh; /* Works only if ms bits are identical */
188 if (!diff)
189 {
190 diff = a->sigl > b->sigl;
191 if (!diff)
192 diff = -(a->sigl < b->sigl);
193 }
194 }
195
196 switch ( (((int)signa)*2 + signb) / SIGN_NEG )
197 {
198 case 0: /* P - P */
199 case 3: /* N - N */
200 if (diff > 0)
201 {
202 /* |a| > |b| */
203 tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb);
204 }
205 else if ( diff == 0 )
206 {
207 FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
208
209 /* sign depends upon rounding mode */
210 setsign(dest, ((control_w & CW_RC) != RC_DOWN)
211 ? SIGN_POS : SIGN_NEG);
212 return TAG_Zero;
213 }
214 else
215 {
216 sign = signa ^ SIGN_NEG;
217 tag = FPU_u_sub(b, a, dest, control_w, sign, expb, expa);
218 }
219 break;
220 case 1: /* P - N */
221 tag = FPU_u_add(a, b, dest, control_w, SIGN_POS, expa, expb);
222 break;
223 case 2: /* N - P */
224 tag = FPU_u_add(a, b, dest, control_w, SIGN_NEG, expa, expb);
225 break;
226#ifdef PARANOID
227 default:
228 EXCEPTION(EX_INTERNAL|0x111);
229 return -1;
230#endif
231 }
232 if ( tag < 0 )
233 {
234 setsign(dest, saved_sign);
235 return tag;
236 }
237 FPU_settagi(deststnr, tag);
238 return tag;
239 }
240
241 if ( taga == TAG_Special )
242 taga = FPU_Special(a);
243 if ( tagb == TAG_Special )
244 tagb = FPU_Special(b);
245
246 if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
247 || ((taga == TW_Denormal) && (tagb == TAG_Valid))
248 || ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
249 {
250 FPU_REG x, y;
251
252 if ( denormal_operand() < 0 )
253 return FPU_Exception;
254
255 FPU_to_exp16(a, &x);
256 FPU_to_exp16(b, &y);
257 a = &x;
258 b = &y;
259 expa = exponent16(a);
260 expb = exponent16(b);
261
262 goto valid_subtract;
263 }
264
265 if ( (taga == TW_NaN) || (tagb == TW_NaN) )
266 {
267 FPU_REG const *d1, *d2;
268 if ( flags & REV )
269 {
270 d1 = b;
271 d2 = a;
272 }
273 else
274 {
275 d1 = a;
276 d2 = b;
277 }
278 if ( flags & LOADED )
279 return real_2op_NaN(b, tagb, deststnr, d1);
280 if ( flags & DEST_RM )
281 return real_2op_NaN(a, taga, deststnr, d2);
282 else
283 return real_2op_NaN(b, tagb, deststnr, d2);
284 }
285
286 return add_sub_specials(a, taga, signa, b, tagb, signb ^ SIGN_NEG,
287 dest, deststnr, control_w);
288}
289
290
291static
292int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa,
293 FPU_REG const *b, u_char tagb, u_char signb,
294 FPU_REG *dest, int deststnr, int control_w)
295{
296 if ( ((taga == TW_Denormal) || (tagb == TW_Denormal))
297 && (denormal_operand() < 0) )
298 return FPU_Exception;
299
300 if (taga == TAG_Zero)
301 {
302 if (tagb == TAG_Zero)
303 {
304 /* Both are zero, result will be zero. */
305 u_char different_signs = signa ^ signb;
306
307 FPU_copy_to_regi(a, TAG_Zero, deststnr);
308 if ( different_signs )
309 {
310 /* Signs are different. */
311 /* Sign of answer depends upon rounding mode. */
312 setsign(dest, ((control_w & CW_RC) != RC_DOWN)
313 ? SIGN_POS : SIGN_NEG);
314 }
315 else
316 setsign(dest, signa); /* signa may differ from the sign of a. */
317 return TAG_Zero;
318 }
319 else
320 {
321 reg_copy(b, dest);
322 if ( (tagb == TW_Denormal) && (b->sigh & 0x80000000) )
323 {
324 /* A pseudoDenormal, convert it. */
325 addexponent(dest, 1);
326 tagb = TAG_Valid;
327 }
328 else if ( tagb > TAG_Empty )
329 tagb = TAG_Special;
330 setsign(dest, signb); /* signb may differ from the sign of b. */
331 FPU_settagi(deststnr, tagb);
332 return tagb;
333 }
334 }
335 else if (tagb == TAG_Zero)
336 {
337 reg_copy(a, dest);
338 if ( (taga == TW_Denormal) && (a->sigh & 0x80000000) )
339 {
340 /* A pseudoDenormal */
341 addexponent(dest, 1);
342 taga = TAG_Valid;
343 }
344 else if ( taga > TAG_Empty )
345 taga = TAG_Special;
346 setsign(dest, signa); /* signa may differ from the sign of a. */
347 FPU_settagi(deststnr, taga);
348 return taga;
349 }
350 else if (taga == TW_Infinity)
351 {
352 if ( (tagb != TW_Infinity) || (signa == signb) )
353 {
354 FPU_copy_to_regi(a, TAG_Special, deststnr);
355 setsign(dest, signa); /* signa may differ from the sign of a. */
356 return taga;
357 }
358 /* Infinity-Infinity is undefined. */
359 return arith_invalid(deststnr);
360 }
361 else if (tagb == TW_Infinity)
362 {
363 FPU_copy_to_regi(b, TAG_Special, deststnr);
364 setsign(dest, signb); /* signb may differ from the sign of b. */
365 return tagb;
366 }
367
368#ifdef PARANOID
369 EXCEPTION(EX_INTERNAL|0x101);
370#endif
371
372 return FPU_Exception;
373}
374
diff --git a/arch/i386/math-emu/reg_compare.c b/arch/i386/math-emu/reg_compare.c
new file mode 100644
index 000000000000..f37c5b5a35ad
--- /dev/null
+++ b/arch/i386/math-emu/reg_compare.c
@@ -0,0 +1,381 @@
1/*---------------------------------------------------------------------------+
2 | reg_compare.c |
3 | |
4 | Compare two floating point registers |
5 | |
6 | Copyright (C) 1992,1993,1994,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13/*---------------------------------------------------------------------------+
14 | compare() is the core FPU_REG comparison function |
15 +---------------------------------------------------------------------------*/
16
17#include "fpu_system.h"
18#include "exception.h"
19#include "fpu_emu.h"
20#include "control_w.h"
21#include "status_w.h"
22
23
24static int compare(FPU_REG const *b, int tagb)
25{
26 int diff, exp0, expb;
27 u_char st0_tag;
28 FPU_REG *st0_ptr;
29 FPU_REG x, y;
30 u_char st0_sign, signb = getsign(b);
31
32 st0_ptr = &st(0);
33 st0_tag = FPU_gettag0();
34 st0_sign = getsign(st0_ptr);
35
36 if ( tagb == TAG_Special )
37 tagb = FPU_Special(b);
38 if ( st0_tag == TAG_Special )
39 st0_tag = FPU_Special(st0_ptr);
40
41 if ( ((st0_tag != TAG_Valid) && (st0_tag != TW_Denormal))
42 || ((tagb != TAG_Valid) && (tagb != TW_Denormal)) )
43 {
44 if ( st0_tag == TAG_Zero )
45 {
46 if ( tagb == TAG_Zero ) return COMP_A_eq_B;
47 if ( tagb == TAG_Valid )
48 return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B);
49 if ( tagb == TW_Denormal )
50 return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
51 | COMP_Denormal;
52 }
53 else if ( tagb == TAG_Zero )
54 {
55 if ( st0_tag == TAG_Valid )
56 return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
57 if ( st0_tag == TW_Denormal )
58 return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
59 | COMP_Denormal;
60 }
61
62 if ( st0_tag == TW_Infinity )
63 {
64 if ( (tagb == TAG_Valid) || (tagb == TAG_Zero) )
65 return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
66 else if ( tagb == TW_Denormal )
67 return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
68 | COMP_Denormal;
69 else if ( tagb == TW_Infinity )
70 {
71 /* The 80486 book says that infinities can be equal! */
72 return (st0_sign == signb) ? COMP_A_eq_B :
73 ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
74 }
75 /* Fall through to the NaN code */
76 }
77 else if ( tagb == TW_Infinity )
78 {
79 if ( (st0_tag == TAG_Valid) || (st0_tag == TAG_Zero) )
80 return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B);
81 if ( st0_tag == TW_Denormal )
82 return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
83 | COMP_Denormal;
84 /* Fall through to the NaN code */
85 }
86
87 /* The only possibility now should be that one of the arguments
88 is a NaN */
89 if ( (st0_tag == TW_NaN) || (tagb == TW_NaN) )
90 {
91 int signalling = 0, unsupported = 0;
92 if ( st0_tag == TW_NaN )
93 {
94 signalling = (st0_ptr->sigh & 0xc0000000) == 0x80000000;
95 unsupported = !((exponent(st0_ptr) == EXP_OVER)
96 && (st0_ptr->sigh & 0x80000000));
97 }
98 if ( tagb == TW_NaN )
99 {
100 signalling |= (b->sigh & 0xc0000000) == 0x80000000;
101 unsupported |= !((exponent(b) == EXP_OVER)
102 && (b->sigh & 0x80000000));
103 }
104 if ( signalling || unsupported )
105 return COMP_No_Comp | COMP_SNaN | COMP_NaN;
106 else
107 /* Neither is a signaling NaN */
108 return COMP_No_Comp | COMP_NaN;
109 }
110
111 EXCEPTION(EX_Invalid);
112 }
113
114 if (st0_sign != signb)
115 {
116 return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
117 | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
118 COMP_Denormal : 0);
119 }
120
121 if ( (st0_tag == TW_Denormal) || (tagb == TW_Denormal) )
122 {
123 FPU_to_exp16(st0_ptr, &x);
124 FPU_to_exp16(b, &y);
125 st0_ptr = &x;
126 b = &y;
127 exp0 = exponent16(st0_ptr);
128 expb = exponent16(b);
129 }
130 else
131 {
132 exp0 = exponent(st0_ptr);
133 expb = exponent(b);
134 }
135
136#ifdef PARANOID
137 if (!(st0_ptr->sigh & 0x80000000)) EXCEPTION(EX_Invalid);
138 if (!(b->sigh & 0x80000000)) EXCEPTION(EX_Invalid);
139#endif /* PARANOID */
140
141 diff = exp0 - expb;
142 if ( diff == 0 )
143 {
144 diff = st0_ptr->sigh - b->sigh; /* Works only if ms bits are
145 identical */
146 if ( diff == 0 )
147 {
148 diff = st0_ptr->sigl > b->sigl;
149 if ( diff == 0 )
150 diff = -(st0_ptr->sigl < b->sigl);
151 }
152 }
153
154 if ( diff > 0 )
155 {
156 return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
157 | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
158 COMP_Denormal : 0);
159 }
160 if ( diff < 0 )
161 {
162 return ((st0_sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
163 | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
164 COMP_Denormal : 0);
165 }
166
167 return COMP_A_eq_B
168 | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
169 COMP_Denormal : 0);
170
171}
172
173
174/* This function requires that st(0) is not empty */
175int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag)
176{
177 int f = 0, c;
178
179 c = compare(loaded_data, loaded_tag);
180
181 if (c & COMP_NaN)
182 {
183 EXCEPTION(EX_Invalid);
184 f = SW_C3 | SW_C2 | SW_C0;
185 }
186 else
187 switch (c & 7)
188 {
189 case COMP_A_lt_B:
190 f = SW_C0;
191 break;
192 case COMP_A_eq_B:
193 f = SW_C3;
194 break;
195 case COMP_A_gt_B:
196 f = 0;
197 break;
198 case COMP_No_Comp:
199 f = SW_C3 | SW_C2 | SW_C0;
200 break;
201#ifdef PARANOID
202 default:
203 EXCEPTION(EX_INTERNAL|0x121);
204 f = SW_C3 | SW_C2 | SW_C0;
205 break;
206#endif /* PARANOID */
207 }
208 setcc(f);
209 if (c & COMP_Denormal)
210 {
211 return denormal_operand() < 0;
212 }
213 return 0;
214}
215
216
217static int compare_st_st(int nr)
218{
219 int f = 0, c;
220 FPU_REG *st_ptr;
221
222 if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) )
223 {
224 setcc(SW_C3 | SW_C2 | SW_C0);
225 /* Stack fault */
226 EXCEPTION(EX_StackUnder);
227 return !(control_word & CW_Invalid);
228 }
229
230 st_ptr = &st(nr);
231 c = compare(st_ptr, FPU_gettagi(nr));
232 if (c & COMP_NaN)
233 {
234 setcc(SW_C3 | SW_C2 | SW_C0);
235 EXCEPTION(EX_Invalid);
236 return !(control_word & CW_Invalid);
237 }
238 else
239 switch (c & 7)
240 {
241 case COMP_A_lt_B:
242 f = SW_C0;
243 break;
244 case COMP_A_eq_B:
245 f = SW_C3;
246 break;
247 case COMP_A_gt_B:
248 f = 0;
249 break;
250 case COMP_No_Comp:
251 f = SW_C3 | SW_C2 | SW_C0;
252 break;
253#ifdef PARANOID
254 default:
255 EXCEPTION(EX_INTERNAL|0x122);
256 f = SW_C3 | SW_C2 | SW_C0;
257 break;
258#endif /* PARANOID */
259 }
260 setcc(f);
261 if (c & COMP_Denormal)
262 {
263 return denormal_operand() < 0;
264 }
265 return 0;
266}
267
268
269static int compare_u_st_st(int nr)
270{
271 int f = 0, c;
272 FPU_REG *st_ptr;
273
274 if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) )
275 {
276 setcc(SW_C3 | SW_C2 | SW_C0);
277 /* Stack fault */
278 EXCEPTION(EX_StackUnder);
279 return !(control_word & CW_Invalid);
280 }
281
282 st_ptr = &st(nr);
283 c = compare(st_ptr, FPU_gettagi(nr));
284 if (c & COMP_NaN)
285 {
286 setcc(SW_C3 | SW_C2 | SW_C0);
287 if (c & COMP_SNaN) /* This is the only difference between
288 un-ordered and ordinary comparisons */
289 {
290 EXCEPTION(EX_Invalid);
291 return !(control_word & CW_Invalid);
292 }
293 return 0;
294 }
295 else
296 switch (c & 7)
297 {
298 case COMP_A_lt_B:
299 f = SW_C0;
300 break;
301 case COMP_A_eq_B:
302 f = SW_C3;
303 break;
304 case COMP_A_gt_B:
305 f = 0;
306 break;
307 case COMP_No_Comp:
308 f = SW_C3 | SW_C2 | SW_C0;
309 break;
310#ifdef PARANOID
311 default:
312 EXCEPTION(EX_INTERNAL|0x123);
313 f = SW_C3 | SW_C2 | SW_C0;
314 break;
315#endif /* PARANOID */
316 }
317 setcc(f);
318 if (c & COMP_Denormal)
319 {
320 return denormal_operand() < 0;
321 }
322 return 0;
323}
324
325/*---------------------------------------------------------------------------*/
326
327void fcom_st(void)
328{
329 /* fcom st(i) */
330 compare_st_st(FPU_rm);
331}
332
333
334void fcompst(void)
335{
336 /* fcomp st(i) */
337 if ( !compare_st_st(FPU_rm) )
338 FPU_pop();
339}
340
341
342void fcompp(void)
343{
344 /* fcompp */
345 if (FPU_rm != 1)
346 {
347 FPU_illegal();
348 return;
349 }
350 if ( !compare_st_st(1) )
351 poppop();
352}
353
354
355void fucom_(void)
356{
357 /* fucom st(i) */
358 compare_u_st_st(FPU_rm);
359
360}
361
362
363void fucomp(void)
364{
365 /* fucomp st(i) */
366 if ( !compare_u_st_st(FPU_rm) )
367 FPU_pop();
368}
369
370
371void fucompp(void)
372{
373 /* fucompp */
374 if (FPU_rm == 1)
375 {
376 if ( !compare_u_st_st(1) )
377 poppop();
378 }
379 else
380 FPU_illegal();
381}
diff --git a/arch/i386/math-emu/reg_constant.c b/arch/i386/math-emu/reg_constant.c
new file mode 100644
index 000000000000..a85015801969
--- /dev/null
+++ b/arch/i386/math-emu/reg_constant.c
@@ -0,0 +1,120 @@
1/*---------------------------------------------------------------------------+
2 | reg_constant.c |
3 | |
4 | All of the constant FPU_REGs |
5 | |
6 | Copyright (C) 1992,1993,1994,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
8 | Australia. E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13#include "fpu_system.h"
14#include "fpu_emu.h"
15#include "status_w.h"
16#include "reg_constant.h"
17#include "control_w.h"
18
19
/*
 * Build an FPU_REG initializer.
 *   s - sign name, pasted onto SIGN_ (POS or NEG are used below)
 *   e - exponent, added to EXTENDED_Ebias
 *   l - first initializer value (low word of the significand)
 *   h - second initializer value (high word of the significand)
 * A non-zero SIGN_##s sets bit 0x8000 of the third initializer.
 */
#define MAKE_REG(s,e,l,h) { l, h, \
                            ((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) }

FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000);
#if 0
FPU_REG const CONST_2 = MAKE_REG(POS, 1, 0x00000000, 0x80000000);
FPU_REG const CONST_HALF = MAKE_REG(POS, -1, 0x00000000, 0x80000000);
#endif /* 0 */
/* The static constants below are only loaded through the fldxxx helpers
   in this file. */
static FPU_REG const CONST_L2T = MAKE_REG(POS, 1, 0xcd1b8afe, 0xd49a784b);
static FPU_REG const CONST_L2E = MAKE_REG(POS, 0, 0x5c17f0bc, 0xb8aa3b29);
/* CONST_PI, CONST_PI2 and CONST_PI4 share one significand and differ only
   in exponent (1, 0 and -1). */
FPU_REG const CONST_PI = MAKE_REG(POS, 1, 0x2168c235, 0xc90fdaa2);
FPU_REG const CONST_PI2 = MAKE_REG(POS, 0, 0x2168c235, 0xc90fdaa2);
FPU_REG const CONST_PI4 = MAKE_REG(POS, -1, 0x2168c235, 0xc90fdaa2);
static FPU_REG const CONST_LG2 = MAKE_REG(POS, -2, 0xfbcff799, 0x9a209a84);
static FPU_REG const CONST_LN2 = MAKE_REG(POS, -1, 0xd1cf79ac, 0xb17217f7);

/* Extra bits to take pi/2 to more than 128 bits precision. */
FPU_REG const CONST_PI2extra = MAKE_REG(NEG, -66,
                                        0xfc8f8cbb, 0xece675d1);

/* Only the sign (and tag) is used in internal zeroes */
FPU_REG const CONST_Z = MAKE_REG(POS, EXP_UNDER, 0x0, 0x0);

/* Only the sign and significand (and tag) are used in internal NaNs */
/* The 80486 never generates one of these
FPU_REG const CONST_SNAN = MAKE_REG(POS, EXP_OVER, 0x00000001, 0x80000000);
 */
/* This is the real indefinite QNaN */
FPU_REG const CONST_QNaN = MAKE_REG(NEG, EXP_OVER, 0x00000000, 0xC0000000);

/* Only the sign (and tag) is used in internal infinities */
FPU_REG const CONST_INF = MAKE_REG(POS, EXP_OVER, 0x00000000, 0x80000000);
52
53
54static void fld_const(FPU_REG const *c, int adj, u_char tag)
55{
56 FPU_REG *st_new_ptr;
57
58 if ( STACK_OVERFLOW )
59 {
60 FPU_stack_overflow();
61 return;
62 }
63 push();
64 reg_copy(c, st_new_ptr);
65 st_new_ptr->sigl += adj; /* For all our fldxxx constants, we don't need to
66 borrow or carry. */
67 FPU_settag0(tag);
68 clear_C1();
69}
70
71/* A fast way to find out whether x is one of RC_DOWN or RC_CHOP
72 (and not one of RC_RND or RC_UP).
73 */
74#define DOWN_OR_CHOP(x) (x & RC_DOWN)
75
76static void fld1(int rc)
77{
78 fld_const(&CONST_1, 0, TAG_Valid);
79}
80
81static void fldl2t(int rc)
82{
83 fld_const(&CONST_L2T, (rc == RC_UP) ? 1 : 0, TAG_Valid);
84}
85
86static void fldl2e(int rc)
87{
88 fld_const(&CONST_L2E, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
89}
90
91static void fldpi(int rc)
92{
93 fld_const(&CONST_PI, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
94}
95
96static void fldlg2(int rc)
97{
98 fld_const(&CONST_LG2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
99}
100
101static void fldln2(int rc)
102{
103 fld_const(&CONST_LN2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
104}
105
106static void fldz(int rc)
107{
108 fld_const(&CONST_Z, 0, TAG_Zero);
109}
110
111typedef void (*FUNC_RC)(int);
112
113static FUNC_RC constants_table[] = {
114 fld1, fldl2t, fldl2e, fldpi, fldlg2, fldln2, fldz, (FUNC_RC)FPU_illegal
115};
116
117void fconst(void)
118{
119 (constants_table[FPU_rm])(control_word & CW_RC);
120}
diff --git a/arch/i386/math-emu/reg_constant.h b/arch/i386/math-emu/reg_constant.h
new file mode 100644
index 000000000000..1bffaec3a134
--- /dev/null
+++ b/arch/i386/math-emu/reg_constant.h
@@ -0,0 +1,25 @@
1/*---------------------------------------------------------------------------+
2 | reg_constant.h |
3 | |
4 | Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
5 | Australia. E-mail billm@vaxc.cc.monash.edu.au |
6 | |
7 +---------------------------------------------------------------------------*/
8
#ifndef _REG_CONSTANT_H_
#define _REG_CONSTANT_H_

#include "fpu_emu.h"

/* Constant FPU_REG values exported for use by other emulator files. */
extern FPU_REG const CONST_1;
extern FPU_REG const CONST_PI;
extern FPU_REG const CONST_PI2;
extern FPU_REG const CONST_PI2extra;
extern FPU_REG const CONST_PI4;
extern FPU_REG const CONST_Z;
/* NOTE(review): reg_constant.c defines CONST_INF but no CONST_PINF or
   CONST_MINF; these two declarations look stale - confirm that nothing
   references them. */
extern FPU_REG const CONST_PINF;
extern FPU_REG const CONST_INF;
extern FPU_REG const CONST_MINF;
extern FPU_REG const CONST_QNaN;

#endif /* _REG_CONSTANT_H_ */
diff --git a/arch/i386/math-emu/reg_convert.c b/arch/i386/math-emu/reg_convert.c
new file mode 100644
index 000000000000..45a258752703
--- /dev/null
+++ b/arch/i386/math-emu/reg_convert.c
@@ -0,0 +1,53 @@
1/*---------------------------------------------------------------------------+
2 | reg_convert.c |
3 | |
4 | Convert register representation. |
5 | |
6 | Copyright (C) 1992,1993,1994,1996,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13#include "exception.h"
14#include "fpu_emu.h"
15
16
17int FPU_to_exp16(FPU_REG const *a, FPU_REG *x)
18{
19 int sign = getsign(a);
20
21 *(long long *)&(x->sigl) = *(const long long *)&(a->sigl);
22
23 /* Set up the exponent as a 16 bit quantity. */
24 setexponent16(x, exponent(a));
25
26 if ( exponent16(x) == EXP_UNDER )
27 {
28 /* The number is a de-normal or pseudodenormal. */
29 /* We only deal with the significand and exponent. */
30
31 if (x->sigh & 0x80000000)
32 {
33 /* Is a pseudodenormal. */
34 /* This is non-80486 behaviour because the number
35 loses its 'denormal' identity. */
36 addexponent(x, 1);
37 }
38 else
39 {
40 /* Is a denormal. */
41 addexponent(x, 1);
42 FPU_normalize_nuo(x);
43 }
44 }
45
46 if ( !(x->sigh & 0x80000000) )
47 {
48 EXCEPTION(EX_INTERNAL | 0x180);
49 }
50
51 return sign;
52}
53
diff --git a/arch/i386/math-emu/reg_divide.c b/arch/i386/math-emu/reg_divide.c
new file mode 100644
index 000000000000..5cee7ff920d9
--- /dev/null
+++ b/arch/i386/math-emu/reg_divide.c
@@ -0,0 +1,207 @@
1/*---------------------------------------------------------------------------+
2 | reg_divide.c |
3 | |
4 | Divide one FPU_REG by another and put the result in a destination FPU_REG.|
5 | |
6 | Copyright (C) 1996 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@jacobi.maths.monash.edu.au |
9 | |
10 | Return value is the tag of the answer, or-ed with FPU_Exception if |
11 | one was raised, or -1 on internal error. |
12 | |
13 +---------------------------------------------------------------------------*/
14
15/*---------------------------------------------------------------------------+
16 | The destination may be any FPU_REG, including one of the source FPU_REGs. |
17 +---------------------------------------------------------------------------*/
18
19#include "exception.h"
20#include "reg_constant.h"
21#include "fpu_emu.h"
22#include "fpu_system.h"
23
24/*
25 Divide one register by another and put the result into a third register.
26 */
27int FPU_div(int flags, int rm, int control_w)
28{
29 FPU_REG x, y;
30 FPU_REG const *a, *b, *st0_ptr, *st_ptr;
31 FPU_REG *dest;
32 u_char taga, tagb, signa, signb, sign, saved_sign;
33 int tag, deststnr;
34
35 if ( flags & DEST_RM )
36 deststnr = rm;
37 else
38 deststnr = 0;
39
40 if ( flags & REV )
41 {
42 b = &st(0);
43 st0_ptr = b;
44 tagb = FPU_gettag0();
45 if ( flags & LOADED )
46 {
47 a = (FPU_REG *)rm;
48 taga = flags & 0x0f;
49 }
50 else
51 {
52 a = &st(rm);
53 st_ptr = a;
54 taga = FPU_gettagi(rm);
55 }
56 }
57 else
58 {
59 a = &st(0);
60 st0_ptr = a;
61 taga = FPU_gettag0();
62 if ( flags & LOADED )
63 {
64 b = (FPU_REG *)rm;
65 tagb = flags & 0x0f;
66 }
67 else
68 {
69 b = &st(rm);
70 st_ptr = b;
71 tagb = FPU_gettagi(rm);
72 }
73 }
74
75 signa = getsign(a);
76 signb = getsign(b);
77
78 sign = signa ^ signb;
79
80 dest = &st(deststnr);
81 saved_sign = getsign(dest);
82
83 if ( !(taga | tagb) )
84 {
85 /* Both regs Valid, this should be the most common case. */
86 reg_copy(a, &x);
87 reg_copy(b, &y);
88 setpositive(&x);
89 setpositive(&y);
90 tag = FPU_u_div(&x, &y, dest, control_w, sign);
91
92 if ( tag < 0 )
93 return tag;
94
95 FPU_settagi(deststnr, tag);
96 return tag;
97 }
98
99 if ( taga == TAG_Special )
100 taga = FPU_Special(a);
101 if ( tagb == TAG_Special )
102 tagb = FPU_Special(b);
103
104 if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
105 || ((taga == TW_Denormal) && (tagb == TAG_Valid))
106 || ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
107 {
108 if ( denormal_operand() < 0 )
109 return FPU_Exception;
110
111 FPU_to_exp16(a, &x);
112 FPU_to_exp16(b, &y);
113 tag = FPU_u_div(&x, &y, dest, control_w, sign);
114 if ( tag < 0 )
115 return tag;
116
117 FPU_settagi(deststnr, tag);
118 return tag;
119 }
120 else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) )
121 {
122 if ( tagb != TAG_Zero )
123 {
124 /* Want to find Zero/Valid */
125 if ( tagb == TW_Denormal )
126 {
127 if ( denormal_operand() < 0 )
128 return FPU_Exception;
129 }
130
131 /* The result is zero. */
132 FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
133 setsign(dest, sign);
134 return TAG_Zero;
135 }
136 /* We have an exception condition, either 0/0 or Valid/Zero. */
137 if ( taga == TAG_Zero )
138 {
139 /* 0/0 */
140 return arith_invalid(deststnr);
141 }
142 /* Valid/Zero */
143 return FPU_divide_by_zero(deststnr, sign);
144 }
145 /* Must have infinities, NaNs, etc */
146 else if ( (taga == TW_NaN) || (tagb == TW_NaN) )
147 {
148 if ( flags & LOADED )
149 return real_2op_NaN((FPU_REG *)rm, flags & 0x0f, 0, st0_ptr);
150
151 if ( flags & DEST_RM )
152 {
153 int tag;
154 tag = FPU_gettag0();
155 if ( tag == TAG_Special )
156 tag = FPU_Special(st0_ptr);
157 return real_2op_NaN(st0_ptr, tag, rm, (flags & REV) ? st0_ptr : &st(rm));
158 }
159 else
160 {
161 int tag;
162 tag = FPU_gettagi(rm);
163 if ( tag == TAG_Special )
164 tag = FPU_Special(&st(rm));
165 return real_2op_NaN(&st(rm), tag, 0, (flags & REV) ? st0_ptr : &st(rm));
166 }
167 }
168 else if (taga == TW_Infinity)
169 {
170 if (tagb == TW_Infinity)
171 {
172 /* infinity/infinity */
173 return arith_invalid(deststnr);
174 }
175 else
176 {
177 /* tagb must be Valid or Zero */
178 if ( (tagb == TW_Denormal) && (denormal_operand() < 0) )
179 return FPU_Exception;
180
181 /* Infinity divided by Zero or Valid does
182 not raise and exception, but returns Infinity */
183 FPU_copy_to_regi(a, TAG_Special, deststnr);
184 setsign(dest, sign);
185 return taga;
186 }
187 }
188 else if (tagb == TW_Infinity)
189 {
190 if ( (taga == TW_Denormal) && (denormal_operand() < 0) )
191 return FPU_Exception;
192
193 /* The result is zero. */
194 FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
195 setsign(dest, sign);
196 return TAG_Zero;
197 }
198#ifdef PARANOID
199 else
200 {
201 EXCEPTION(EX_INTERNAL|0x102);
202 return FPU_Exception;
203 }
204#endif /* PARANOID */
205
206 return 0;
207}
diff --git a/arch/i386/math-emu/reg_ld_str.c b/arch/i386/math-emu/reg_ld_str.c
new file mode 100644
index 000000000000..f06ed41d191d
--- /dev/null
+++ b/arch/i386/math-emu/reg_ld_str.c
@@ -0,0 +1,1370 @@
1/*---------------------------------------------------------------------------+
2 | reg_ld_str.c |
3 | |
4 | All of the functions which transfer data between user memory and FPU_REGs.|
5 | |
6 | Copyright (C) 1992,1993,1994,1996,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13/*---------------------------------------------------------------------------+
14 | Note: |
15 | The file contains code which accesses user memory. |
16 | Emulator static data may change when user memory is accessed, due to |
17 | other processes using the emulator while swapping is in progress. |
18 +---------------------------------------------------------------------------*/
19
20#include "fpu_emu.h"
21
22#include <asm/uaccess.h>
23
24#include "fpu_system.h"
25#include "exception.h"
26#include "reg_constant.h"
27#include "control_w.h"
28#include "status_w.h"
29
30
31#define DOUBLE_Emax 1023 /* largest valid exponent */
32#define DOUBLE_Ebias 1023
33#define DOUBLE_Emin (-1022) /* smallest valid exponent */
34
35#define SINGLE_Emax 127 /* largest valid exponent */
36#define SINGLE_Ebias 127
37#define SINGLE_Emin (-126) /* smallest valid exponent */
38
39
40static u_char normalize_no_excep(FPU_REG *r, int exp, int sign)
41{
42 u_char tag;
43
44 setexponent16(r, exp);
45
46 tag = FPU_normalize_nuo(r);
47 stdexp(r);
48 if ( sign )
49 setnegative(r);
50
51 return tag;
52}
53
54
55int FPU_tagof(FPU_REG *ptr)
56{
57 int exp;
58
59 exp = exponent16(ptr) & 0x7fff;
60 if ( exp == 0 )
61 {
62 if ( !(ptr->sigh | ptr->sigl) )
63 {
64 return TAG_Zero;
65 }
66 /* The number is a de-normal or pseudodenormal. */
67 return TAG_Special;
68 }
69
70 if ( exp == 0x7fff )
71 {
72 /* Is an Infinity, a NaN, or an unsupported data type. */
73 return TAG_Special;
74 }
75
76 if ( !(ptr->sigh & 0x80000000) )
77 {
78 /* Unsupported data type. */
79 /* Valid numbers have the ms bit set to 1. */
80 /* Unnormal. */
81 return TAG_Special;
82 }
83
84 return TAG_Valid;
85}
86
87
88/* Get a long double from user memory */
89int FPU_load_extended(long double __user *s, int stnr)
90{
91 FPU_REG *sti_ptr = &st(stnr);
92
93 RE_ENTRANT_CHECK_OFF;
94 FPU_access_ok(VERIFY_READ, s, 10);
95 __copy_from_user(sti_ptr, s, 10);
96 RE_ENTRANT_CHECK_ON;
97
98 return FPU_tagof(sti_ptr);
99}
100
101
102/* Get a double from user memory */
103int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data)
104{
105 int exp, tag, negative;
106 unsigned m64, l64;
107
108 RE_ENTRANT_CHECK_OFF;
109 FPU_access_ok(VERIFY_READ, dfloat, 8);
110 FPU_get_user(m64, 1 + (unsigned long __user *) dfloat);
111 FPU_get_user(l64, (unsigned long __user *) dfloat);
112 RE_ENTRANT_CHECK_ON;
113
114 negative = (m64 & 0x80000000) ? SIGN_Negative : SIGN_Positive;
115 exp = ((m64 & 0x7ff00000) >> 20) - DOUBLE_Ebias + EXTENDED_Ebias;
116 m64 &= 0xfffff;
117 if ( exp > DOUBLE_Emax + EXTENDED_Ebias )
118 {
119 /* Infinity or NaN */
120 if ((m64 == 0) && (l64 == 0))
121 {
122 /* +- infinity */
123 loaded_data->sigh = 0x80000000;
124 loaded_data->sigl = 0x00000000;
125 exp = EXP_Infinity + EXTENDED_Ebias;
126 tag = TAG_Special;
127 }
128 else
129 {
130 /* Must be a signaling or quiet NaN */
131 exp = EXP_NaN + EXTENDED_Ebias;
132 loaded_data->sigh = (m64 << 11) | 0x80000000;
133 loaded_data->sigh |= l64 >> 21;
134 loaded_data->sigl = l64 << 11;
135 tag = TAG_Special; /* The calling function must look for NaNs */
136 }
137 }
138 else if ( exp < DOUBLE_Emin + EXTENDED_Ebias )
139 {
140 /* Zero or de-normal */
141 if ((m64 == 0) && (l64 == 0))
142 {
143 /* Zero */
144 reg_copy(&CONST_Z, loaded_data);
145 exp = 0;
146 tag = TAG_Zero;
147 }
148 else
149 {
150 /* De-normal */
151 loaded_data->sigh = m64 << 11;
152 loaded_data->sigh |= l64 >> 21;
153 loaded_data->sigl = l64 << 11;
154
155 return normalize_no_excep(loaded_data, DOUBLE_Emin, negative)
156 | (denormal_operand() < 0 ? FPU_Exception : 0);
157 }
158 }
159 else
160 {
161 loaded_data->sigh = (m64 << 11) | 0x80000000;
162 loaded_data->sigh |= l64 >> 21;
163 loaded_data->sigl = l64 << 11;
164
165 tag = TAG_Valid;
166 }
167
168 setexponent16(loaded_data, exp | negative);
169
170 return tag;
171}
172
173
174/* Get a float from user memory */
175int FPU_load_single(float __user *single, FPU_REG *loaded_data)
176{
177 unsigned m32;
178 int exp, tag, negative;
179
180 RE_ENTRANT_CHECK_OFF;
181 FPU_access_ok(VERIFY_READ, single, 4);
182 FPU_get_user(m32, (unsigned long __user *) single);
183 RE_ENTRANT_CHECK_ON;
184
185 negative = (m32 & 0x80000000) ? SIGN_Negative : SIGN_Positive;
186
187 if (!(m32 & 0x7fffffff))
188 {
189 /* Zero */
190 reg_copy(&CONST_Z, loaded_data);
191 addexponent(loaded_data, negative);
192 return TAG_Zero;
193 }
194 exp = ((m32 & 0x7f800000) >> 23) - SINGLE_Ebias + EXTENDED_Ebias;
195 m32 = (m32 & 0x7fffff) << 8;
196 if ( exp < SINGLE_Emin + EXTENDED_Ebias )
197 {
198 /* De-normals */
199 loaded_data->sigh = m32;
200 loaded_data->sigl = 0;
201
202 return normalize_no_excep(loaded_data, SINGLE_Emin, negative)
203 | (denormal_operand() < 0 ? FPU_Exception : 0);
204 }
205 else if ( exp > SINGLE_Emax + EXTENDED_Ebias )
206 {
207 /* Infinity or NaN */
208 if ( m32 == 0 )
209 {
210 /* +- infinity */
211 loaded_data->sigh = 0x80000000;
212 loaded_data->sigl = 0x00000000;
213 exp = EXP_Infinity + EXTENDED_Ebias;
214 tag = TAG_Special;
215 }
216 else
217 {
218 /* Must be a signaling or quiet NaN */
219 exp = EXP_NaN + EXTENDED_Ebias;
220 loaded_data->sigh = m32 | 0x80000000;
221 loaded_data->sigl = 0;
222 tag = TAG_Special; /* The calling function must look for NaNs */
223 }
224 }
225 else
226 {
227 loaded_data->sigh = m32 | 0x80000000;
228 loaded_data->sigl = 0;
229 tag = TAG_Valid;
230 }
231
232 setexponent16(loaded_data, exp | negative); /* Set the sign. */
233
234 return tag;
235}
236
237
238/* Get a long long from user memory */
239int FPU_load_int64(long long __user *_s)
240{
241 long long s;
242 int sign;
243 FPU_REG *st0_ptr = &st(0);
244
245 RE_ENTRANT_CHECK_OFF;
246 FPU_access_ok(VERIFY_READ, _s, 8);
247 copy_from_user(&s,_s,8);
248 RE_ENTRANT_CHECK_ON;
249
250 if (s == 0)
251 {
252 reg_copy(&CONST_Z, st0_ptr);
253 return TAG_Zero;
254 }
255
256 if (s > 0)
257 sign = SIGN_Positive;
258 else
259 {
260 s = -s;
261 sign = SIGN_Negative;
262 }
263
264 significand(st0_ptr) = s;
265
266 return normalize_no_excep(st0_ptr, 63, sign);
267}
268
269
270/* Get a long from user memory */
271int FPU_load_int32(long __user *_s, FPU_REG *loaded_data)
272{
273 long s;
274 int negative;
275
276 RE_ENTRANT_CHECK_OFF;
277 FPU_access_ok(VERIFY_READ, _s, 4);
278 FPU_get_user(s, _s);
279 RE_ENTRANT_CHECK_ON;
280
281 if (s == 0)
282 { reg_copy(&CONST_Z, loaded_data); return TAG_Zero; }
283
284 if (s > 0)
285 negative = SIGN_Positive;
286 else
287 {
288 s = -s;
289 negative = SIGN_Negative;
290 }
291
292 loaded_data->sigh = s;
293 loaded_data->sigl = 0;
294
295 return normalize_no_excep(loaded_data, 31, negative);
296}
297
298
299/* Get a short from user memory */
300int FPU_load_int16(short __user *_s, FPU_REG *loaded_data)
301{
302 int s, negative;
303
304 RE_ENTRANT_CHECK_OFF;
305 FPU_access_ok(VERIFY_READ, _s, 2);
306 /* Cast as short to get the sign extended. */
307 FPU_get_user(s, _s);
308 RE_ENTRANT_CHECK_ON;
309
310 if (s == 0)
311 { reg_copy(&CONST_Z, loaded_data); return TAG_Zero; }
312
313 if (s > 0)
314 negative = SIGN_Positive;
315 else
316 {
317 s = -s;
318 negative = SIGN_Negative;
319 }
320
321 loaded_data->sigh = s << 16;
322 loaded_data->sigl = 0;
323
324 return normalize_no_excep(loaded_data, 15, negative);
325}
326
327
328/* Get a packed bcd array from user memory */
329int FPU_load_bcd(u_char __user *s)
330{
331 FPU_REG *st0_ptr = &st(0);
332 int pos;
333 u_char bcd;
334 long long l=0;
335 int sign;
336
337 RE_ENTRANT_CHECK_OFF;
338 FPU_access_ok(VERIFY_READ, s, 10);
339 RE_ENTRANT_CHECK_ON;
340 for ( pos = 8; pos >= 0; pos--)
341 {
342 l *= 10;
343 RE_ENTRANT_CHECK_OFF;
344 FPU_get_user(bcd, s+pos);
345 RE_ENTRANT_CHECK_ON;
346 l += bcd >> 4;
347 l *= 10;
348 l += bcd & 0x0f;
349 }
350
351 RE_ENTRANT_CHECK_OFF;
352 FPU_get_user(sign, s+9);
353 sign = sign & 0x80 ? SIGN_Negative : SIGN_Positive;
354 RE_ENTRANT_CHECK_ON;
355
356 if ( l == 0 )
357 {
358 reg_copy(&CONST_Z, st0_ptr);
359 addexponent(st0_ptr, sign); /* Set the sign. */
360 return TAG_Zero;
361 }
362 else
363 {
364 significand(st0_ptr) = l;
365 return normalize_no_excep(st0_ptr, 63, sign);
366 }
367}
368
369/*===========================================================================*/
370
371/* Put a long double into user memory */
372int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag, long double __user *d)
373{
374 /*
375 The only exception raised by an attempt to store to an
376 extended format is the Invalid Stack exception, i.e.
377 attempting to store from an empty register.
378 */
379
380 if ( st0_tag != TAG_Empty )
381 {
382 RE_ENTRANT_CHECK_OFF;
383 FPU_access_ok(VERIFY_WRITE, d, 10);
384
385 FPU_put_user(st0_ptr->sigl, (unsigned long __user *) d);
386 FPU_put_user(st0_ptr->sigh, (unsigned long __user *) ((u_char __user *)d + 4));
387 FPU_put_user(exponent16(st0_ptr), (unsigned short __user *) ((u_char __user *)d + 8));
388 RE_ENTRANT_CHECK_ON;
389
390 return 1;
391 }
392
393 /* Empty register (stack underflow) */
394 EXCEPTION(EX_StackUnder);
395 if ( control_word & CW_Invalid )
396 {
397 /* The masked response */
398 /* Put out the QNaN indefinite */
399 RE_ENTRANT_CHECK_OFF;
400 FPU_access_ok(VERIFY_WRITE,d,10);
401 FPU_put_user(0, (unsigned long __user *) d);
402 FPU_put_user(0xc0000000, 1 + (unsigned long __user *) d);
403 FPU_put_user(0xffff, 4 + (short __user *) d);
404 RE_ENTRANT_CHECK_ON;
405 return 1;
406 }
407 else
408 return 0;
409
410}
411
412
413/* Put a double into user memory */
414int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat)
415{
416 unsigned long l[2];
417 unsigned long increment = 0; /* avoid gcc warnings */
418 int precision_loss;
419 int exp;
420 FPU_REG tmp;
421
422 if ( st0_tag == TAG_Valid )
423 {
424 reg_copy(st0_ptr, &tmp);
425 exp = exponent(&tmp);
426
427 if ( exp < DOUBLE_Emin ) /* It may be a denormal */
428 {
429 addexponent(&tmp, -DOUBLE_Emin + 52); /* largest exp to be 51 */
430
431 denormal_arg:
432
433 if ( (precision_loss = FPU_round_to_int(&tmp, st0_tag)) )
434 {
435#ifdef PECULIAR_486
436 /* Did it round to a non-denormal ? */
437 /* This behaviour might be regarded as peculiar, it appears
438 that the 80486 rounds to the dest precision, then
439 converts to decide underflow. */
440 if ( !((tmp.sigh == 0x00100000) && (tmp.sigl == 0) &&
441 (st0_ptr->sigl & 0x000007ff)) )
442#endif /* PECULIAR_486 */
443 {
444 EXCEPTION(EX_Underflow);
445 /* This is a special case: see sec 16.2.5.1 of
446 the 80486 book */
447 if ( !(control_word & CW_Underflow) )
448 return 0;
449 }
450 EXCEPTION(precision_loss);
451 if ( !(control_word & CW_Precision) )
452 return 0;
453 }
454 l[0] = tmp.sigl;
455 l[1] = tmp.sigh;
456 }
457 else
458 {
459 if ( tmp.sigl & 0x000007ff )
460 {
461 precision_loss = 1;
462 switch (control_word & CW_RC)
463 {
464 case RC_RND:
465 /* Rounding can get a little messy.. */
466 increment = ((tmp.sigl & 0x7ff) > 0x400) | /* nearest */
467 ((tmp.sigl & 0xc00) == 0xc00); /* odd -> even */
468 break;
469 case RC_DOWN: /* towards -infinity */
470 increment = signpositive(&tmp) ? 0 : tmp.sigl & 0x7ff;
471 break;
472 case RC_UP: /* towards +infinity */
473 increment = signpositive(&tmp) ? tmp.sigl & 0x7ff : 0;
474 break;
475 case RC_CHOP:
476 increment = 0;
477 break;
478 }
479
480 /* Truncate the mantissa */
481 tmp.sigl &= 0xfffff800;
482
483 if ( increment )
484 {
485 if ( tmp.sigl >= 0xfffff800 )
486 {
487 /* the sigl part overflows */
488 if ( tmp.sigh == 0xffffffff )
489 {
490 /* The sigh part overflows */
491 tmp.sigh = 0x80000000;
492 exp++;
493 if (exp >= EXP_OVER)
494 goto overflow;
495 }
496 else
497 {
498 tmp.sigh ++;
499 }
500 tmp.sigl = 0x00000000;
501 }
502 else
503 {
504 /* We only need to increment sigl */
505 tmp.sigl += 0x00000800;
506 }
507 }
508 }
509 else
510 precision_loss = 0;
511
512 l[0] = (tmp.sigl >> 11) | (tmp.sigh << 21);
513 l[1] = ((tmp.sigh >> 11) & 0xfffff);
514
515 if ( exp > DOUBLE_Emax )
516 {
517 overflow:
518 EXCEPTION(EX_Overflow);
519 if ( !(control_word & CW_Overflow) )
520 return 0;
521 set_precision_flag_up();
522 if ( !(control_word & CW_Precision) )
523 return 0;
524
525 /* This is a special case: see sec 16.2.5.1 of the 80486 book */
526 /* Overflow to infinity */
527 l[0] = 0x00000000; /* Set to */
528 l[1] = 0x7ff00000; /* + INF */
529 }
530 else
531 {
532 if ( precision_loss )
533 {
534 if ( increment )
535 set_precision_flag_up();
536 else
537 set_precision_flag_down();
538 }
539 /* Add the exponent */
540 l[1] |= (((exp+DOUBLE_Ebias) & 0x7ff) << 20);
541 }
542 }
543 }
544 else if (st0_tag == TAG_Zero)
545 {
546 /* Number is zero */
547 l[0] = 0;
548 l[1] = 0;
549 }
550 else if ( st0_tag == TAG_Special )
551 {
552 st0_tag = FPU_Special(st0_ptr);
553 if ( st0_tag == TW_Denormal )
554 {
555 /* A denormal will always underflow. */
556#ifndef PECULIAR_486
557 /* An 80486 is supposed to be able to generate
558 a denormal exception here, but... */
559 /* Underflow has priority. */
560 if ( control_word & CW_Underflow )
561 denormal_operand();
562#endif /* PECULIAR_486 */
563 reg_copy(st0_ptr, &tmp);
564 goto denormal_arg;
565 }
566 else if (st0_tag == TW_Infinity)
567 {
568 l[0] = 0;
569 l[1] = 0x7ff00000;
570 }
571 else if (st0_tag == TW_NaN)
572 {
573 /* Is it really a NaN ? */
574 if ( (exponent(st0_ptr) == EXP_OVER)
575 && (st0_ptr->sigh & 0x80000000) )
576 {
577 /* See if we can get a valid NaN from the FPU_REG */
578 l[0] = (st0_ptr->sigl >> 11) | (st0_ptr->sigh << 21);
579 l[1] = ((st0_ptr->sigh >> 11) & 0xfffff);
580 if ( !(st0_ptr->sigh & 0x40000000) )
581 {
582 /* It is a signalling NaN */
583 EXCEPTION(EX_Invalid);
584 if ( !(control_word & CW_Invalid) )
585 return 0;
586 l[1] |= (0x40000000 >> 11);
587 }
588 l[1] |= 0x7ff00000;
589 }
590 else
591 {
592 /* It is an unsupported data type */
593 EXCEPTION(EX_Invalid);
594 if ( !(control_word & CW_Invalid) )
595 return 0;
596 l[0] = 0;
597 l[1] = 0xfff80000;
598 }
599 }
600 }
601 else if ( st0_tag == TAG_Empty )
602 {
603 /* Empty register (stack underflow) */
604 EXCEPTION(EX_StackUnder);
605 if ( control_word & CW_Invalid )
606 {
607 /* The masked response */
608 /* Put out the QNaN indefinite */
609 RE_ENTRANT_CHECK_OFF;
610 FPU_access_ok(VERIFY_WRITE,dfloat,8);
611 FPU_put_user(0, (unsigned long __user *) dfloat);
612 FPU_put_user(0xfff80000, 1 + (unsigned long __user *) dfloat);
613 RE_ENTRANT_CHECK_ON;
614 return 1;
615 }
616 else
617 return 0;
618 }
619 if ( getsign(st0_ptr) )
620 l[1] |= 0x80000000;
621
622 RE_ENTRANT_CHECK_OFF;
623 FPU_access_ok(VERIFY_WRITE,dfloat,8);
624 FPU_put_user(l[0], (unsigned long __user *)dfloat);
625 FPU_put_user(l[1], 1 + (unsigned long __user *)dfloat);
626 RE_ENTRANT_CHECK_ON;
627
628 return 1;
629}
630
631
632/* Put a float into user memory */
633int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, float __user *single)
634{
635 long templ = 0;
636 unsigned long increment = 0; /* avoid gcc warnings */
637 int precision_loss;
638 int exp;
639 FPU_REG tmp;
640
641 if ( st0_tag == TAG_Valid )
642 {
643
644 reg_copy(st0_ptr, &tmp);
645 exp = exponent(&tmp);
646
647 if ( exp < SINGLE_Emin )
648 {
649 addexponent(&tmp, -SINGLE_Emin + 23); /* largest exp to be 22 */
650
651 denormal_arg:
652
653 if ( (precision_loss = FPU_round_to_int(&tmp, st0_tag)) )
654 {
655#ifdef PECULIAR_486
656 /* Did it round to a non-denormal ? */
657 /* This behaviour might be regarded as peculiar, it appears
658 that the 80486 rounds to the dest precision, then
659 converts to decide underflow. */
660 if ( !((tmp.sigl == 0x00800000) &&
661 ((st0_ptr->sigh & 0x000000ff) || st0_ptr->sigl)) )
662#endif /* PECULIAR_486 */
663 {
664 EXCEPTION(EX_Underflow);
665 /* This is a special case: see sec 16.2.5.1 of
666 the 80486 book */
667 if ( !(control_word & CW_Underflow) )
668 return 0;
669 }
670 EXCEPTION(precision_loss);
671 if ( !(control_word & CW_Precision) )
672 return 0;
673 }
674 templ = tmp.sigl;
675 }
676 else
677 {
678 if ( tmp.sigl | (tmp.sigh & 0x000000ff) )
679 {
680 unsigned long sigh = tmp.sigh;
681 unsigned long sigl = tmp.sigl;
682
683 precision_loss = 1;
684 switch (control_word & CW_RC)
685 {
686 case RC_RND:
687 increment = ((sigh & 0xff) > 0x80) /* more than half */
688 || (((sigh & 0xff) == 0x80) && sigl) /* more than half */
689 || ((sigh & 0x180) == 0x180); /* round to even */
690 break;
691 case RC_DOWN: /* towards -infinity */
692 increment = signpositive(&tmp)
693 ? 0 : (sigl | (sigh & 0xff));
694 break;
695 case RC_UP: /* towards +infinity */
696 increment = signpositive(&tmp)
697 ? (sigl | (sigh & 0xff)) : 0;
698 break;
699 case RC_CHOP:
700 increment = 0;
701 break;
702 }
703
704 /* Truncate part of the mantissa */
705 tmp.sigl = 0;
706
707 if (increment)
708 {
709 if ( sigh >= 0xffffff00 )
710 {
711 /* The sigh part overflows */
712 tmp.sigh = 0x80000000;
713 exp++;
714 if ( exp >= EXP_OVER )
715 goto overflow;
716 }
717 else
718 {
719 tmp.sigh &= 0xffffff00;
720 tmp.sigh += 0x100;
721 }
722 }
723 else
724 {
725 tmp.sigh &= 0xffffff00; /* Finish the truncation */
726 }
727 }
728 else
729 precision_loss = 0;
730
731 templ = (tmp.sigh >> 8) & 0x007fffff;
732
733 if ( exp > SINGLE_Emax )
734 {
735 overflow:
736 EXCEPTION(EX_Overflow);
737 if ( !(control_word & CW_Overflow) )
738 return 0;
739 set_precision_flag_up();
740 if ( !(control_word & CW_Precision) )
741 return 0;
742
743 /* This is a special case: see sec 16.2.5.1 of the 80486 book. */
744 /* Masked response is overflow to infinity. */
745 templ = 0x7f800000;
746 }
747 else
748 {
749 if ( precision_loss )
750 {
751 if ( increment )
752 set_precision_flag_up();
753 else
754 set_precision_flag_down();
755 }
756 /* Add the exponent */
757 templ |= ((exp+SINGLE_Ebias) & 0xff) << 23;
758 }
759 }
760 }
761 else if (st0_tag == TAG_Zero)
762 {
763 templ = 0;
764 }
765 else if ( st0_tag == TAG_Special )
766 {
767 st0_tag = FPU_Special(st0_ptr);
768 if (st0_tag == TW_Denormal)
769 {
770 reg_copy(st0_ptr, &tmp);
771
772 /* A denormal will always underflow. */
773#ifndef PECULIAR_486
774 /* An 80486 is supposed to be able to generate
775 a denormal exception here, but... */
776 /* Underflow has priority. */
777 if ( control_word & CW_Underflow )
778 denormal_operand();
779#endif /* PECULIAR_486 */
780 goto denormal_arg;
781 }
782 else if (st0_tag == TW_Infinity)
783 {
784 templ = 0x7f800000;
785 }
786 else if (st0_tag == TW_NaN)
787 {
788 /* Is it really a NaN ? */
789 if ( (exponent(st0_ptr) == EXP_OVER) && (st0_ptr->sigh & 0x80000000) )
790 {
791 /* See if we can get a valid NaN from the FPU_REG */
792 templ = st0_ptr->sigh >> 8;
793 if ( !(st0_ptr->sigh & 0x40000000) )
794 {
795 /* It is a signalling NaN */
796 EXCEPTION(EX_Invalid);
797 if ( !(control_word & CW_Invalid) )
798 return 0;
799 templ |= (0x40000000 >> 8);
800 }
801 templ |= 0x7f800000;
802 }
803 else
804 {
805 /* It is an unsupported data type */
806 EXCEPTION(EX_Invalid);
807 if ( !(control_word & CW_Invalid) )
808 return 0;
809 templ = 0xffc00000;
810 }
811 }
812#ifdef PARANOID
813 else
814 {
815 EXCEPTION(EX_INTERNAL|0x164);
816 return 0;
817 }
818#endif
819 }
820 else if ( st0_tag == TAG_Empty )
821 {
822 /* Empty register (stack underflow) */
823 EXCEPTION(EX_StackUnder);
824 if ( control_word & EX_Invalid )
825 {
826 /* The masked response */
827 /* Put out the QNaN indefinite */
828 RE_ENTRANT_CHECK_OFF;
829 FPU_access_ok(VERIFY_WRITE,single,4);
830 FPU_put_user(0xffc00000, (unsigned long __user *) single);
831 RE_ENTRANT_CHECK_ON;
832 return 1;
833 }
834 else
835 return 0;
836 }
837#ifdef PARANOID
838 else
839 {
840 EXCEPTION(EX_INTERNAL|0x163);
841 return 0;
842 }
843#endif
844 if ( getsign(st0_ptr) )
845 templ |= 0x80000000;
846
847 RE_ENTRANT_CHECK_OFF;
848 FPU_access_ok(VERIFY_WRITE,single,4);
849 FPU_put_user(templ,(unsigned long __user *) single);
850 RE_ENTRANT_CHECK_ON;
851
852 return 1;
853}
854
855
856/* Put a long long into user memory */
857int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d)
858{
859 FPU_REG t;
860 long long tll;
861 int precision_loss;
862
863 if ( st0_tag == TAG_Empty )
864 {
865 /* Empty register (stack underflow) */
866 EXCEPTION(EX_StackUnder);
867 goto invalid_operand;
868 }
869 else if ( st0_tag == TAG_Special )
870 {
871 st0_tag = FPU_Special(st0_ptr);
872 if ( (st0_tag == TW_Infinity) ||
873 (st0_tag == TW_NaN) )
874 {
875 EXCEPTION(EX_Invalid);
876 goto invalid_operand;
877 }
878 }
879
880 reg_copy(st0_ptr, &t);
881 precision_loss = FPU_round_to_int(&t, st0_tag);
882 ((long *)&tll)[0] = t.sigl;
883 ((long *)&tll)[1] = t.sigh;
884 if ( (precision_loss == 1) ||
885 ((t.sigh & 0x80000000) &&
886 !((t.sigh == 0x80000000) && (t.sigl == 0) &&
887 signnegative(&t))) )
888 {
889 EXCEPTION(EX_Invalid);
890 /* This is a special case: see sec 16.2.5.1 of the 80486 book */
891 invalid_operand:
892 if ( control_word & EX_Invalid )
893 {
894 /* Produce something like QNaN "indefinite" */
895 tll = 0x8000000000000000LL;
896 }
897 else
898 return 0;
899 }
900 else
901 {
902 if ( precision_loss )
903 set_precision_flag(precision_loss);
904 if ( signnegative(&t) )
905 tll = - tll;
906 }
907
908 RE_ENTRANT_CHECK_OFF;
909 FPU_access_ok(VERIFY_WRITE,d,8);
910 copy_to_user(d, &tll, 8);
911 RE_ENTRANT_CHECK_ON;
912
913 return 1;
914}
915
916
917/* Put a long into user memory */
918int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d)
919{
920 FPU_REG t;
921 int precision_loss;
922
923 if ( st0_tag == TAG_Empty )
924 {
925 /* Empty register (stack underflow) */
926 EXCEPTION(EX_StackUnder);
927 goto invalid_operand;
928 }
929 else if ( st0_tag == TAG_Special )
930 {
931 st0_tag = FPU_Special(st0_ptr);
932 if ( (st0_tag == TW_Infinity) ||
933 (st0_tag == TW_NaN) )
934 {
935 EXCEPTION(EX_Invalid);
936 goto invalid_operand;
937 }
938 }
939
940 reg_copy(st0_ptr, &t);
941 precision_loss = FPU_round_to_int(&t, st0_tag);
942 if (t.sigh ||
943 ((t.sigl & 0x80000000) &&
944 !((t.sigl == 0x80000000) && signnegative(&t))) )
945 {
946 EXCEPTION(EX_Invalid);
947 /* This is a special case: see sec 16.2.5.1 of the 80486 book */
948 invalid_operand:
949 if ( control_word & EX_Invalid )
950 {
951 /* Produce something like QNaN "indefinite" */
952 t.sigl = 0x80000000;
953 }
954 else
955 return 0;
956 }
957 else
958 {
959 if ( precision_loss )
960 set_precision_flag(precision_loss);
961 if ( signnegative(&t) )
962 t.sigl = -(long)t.sigl;
963 }
964
965 RE_ENTRANT_CHECK_OFF;
966 FPU_access_ok(VERIFY_WRITE,d,4);
967 FPU_put_user(t.sigl, (unsigned long __user *) d);
968 RE_ENTRANT_CHECK_ON;
969
970 return 1;
971}
972
973
974/* Put a short into user memory */
975int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d)
976{
977 FPU_REG t;
978 int precision_loss;
979
980 if ( st0_tag == TAG_Empty )
981 {
982 /* Empty register (stack underflow) */
983 EXCEPTION(EX_StackUnder);
984 goto invalid_operand;
985 }
986 else if ( st0_tag == TAG_Special )
987 {
988 st0_tag = FPU_Special(st0_ptr);
989 if ( (st0_tag == TW_Infinity) ||
990 (st0_tag == TW_NaN) )
991 {
992 EXCEPTION(EX_Invalid);
993 goto invalid_operand;
994 }
995 }
996
997 reg_copy(st0_ptr, &t);
998 precision_loss = FPU_round_to_int(&t, st0_tag);
999 if (t.sigh ||
1000 ((t.sigl & 0xffff8000) &&
1001 !((t.sigl == 0x8000) && signnegative(&t))) )
1002 {
1003 EXCEPTION(EX_Invalid);
1004 /* This is a special case: see sec 16.2.5.1 of the 80486 book */
1005 invalid_operand:
1006 if ( control_word & EX_Invalid )
1007 {
1008 /* Produce something like QNaN "indefinite" */
1009 t.sigl = 0x8000;
1010 }
1011 else
1012 return 0;
1013 }
1014 else
1015 {
1016 if ( precision_loss )
1017 set_precision_flag(precision_loss);
1018 if ( signnegative(&t) )
1019 t.sigl = -t.sigl;
1020 }
1021
1022 RE_ENTRANT_CHECK_OFF;
1023 FPU_access_ok(VERIFY_WRITE,d,2);
1024 FPU_put_user((short)t.sigl, d);
1025 RE_ENTRANT_CHECK_ON;
1026
1027 return 1;
1028}
1029
1030
1031/* Put a packed bcd array into user memory */
1032int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d)
1033{
1034 FPU_REG t;
1035 unsigned long long ll;
1036 u_char b;
1037 int i, precision_loss;
1038 u_char sign = (getsign(st0_ptr) == SIGN_NEG) ? 0x80 : 0;
1039
1040 if ( st0_tag == TAG_Empty )
1041 {
1042 /* Empty register (stack underflow) */
1043 EXCEPTION(EX_StackUnder);
1044 goto invalid_operand;
1045 }
1046 else if ( st0_tag == TAG_Special )
1047 {
1048 st0_tag = FPU_Special(st0_ptr);
1049 if ( (st0_tag == TW_Infinity) ||
1050 (st0_tag == TW_NaN) )
1051 {
1052 EXCEPTION(EX_Invalid);
1053 goto invalid_operand;
1054 }
1055 }
1056
1057 reg_copy(st0_ptr, &t);
1058 precision_loss = FPU_round_to_int(&t, st0_tag);
1059 ll = significand(&t);
1060
1061 /* Check for overflow, by comparing with 999999999999999999 decimal. */
1062 if ( (t.sigh > 0x0de0b6b3) ||
1063 ((t.sigh == 0x0de0b6b3) && (t.sigl > 0xa763ffff)) )
1064 {
1065 EXCEPTION(EX_Invalid);
1066 /* This is a special case: see sec 16.2.5.1 of the 80486 book */
1067 invalid_operand:
1068 if ( control_word & CW_Invalid )
1069 {
1070 /* Produce the QNaN "indefinite" */
1071 RE_ENTRANT_CHECK_OFF;
1072 FPU_access_ok(VERIFY_WRITE,d,10);
1073 for ( i = 0; i < 7; i++)
1074 FPU_put_user(0, d+i); /* These bytes "undefined" */
1075 FPU_put_user(0xc0, d+7); /* This byte "undefined" */
1076 FPU_put_user(0xff, d+8);
1077 FPU_put_user(0xff, d+9);
1078 RE_ENTRANT_CHECK_ON;
1079 return 1;
1080 }
1081 else
1082 return 0;
1083 }
1084 else if ( precision_loss )
1085 {
1086 /* Precision loss doesn't stop the data transfer */
1087 set_precision_flag(precision_loss);
1088 }
1089
1090 RE_ENTRANT_CHECK_OFF;
1091 FPU_access_ok(VERIFY_WRITE,d,10);
1092 RE_ENTRANT_CHECK_ON;
1093 for ( i = 0; i < 9; i++)
1094 {
1095 b = FPU_div_small(&ll, 10);
1096 b |= (FPU_div_small(&ll, 10)) << 4;
1097 RE_ENTRANT_CHECK_OFF;
1098 FPU_put_user(b, d+i);
1099 RE_ENTRANT_CHECK_ON;
1100 }
1101 RE_ENTRANT_CHECK_OFF;
1102 FPU_put_user(sign, d+9);
1103 RE_ENTRANT_CHECK_ON;
1104
1105 return 1;
1106}
1107
1108/*===========================================================================*/
1109
1110/* r gets mangled such that sig is int, sign:
1111 it is NOT normalized */
1112/* The return value (in eax) is zero if the result is exact,
1113 if bits are changed due to rounding, truncation, etc, then
1114 a non-zero value is returned */
1115/* Overflow is signalled by a non-zero return value (in eax).
1116 In the case of overflow, the returned significand always has the
1117 largest possible value */
1118int FPU_round_to_int(FPU_REG *r, u_char tag)
1119{
1120 u_char very_big;
1121 unsigned eax;
1122
1123 if (tag == TAG_Zero)
1124 {
1125 /* Make sure that zero is returned */
1126 significand(r) = 0;
1127 return 0; /* o.k. */
1128 }
1129
1130 if (exponent(r) > 63)
1131 {
1132 r->sigl = r->sigh = ~0; /* The largest representable number */
1133 return 1; /* overflow */
1134 }
1135
1136 eax = FPU_shrxs(&r->sigl, 63 - exponent(r));
1137 very_big = !(~(r->sigh) | ~(r->sigl)); /* test for 0xfff...fff */
1138#define half_or_more (eax & 0x80000000)
1139#define frac_part (eax)
1140#define more_than_half ((eax & 0x80000001) == 0x80000001)
1141 switch (control_word & CW_RC)
1142 {
1143 case RC_RND:
1144 if ( more_than_half /* nearest */
1145 || (half_or_more && (r->sigl & 1)) ) /* odd -> even */
1146 {
1147 if ( very_big ) return 1; /* overflow */
1148 significand(r) ++;
1149 return PRECISION_LOST_UP;
1150 }
1151 break;
1152 case RC_DOWN:
1153 if (frac_part && getsign(r))
1154 {
1155 if ( very_big ) return 1; /* overflow */
1156 significand(r) ++;
1157 return PRECISION_LOST_UP;
1158 }
1159 break;
1160 case RC_UP:
1161 if (frac_part && !getsign(r))
1162 {
1163 if ( very_big ) return 1; /* overflow */
1164 significand(r) ++;
1165 return PRECISION_LOST_UP;
1166 }
1167 break;
1168 case RC_CHOP:
1169 break;
1170 }
1171
1172 return eax ? PRECISION_LOST_DOWN : 0;
1173
1174}
1175
1176/*===========================================================================*/
1177
1178u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s)
1179{
1180 unsigned short tag_word = 0;
1181 u_char tag;
1182 int i;
1183
1184 if ( (addr_modes.default_mode == VM86) ||
1185 ((addr_modes.default_mode == PM16)
1186 ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) )
1187 {
1188 RE_ENTRANT_CHECK_OFF;
1189 FPU_access_ok(VERIFY_READ, s, 0x0e);
1190 FPU_get_user(control_word, (unsigned short __user *) s);
1191 FPU_get_user(partial_status, (unsigned short __user *) (s+2));
1192 FPU_get_user(tag_word, (unsigned short __user *) (s+4));
1193 FPU_get_user(instruction_address.offset, (unsigned short __user *) (s+6));
1194 FPU_get_user(instruction_address.selector, (unsigned short __user *) (s+8));
1195 FPU_get_user(operand_address.offset, (unsigned short __user *) (s+0x0a));
1196 FPU_get_user(operand_address.selector, (unsigned short __user *) (s+0x0c));
1197 RE_ENTRANT_CHECK_ON;
1198 s += 0x0e;
1199 if ( addr_modes.default_mode == VM86 )
1200 {
1201 instruction_address.offset
1202 += (instruction_address.selector & 0xf000) << 4;
1203 operand_address.offset += (operand_address.selector & 0xf000) << 4;
1204 }
1205 }
1206 else
1207 {
1208 RE_ENTRANT_CHECK_OFF;
1209 FPU_access_ok(VERIFY_READ, s, 0x1c);
1210 FPU_get_user(control_word, (unsigned short __user *) s);
1211 FPU_get_user(partial_status, (unsigned short __user *) (s+4));
1212 FPU_get_user(tag_word, (unsigned short __user *) (s+8));
1213 FPU_get_user(instruction_address.offset, (unsigned long __user *) (s+0x0c));
1214 FPU_get_user(instruction_address.selector, (unsigned short __user *) (s+0x10));
1215 FPU_get_user(instruction_address.opcode, (unsigned short __user *) (s+0x12));
1216 FPU_get_user(operand_address.offset, (unsigned long __user *) (s+0x14));
1217 FPU_get_user(operand_address.selector, (unsigned long __user *) (s+0x18));
1218 RE_ENTRANT_CHECK_ON;
1219 s += 0x1c;
1220 }
1221
1222#ifdef PECULIAR_486
1223 control_word &= ~0xe080;
1224#endif /* PECULIAR_486 */
1225
1226 top = (partial_status >> SW_Top_Shift) & 7;
1227
1228 if ( partial_status & ~control_word & CW_Exceptions )
1229 partial_status |= (SW_Summary | SW_Backward);
1230 else
1231 partial_status &= ~(SW_Summary | SW_Backward);
1232
1233 for ( i = 0; i < 8; i++ )
1234 {
1235 tag = tag_word & 3;
1236 tag_word >>= 2;
1237
1238 if ( tag == TAG_Empty )
1239 /* New tag is empty. Accept it */
1240 FPU_settag(i, TAG_Empty);
1241 else if ( FPU_gettag(i) == TAG_Empty )
1242 {
1243 /* Old tag is empty and new tag is not empty. New tag is determined
1244 by old reg contents */
1245 if ( exponent(&fpu_register(i)) == - EXTENDED_Ebias )
1246 {
1247 if ( !(fpu_register(i).sigl | fpu_register(i).sigh) )
1248 FPU_settag(i, TAG_Zero);
1249 else
1250 FPU_settag(i, TAG_Special);
1251 }
1252 else if ( exponent(&fpu_register(i)) == 0x7fff - EXTENDED_Ebias )
1253 {
1254 FPU_settag(i, TAG_Special);
1255 }
1256 else if ( fpu_register(i).sigh & 0x80000000 )
1257 FPU_settag(i, TAG_Valid);
1258 else
1259 FPU_settag(i, TAG_Special); /* An Un-normal */
1260 }
1261 /* Else old tag is not empty and new tag is not empty. Old tag
1262 remains correct */
1263 }
1264
1265 return s;
1266}
1267
1268
1269void frstor(fpu_addr_modes addr_modes, u_char __user *data_address)
1270{
1271 int i, regnr;
1272 u_char __user *s = fldenv(addr_modes, data_address);
1273 int offset = (top & 7) * 10, other = 80 - offset;
1274
1275 /* Copy all registers in stack order. */
1276 RE_ENTRANT_CHECK_OFF;
1277 FPU_access_ok(VERIFY_READ,s,80);
1278 __copy_from_user(register_base+offset, s, other);
1279 if ( offset )
1280 __copy_from_user(register_base, s+other, offset);
1281 RE_ENTRANT_CHECK_ON;
1282
1283 for ( i = 0; i < 8; i++ )
1284 {
1285 regnr = (i+top) & 7;
1286 if ( FPU_gettag(regnr) != TAG_Empty )
1287 /* The loaded data over-rides all other cases. */
1288 FPU_settag(regnr, FPU_tagof(&st(i)));
1289 }
1290
1291}
1292
1293
1294u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d)
1295{
1296 if ( (addr_modes.default_mode == VM86) ||
1297 ((addr_modes.default_mode == PM16)
1298 ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) )
1299 {
1300 RE_ENTRANT_CHECK_OFF;
1301 FPU_access_ok(VERIFY_WRITE,d,14);
1302#ifdef PECULIAR_486
1303 FPU_put_user(control_word & ~0xe080, (unsigned long __user *) d);
1304#else
1305 FPU_put_user(control_word, (unsigned short __user *) d);
1306#endif /* PECULIAR_486 */
1307 FPU_put_user(status_word(), (unsigned short __user *) (d+2));
1308 FPU_put_user(fpu_tag_word, (unsigned short __user *) (d+4));
1309 FPU_put_user(instruction_address.offset, (unsigned short __user *) (d+6));
1310 FPU_put_user(operand_address.offset, (unsigned short __user *) (d+0x0a));
1311 if ( addr_modes.default_mode == VM86 )
1312 {
1313 FPU_put_user((instruction_address.offset & 0xf0000) >> 4,
1314 (unsigned short __user *) (d+8));
1315 FPU_put_user((operand_address.offset & 0xf0000) >> 4,
1316 (unsigned short __user *) (d+0x0c));
1317 }
1318 else
1319 {
1320 FPU_put_user(instruction_address.selector, (unsigned short __user *) (d+8));
1321 FPU_put_user(operand_address.selector, (unsigned short __user *) (d+0x0c));
1322 }
1323 RE_ENTRANT_CHECK_ON;
1324 d += 0x0e;
1325 }
1326 else
1327 {
1328 RE_ENTRANT_CHECK_OFF;
1329 FPU_access_ok(VERIFY_WRITE, d, 7*4);
1330#ifdef PECULIAR_486
1331 control_word &= ~0xe080;
1332 /* An 80486 sets nearly all of the reserved bits to 1. */
1333 control_word |= 0xffff0040;
1334 partial_status = status_word() | 0xffff0000;
1335 fpu_tag_word |= 0xffff0000;
1336 I387.soft.fcs &= ~0xf8000000;
1337 I387.soft.fos |= 0xffff0000;
1338#endif /* PECULIAR_486 */
1339 __copy_to_user(d, &control_word, 7*4);
1340 RE_ENTRANT_CHECK_ON;
1341 d += 0x1c;
1342 }
1343
1344 control_word |= CW_Exceptions;
1345 partial_status &= ~(SW_Summary | SW_Backward);
1346
1347 return d;
1348}
1349
1350
1351void fsave(fpu_addr_modes addr_modes, u_char __user *data_address)
1352{
1353 u_char __user *d;
1354 int offset = (top & 7) * 10, other = 80 - offset;
1355
1356 d = fstenv(addr_modes, data_address);
1357
1358 RE_ENTRANT_CHECK_OFF;
1359 FPU_access_ok(VERIFY_WRITE,d,80);
1360
1361 /* Copy all registers in stack order. */
1362 __copy_to_user(d, register_base+offset, other);
1363 if ( offset )
1364 __copy_to_user(d+other, register_base, offset);
1365 RE_ENTRANT_CHECK_ON;
1366
1367 finit();
1368}
1369
1370/*===========================================================================*/
diff --git a/arch/i386/math-emu/reg_mul.c b/arch/i386/math-emu/reg_mul.c
new file mode 100644
index 000000000000..40f50b61bc67
--- /dev/null
+++ b/arch/i386/math-emu/reg_mul.c
@@ -0,0 +1,132 @@
1/*---------------------------------------------------------------------------+
2 | reg_mul.c |
3 | |
4 | Multiply one FPU_REG by another, put the result in a destination FPU_REG. |
5 | |
6 | Copyright (C) 1992,1993,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@suburbia.net |
9 | |
10 | Returns the tag of the result if no exceptions or errors occurred. |
11 | |
12 +---------------------------------------------------------------------------*/
13
14/*---------------------------------------------------------------------------+
15 | The destination may be any FPU_REG, including one of the source FPU_REGs. |
16 +---------------------------------------------------------------------------*/
17
18#include "fpu_emu.h"
19#include "exception.h"
20#include "reg_constant.h"
21#include "fpu_system.h"
22
23
24/*
25 Multiply two registers to give a register result.
26 The sources are st(deststnr) and (b,tagb,signb).
27 The destination is st(deststnr).
28 */
29/* This routine must be called with non-empty source registers */
30int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w)
31{
32 FPU_REG *a = &st(deststnr);
33 FPU_REG *dest = a;
34 u_char taga = FPU_gettagi(deststnr);
35 u_char saved_sign = getsign(dest);
36 u_char sign = (getsign(a) ^ getsign(b));
37 int tag;
38
39
40 if ( !(taga | tagb) )
41 {
42 /* Both regs Valid, this should be the most common case. */
43
44 tag = FPU_u_mul(a, b, dest, control_w, sign, exponent(a) + exponent(b));
45 if ( tag < 0 )
46 {
47 setsign(dest, saved_sign);
48 return tag;
49 }
50 FPU_settagi(deststnr, tag);
51 return tag;
52 }
53
54 if ( taga == TAG_Special )
55 taga = FPU_Special(a);
56 if ( tagb == TAG_Special )
57 tagb = FPU_Special(b);
58
59 if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
60 || ((taga == TW_Denormal) && (tagb == TAG_Valid))
61 || ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
62 {
63 FPU_REG x, y;
64 if ( denormal_operand() < 0 )
65 return FPU_Exception;
66
67 FPU_to_exp16(a, &x);
68 FPU_to_exp16(b, &y);
69 tag = FPU_u_mul(&x, &y, dest, control_w, sign,
70 exponent16(&x) + exponent16(&y));
71 if ( tag < 0 )
72 {
73 setsign(dest, saved_sign);
74 return tag;
75 }
76 FPU_settagi(deststnr, tag);
77 return tag;
78 }
79 else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) )
80 {
81 if ( ((tagb == TW_Denormal) || (taga == TW_Denormal))
82 && (denormal_operand() < 0) )
83 return FPU_Exception;
84
85 /* Must have either both arguments == zero, or
86 one valid and the other zero.
87 The result is therefore zero. */
88 FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
89 /* The 80486 book says that the answer is +0, but a real
90 80486 behaves this way.
91 IEEE-754 apparently says it should be this way. */
92 setsign(dest, sign);
93 return TAG_Zero;
94 }
95 /* Must have infinities, NaNs, etc */
96 else if ( (taga == TW_NaN) || (tagb == TW_NaN) )
97 {
98 return real_2op_NaN(b, tagb, deststnr, &st(0));
99 }
100 else if ( ((taga == TW_Infinity) && (tagb == TAG_Zero))
101 || ((tagb == TW_Infinity) && (taga == TAG_Zero)) )
102 {
103 return arith_invalid(deststnr); /* Zero*Infinity is invalid */
104 }
105 else if ( ((taga == TW_Denormal) || (tagb == TW_Denormal))
106 && (denormal_operand() < 0) )
107 {
108 return FPU_Exception;
109 }
110 else if (taga == TW_Infinity)
111 {
112 FPU_copy_to_regi(a, TAG_Special, deststnr);
113 setsign(dest, sign);
114 return TAG_Special;
115 }
116 else if (tagb == TW_Infinity)
117 {
118 FPU_copy_to_regi(b, TAG_Special, deststnr);
119 setsign(dest, sign);
120 return TAG_Special;
121 }
122
123#ifdef PARANOID
124 else
125 {
126 EXCEPTION(EX_INTERNAL|0x102);
127 return FPU_Exception;
128 }
129#endif /* PARANOID */
130
131 return 0;
132}
diff --git a/arch/i386/math-emu/reg_norm.S b/arch/i386/math-emu/reg_norm.S
new file mode 100644
index 000000000000..8b6352efceef
--- /dev/null
+++ b/arch/i386/math-emu/reg_norm.S
@@ -0,0 +1,147 @@
1/*---------------------------------------------------------------------------+
2 | reg_norm.S |
3 | |
4 | Copyright (C) 1992,1993,1994,1995,1997 |
5 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
6 | Australia. E-mail billm@suburbia.net |
7 | |
8 | Normalize the value in a FPU_REG. |
9 | |
10 | Call from C as: |
11 | int FPU_normalize(FPU_REG *n) |
12 | |
13 | int FPU_normalize_nuo(FPU_REG *n) |
14 | |
15 | Return value is the tag of the answer, or-ed with FPU_Exception if |
16 | one was raised, or -1 on internal error. |
17 | |
18 +---------------------------------------------------------------------------*/
19
20#include "fpu_emu.h"
21
22
23.text
/*
 * int FPU_normalize(FPU_REG *n)
 *
 * Shift the 64 bit significand of *n left until its most significant
 * bit is set, reducing the exponent by the number of bits shifted, then
 * range-check the exponent.
 *
 * Register use:
 *	%ebx		n
 *	%edx:%eax	significand (SIGH:SIGL)
 *	%ecx		shift count
 *
 * Returns in %eax: TAG_Valid, or TAG_Zero when the significand is zero,
 * or whatever arith_overflow / arith_underflow return when the exponent
 * is out of range.
 */
ENTRY(FPU_normalize)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%ebx

	movl	PARAM1,%ebx

	movl	SIGL(%ebx),%eax
	movl	SIGH(%ebx),%edx

	testl	%edx,%edx		/* examine the ms 32 bits */
	js	L_done			/* top bit set: already normalized */
	jnz	L_shift_1		/* non-zero: shift left 1 - 31 bits */

	testl	%eax,%eax
	jz	L_zero			/* the whole significand is zero */

	/* The ms word is zero: promote the ls word, a 32 bit shift. */
	movl	%eax,%edx
	xorl	%eax,%eax
	subw	$32,EXP(%ebx)		/* This can cause an underflow */

/* %edx is non-zero here; shift left until its top bit is set */
L_shift_1:
	bsrl	%edx,%ecx		/* index (0..31) of the highest set bit */
	xorl	$31,%ecx		/* %ecx = 31 - index, the shift count */
	shld	%cl,%eax,%edx
	shl	%cl,%eax
	subw	%cx,EXP(%ebx)		/* This can cause an underflow */

	movl	%edx,SIGH(%ebx)
	movl	%eax,SIGL(%ebx)

L_done:
	cmpw	EXP_OVER,EXP(%ebx)
	jge	L_overflow

	cmpw	EXP_UNDER,EXP(%ebx)
	jle	L_underflow

L_exit_valid:
	/* Convert the exponent to 80x87 form. */
	addw	EXTENDED_Ebias,EXP(%ebx)
	andw	$0x7fff,EXP(%ebx)

	movl	TAG_Valid,%eax

L_exit:
	popl	%ebx
	leave
	ret


L_zero:
	movl	TAG_Zero,%eax
	movw	$0,EXP(%ebx)
	jmp	L_exit

L_underflow:
	/* Convert the exponent to 80x87 form. */
	addw	EXTENDED_Ebias,EXP(%ebx)
	pushl	%ebx			/* argument for arith_underflow */
	call	arith_underflow
	popl	%ebx			/* discard the argument */
	jmp	L_exit

L_overflow:
	/* Convert the exponent to 80x87 form. */
	addw	EXTENDED_Ebias,EXP(%ebx)
	pushl	%ebx			/* argument for arith_overflow */
	call	arith_overflow
	popl	%ebx			/* discard the argument */
	jmp	L_exit
97
98
99
/*
 * int FPU_normalize_nuo(FPU_REG *n)
 *
 * Normalise without reporting underflow or overflow: the significand of
 * *n is shifted left until its most significant bit is set and the
 * exponent is reduced by the number of bits shifted, but the exponent
 * is neither range-checked nor converted to 80x87 form.
 *
 * Register use:
 *	%ebx		n
 *	%edx:%eax	significand (SIGH:SIGL)
 *	%ecx		shift count
 *
 * Returns in %eax: TAG_Valid, or TAG_Zero when the significand is zero
 * (the exponent is then set to EXP_UNDER).
 */
ENTRY(FPU_normalize_nuo)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%ebx

	movl	PARAM1,%ebx

	movl	SIGL(%ebx),%eax
	movl	SIGH(%ebx),%edx

	testl	%edx,%edx		/* examine the ms 32 bits */
	js	L_exit_nuo_valid	/* top bit set: already normalized */
	jnz	L_nuo_shift_1		/* non-zero: shift left 1 - 31 bits */

	testl	%eax,%eax
	jz	L_exit_nuo_zero		/* the whole significand is zero */

	/* The ms word is zero: promote the ls word, a 32 bit shift. */
	movl	%eax,%edx
	xorl	%eax,%eax
	subw	$32,EXP(%ebx)		/* This can cause an underflow */

/* %edx is non-zero here; shift left until its top bit is set */
L_nuo_shift_1:
	bsrl	%edx,%ecx		/* index (0..31) of the highest set bit */
	xorl	$31,%ecx		/* %ecx = 31 - index, the shift count */
	shld	%cl,%eax,%edx
	shl	%cl,%eax
	subw	%cx,EXP(%ebx)		/* This can cause an underflow */

	movl	%edx,SIGH(%ebx)
	movl	%eax,SIGL(%ebx)

L_exit_nuo_valid:
	movl	TAG_Valid,%eax

	popl	%ebx
	leave
	ret

L_exit_nuo_zero:
	movw	EXP_UNDER,EXP(%ebx)
	movl	TAG_Zero,%eax

	popl	%ebx
	leave
	ret
diff --git a/arch/i386/math-emu/reg_round.S b/arch/i386/math-emu/reg_round.S
new file mode 100644
index 000000000000..d1d4e48b4f67
--- /dev/null
+++ b/arch/i386/math-emu/reg_round.S
@@ -0,0 +1,708 @@
1 .file "reg_round.S"
2/*---------------------------------------------------------------------------+
3 | reg_round.S |
4 | |
5 | Rounding/truncation/etc for FPU basic arithmetic functions. |
6 | |
7 | Copyright (C) 1993,1995,1997 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
9 | Australia. E-mail billm@suburbia.net |
10 | |
11 | This code has four possible entry points. |
12 | The following must be entered by a jmp instruction: |
13 | fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. |
14 | (Note: no fpu_reg_round_sqrt label is defined in this file.) |
15 | The FPU_round entry point is intended to be used by C code. |
16 | From C, call as: |
17 | int FPU_round(FPU_REG *arg, unsigned int extent, int dummy, unsigned int control_w, u_char sign) |
18 | |
19 | Return value is the tag of the answer, or-ed with FPU_Exception if |
20 | one was raised, or -1 on internal error. |
21 | |
22 | For correct "up" and "down" rounding, the argument must have the correct |
23 | sign. |
24 | |
25 +---------------------------------------------------------------------------*/
26
27/*---------------------------------------------------------------------------+
28 | Four entry points. |
29 | |
30 | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: |
31 | %eax:%ebx 64 bit significand |
32 | %edx 32 bit extension of the significand |
33 | %edi pointer to an FPU_REG for the result to be stored |
34 | stack calling function must have set up a C stack frame and |
35 | pushed %esi, %edi, and %ebx |
36 | |
37 | Needed just for the fpu_reg_round_sqrt entry point: |
38 | %cx A control word in the same format as the FPU control word. |
39 | Otherwise, PARAM4 must give such a value. |
40 | In all cases PARAM5 must give the sign of the result. |
41 | |
42 | The significand and its extension are assumed to be exact in the |
43 | following sense: |
44 | If the significand by itself is the exact result then the significand |
45 | extension (%edx) must contain 0, otherwise the significand extension |
46 | must be non-zero. |
47 | If the significand extension is non-zero then the significand is |
48 | smaller than the magnitude of the correct exact result by an amount |
49 | greater than zero and less than one ls bit of the significand. |
50 | The significand extension is only required to have three possible |
51 | non-zero values: |
52 | less than 0x80000000 <=> the significand is less than 1/2 an ls |
53 | bit smaller than the magnitude of the |
54 | true exact result. |
55 | exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit |
56 | smaller than the magnitude of the true |
57 | exact result. |
58 | greater than 0x80000000 <=> the significand is more than 1/2 an ls |
59 | bit smaller than the magnitude of the |
60 | true exact result. |
61 | |
62 +---------------------------------------------------------------------------*/
63
64/*---------------------------------------------------------------------------+
65 | The code in this module has become quite complex, but it should handle |
66 | all of the FPU flags which are set at this stage of the basic arithmetic |
67 | computations. |
68 | There are a few rare cases where the results are not set identically to |
69 | a real FPU. These require a bit more thought because at this stage the |
70 | results of the code here appear to be more consistent... |
71 | This may be changed in a future version. |
72 +---------------------------------------------------------------------------*/
73
74
75#include "fpu_emu.h"
76#include "exception.h"
77#include "control_w.h"
78
79/* Flags for FPU_bits_lost (0 means no bits were lost in rounding) */
80#define LOST_DOWN $1
81#define LOST_UP $2
82
83/* Flags for FPU_denormal (0 means the number is not a de-normal) */
84#define DENORMAL $1
85#define UNMASKED_UNDERFLOW $2
86
87
88#ifndef NON_REENTRANT_FPU
89/* Make the code re-entrant by putting
90 local storage on the stack; both bytes live in the word pushed at fpu_reg_round, so they must only be accessed while %esp points at that word: */
91#define FPU_bits_lost (%esp)
92#define FPU_denormal 1(%esp)
93
94#else
95/* Not re-entrant, so we can gain speed by putting
96 local storage in a static area: */
97.data
98 .align 4,0
99FPU_bits_lost:
100 .byte 0
101FPU_denormal:
102 .byte 0
103#endif /* NON_REENTRANT_FPU */
104
105
106.text
107.globl fpu_reg_round
108.globl fpu_Arith_exit
109
110/* Entry point when called from C: builds the frame and register state that fpu_reg_round expects, then falls into it */
111ENTRY(FPU_round)
112 pushl %ebp
113 movl %esp,%ebp
114 pushl %esi
115 pushl %edi
116 pushl %ebx
117
118 movl PARAM1,%edi /* %edi -> the register to round; the result is stored back through it */
119 movl SIGH(%edi),%eax /* %eax:%ebx = 64 bit significand */
120 movl SIGL(%edi),%ebx
121 movl PARAM2,%edx /* %edx = extension of the significand */
122
123fpu_reg_round: /* Normal entry point */
124 movl PARAM4,%ecx /* %ecx = control word */
125
126#ifndef NON_REENTRANT_FPU
127 pushl %ebx /* adjust the stack pointer: reserves the word holding FPU_bits_lost and FPU_denormal */
128#endif /* NON_REENTRANT_FPU */
129
130#ifdef PARANOID
131/* Cannot use this here yet */
132/* orl %eax,%eax */
133/* jns L_entry_bugged */
134#endif /* PARANOID */
135
136 cmpw EXP_UNDER,EXP(%edi)
137 jle L_Make_denorm /* The number is a de-normal */
138
139 movb $0,FPU_denormal /* 0 -> not a de-normal */
140
141Denorm_done:
142 movb $0,FPU_bits_lost /* No bits yet lost in rounding */
143
144 movl %ecx,%esi /* keep the whole control word; the CW_RC bits are tested later */
145 andl CW_PC,%ecx /* dispatch on the CW_PC bits of the control word */
146 cmpl PR_64_BITS,%ecx
147 je LRound_To_64
148
149 cmpl PR_53_BITS,%ecx
150 je LRound_To_53
151
152 cmpl PR_24_BITS,%ecx
153 je LRound_To_24
154
155#ifdef PECULIAR_486
156/* With the precision control bits set to 01 "(reserved)", a real 80486
157 behaves as if the precision control bits were set to 11 "64 bits" */
158 cmpl PR_RESERVED_BITS,%ecx
159 je LRound_To_64
160#ifdef PARANOID
161 jmp L_bugged_denorm_486
162#endif /* PARANOID */
163#else
164#ifdef PARANOID
165 jmp L_bugged_denorm /* There is no bug, just a bad control word */
166#endif /* PARANOID */
167#endif /* PECULIAR_486 */
168
169
170/* Round etc to 24 bit precision (without PARANOID, an unmatched precision field also falls through to here) */
171LRound_To_24:
172 movl %esi,%ecx /* %esi holds the saved control word */
173 andl CW_RC,%ecx /* dispatch on the CW_RC bits */
174 cmpl RC_RND,%ecx
175 je LRound_nearest_24
176
177 cmpl RC_CHOP,%ecx
178 je LCheck_truncate_24
179
180 cmpl RC_UP,%ecx /* Towards +infinity */
181 je LUp_24
182
183 cmpl RC_DOWN,%ecx /* Towards -infinity */
184 je LDown_24
185
186#ifdef PARANOID
187 jmp L_bugged_round24
188#endif /* PARANOID */
189
190LUp_24:
191 cmpb SIGN_POS,PARAM5 /* PARAM5 holds the sign of the result */
192 jne LCheck_truncate_24 /* If negative then up==truncate */
193
194 jmp LCheck_24_round_up
195
196LDown_24:
197 cmpb SIGN_POS,PARAM5
198 je LCheck_truncate_24 /* If positive then down==truncate */
199
200LCheck_24_round_up:
201 movl %eax,%ecx
202 andl $0x000000ff,%ecx /* low 8 bits of the ms word ... */
203 orl %ebx,%ecx /* ... or-ed with the ls word ... */
204 orl %edx,%ecx /* ... and with the extension: non-zero if anything would be discarded */
205 jnz LDo_24_round_up
206 jmp L_Re_normalise
207
208LRound_nearest_24:
209 /* Do rounding of the 24th bit if needed (nearest or even) */
210 movl %eax,%ecx
211 andl $0x000000ff,%ecx
212 cmpl $0x00000080,%ecx
213 jc LCheck_truncate_24 /* less than half, no increment needed */
214
215 jne LGreater_Half_24 /* greater than half, increment needed */
216
217 /* Possibly half, we need to check the ls bits */
218 orl %ebx,%ebx
219 jnz LGreater_Half_24 /* greater than half, increment needed */
220
221 orl %edx,%edx
222 jnz LGreater_Half_24 /* greater than half, increment needed */
223
224 /* Exactly half, increment only if 24th bit is 1 (round to even) */
225 testl $0x00000100,%eax
226 jz LDo_truncate_24
227
228LGreater_Half_24: /* Rounding: increment at the 24th bit */
229LDo_24_round_up:
230 andl $0xffffff00,%eax /* Truncate to 24 bits */
231 xorl %ebx,%ebx
232 movb LOST_UP,FPU_bits_lost
233 addl $0x00000100,%eax /* the carry out of this add is tested at LCheck_Round_Overflow */
234 jmp LCheck_Round_Overflow
235
236LCheck_truncate_24:
237 movl %eax,%ecx
238 andl $0x000000ff,%ecx
239 orl %ebx,%ecx
240 orl %edx,%ecx
241 jz L_Re_normalise /* No truncation needed */
242
243LDo_truncate_24:
244 andl $0xffffff00,%eax /* Truncate to 24 bits */
245 xorl %ebx,%ebx
246 movb LOST_DOWN,FPU_bits_lost
247 jmp L_Re_normalise
248
249
250/* Round etc to 53 bit precision: the discarded bits are the low 11 bits of %ebx plus the extension in %edx */
251LRound_To_53:
252 movl %esi,%ecx /* %esi holds the saved control word */
253 andl CW_RC,%ecx /* dispatch on the CW_RC bits */
254 cmpl RC_RND,%ecx
255 je LRound_nearest_53
256
257 cmpl RC_CHOP,%ecx
258 je LCheck_truncate_53
259
260 cmpl RC_UP,%ecx /* Towards +infinity */
261 je LUp_53
262
263 cmpl RC_DOWN,%ecx /* Towards -infinity */
264 je LDown_53
265
266#ifdef PARANOID
267 jmp L_bugged_round53
268#endif /* PARANOID */
269
270LUp_53:
271 cmpb SIGN_POS,PARAM5 /* PARAM5 holds the sign of the result */
272 jne LCheck_truncate_53 /* If negative then up==truncate */
273
274 jmp LCheck_53_round_up
275
276LDown_53:
277 cmpb SIGN_POS,PARAM5
278 je LCheck_truncate_53 /* If positive then down==truncate */
279
280LCheck_53_round_up:
281 movl %ebx,%ecx
282 andl $0x000007ff,%ecx /* low 11 bits of the ls word ... */
283 orl %edx,%ecx /* ... or-ed with the extension: non-zero if anything would be discarded */
284 jnz LDo_53_round_up
285 jmp L_Re_normalise
286
287LRound_nearest_53:
288 /* Do rounding of the 53rd bit if needed (nearest or even) */
289 movl %ebx,%ecx
290 andl $0x000007ff,%ecx
291 cmpl $0x00000400,%ecx
292 jc LCheck_truncate_53 /* less than half, no increment needed */
293
294 jnz LGreater_Half_53 /* greater than half, increment needed */
295
296 /* Possibly half, we need to check the ls bits */
297 orl %edx,%edx
298 jnz LGreater_Half_53 /* greater than half, increment needed */
299
300 /* Exactly half, increment only if 53rd bit is 1 (round to even) */
301 testl $0x00000800,%ebx
302 jz LTruncate_53
303
304LGreater_Half_53: /* Rounding: increment at the 53rd bit */
305LDo_53_round_up:
306 movb LOST_UP,FPU_bits_lost
307 andl $0xfffff800,%ebx /* Truncate to 53 bits */
308 addl $0x00000800,%ebx
309 adcl $0,%eax /* the carry out of this add is tested at LCheck_Round_Overflow */
310 jmp LCheck_Round_Overflow
311
312LCheck_truncate_53:
313 movl %ebx,%ecx
314 andl $0x000007ff,%ecx
315 orl %edx,%ecx
316 jz L_Re_normalise /* nothing to discard: the value is already exact at 53 bits */
317
318LTruncate_53:
319 movb LOST_DOWN,FPU_bits_lost
320 andl $0xfffff800,%ebx /* Truncate to 53 bits */
321 jmp L_Re_normalise
322
323
324/* Round etc to 64 bit precision: only the extension in %edx is discarded */
325LRound_To_64:
326 movl %esi,%ecx /* %esi holds the saved control word */
327 andl CW_RC,%ecx /* dispatch on the CW_RC bits */
328 cmpl RC_RND,%ecx
329 je LRound_nearest_64
330
331 cmpl RC_CHOP,%ecx
332 je LCheck_truncate_64
333
334 cmpl RC_UP,%ecx /* Towards +infinity */
335 je LUp_64
336
337 cmpl RC_DOWN,%ecx /* Towards -infinity */
338 je LDown_64
339
340#ifdef PARANOID
341 jmp L_bugged_round64
342#endif /* PARANOID */
343
344LUp_64:
345 cmpb SIGN_POS,PARAM5 /* PARAM5 holds the sign of the result */
346 jne LCheck_truncate_64 /* If negative then up==truncate */
347
348 orl %edx,%edx
349 jnz LDo_64_round_up /* any non-zero extension forces an increment */
350 jmp L_Re_normalise
351
352LDown_64:
353 cmpb SIGN_POS,PARAM5
354 je LCheck_truncate_64 /* If positive then down==truncate */
355
356 orl %edx,%edx
357 jnz LDo_64_round_up /* any non-zero extension forces an increment */
358 jmp L_Re_normalise
359
360LRound_nearest_64:
361 cmpl $0x80000000,%edx
362 jc LCheck_truncate_64 /* extension below 0x80000000: less than half */
363
364 jne LDo_64_round_up /* extension above 0x80000000: more than half */
365
366 /* Now test for round-to-even */
367 testb $1,%bl
368 jz LCheck_truncate_64 /* exactly half and the ls bit is 0: truncate */
369
370LDo_64_round_up:
371 movb LOST_UP,FPU_bits_lost
372 addl $1,%ebx
373 adcl $0,%eax
374
375LCheck_Round_Overflow: /* entered with the carry flag from the rounding increment (24, 53 and 64 bit paths) */
376 jnc L_Re_normalise
377
378 /* Overflow, adjust the result (significand to 1.0) */
379 rcrl $1,%eax /* the set carry is rotated into the ms bit */
380 rcrl $1,%ebx
381 incw EXP(%edi)
382 jmp L_Re_normalise
383
384LCheck_truncate_64:
385 orl %edx,%edx
386 jz L_Re_normalise /* zero extension: the result is exact */
387
388LTruncate_64:
389 movb LOST_DOWN,FPU_bits_lost /* falls through to L_Re_normalise */
390
391L_Re_normalise: /* rounding is done; if the number was flagged in FPU_denormal, handle that first */
392 testb $0xff,FPU_denormal
393 jnz Normalise_result
394
395L_Normalised:
396 movl TAG_Valid,%edx /* %edx carries the tag that will be returned */
397
398L_deNormalised: /* entered with the tag already in %edx */
399 cmpb LOST_UP,FPU_bits_lost
400 je L_precision_lost_up
401
402 cmpb LOST_DOWN,FPU_bits_lost
403 je L_precision_lost_down
404
405L_no_precision_loss: /* also the return point of the two L_precision_lost_* handlers */
406 /* store the result */
407
408L_Store_significand:
409 movl %eax,SIGH(%edi)
410 movl %ebx,SIGL(%edi)
411
412 cmpw EXP_OVER,EXP(%edi)
413 jge L_overflow
414
415 movl %edx,%eax /* return value = the tag */
416
417 /* Convert the exponent to 80x87 form. */
418 addw EXTENDED_Ebias,EXP(%edi)
419 andw $0x7fff,EXP(%edi)
420
421fpu_reg_round_signed_special_exit:
422
423 cmpb SIGN_POS,PARAM5 /* PARAM5 holds the sign of the result */
424 je fpu_reg_round_special_exit
425
426 orw $0x8000,EXP(%edi) /* Negative sign for the result. */
427
428fpu_reg_round_special_exit:
429
430#ifndef NON_REENTRANT_FPU
431 popl %ebx /* adjust the stack pointer: discards the local storage word (the next popl reloads %ebx) */
432#endif /* NON_REENTRANT_FPU */
433
434fpu_Arith_exit: /* undoes the caller's pushes of %esi, %edi, %ebx and its stack frame */
435 popl %ebx
436 popl %edi
437 popl %esi
438 leave
439 ret
440
441
442/*
443 * Set the FPU status flags to represent precision loss due to
444 * round-up.
445 */
446L_precision_lost_up:
447 push %edx /* preserve the tag across the call */
448 push %eax /* preserve the ms word of the significand across the call */
449 call set_precision_flag_up
450 popl %eax
451 popl %edx
452 jmp L_no_precision_loss /* %esp is back at the local storage word here */
453
454/*
455 * Set the FPU status flags to represent precision loss due to
456 * truncation.
457 */
458L_precision_lost_down:
459 push %edx /* preserve the tag across the call */
460 push %eax /* preserve the ms word of the significand across the call */
461 call set_precision_flag_down
462 popl %eax
463 popl %edx
464 jmp L_no_precision_loss /* %esp is back at the local storage word here */
465
466
467/*
468 * The number is a denormal (which might get rounded up to a normal)
469 * Shift the number right the required number of bits, which will
470 * have to be undone later...
471 */
472L_Make_denorm:
473 /* The action to be taken depends upon whether the underflow
474 exception is masked */
475 testb CW_Underflow,%cl /* Underflow mask. */
476 jz Unmasked_underflow /* Do not make a denormal. */
477
478 movb DENORMAL,FPU_denormal /* written before the pushl below, while %esp still addresses the local storage */
479
480 pushl %ecx /* Save */
481 movw EXP_UNDER+1,%cx
482 subw EXP(%edi),%cx /* %cx = (EXP_UNDER+1) - exponent = number of bits to shift right */
483
484 cmpw $64,%cx /* shifts of 64 bits or more are handled separately */
485 jnc Denorm_shift_more_than_63
486
487 cmpw $32,%cx /* shrd only works for 0..31 bits */
488 jnc Denorm_shift_more_than_32
489
490/*
491 * We got here without jumps by assuming that the most common requirement
492 * is for a small de-normalising shift.
493 * Shift by [1..31] bits
494 */
495 addw %cx,EXP(%edi) /* exponent + shift count = EXP_UNDER+1 */
496 orl %edx,%edx /* extension */
497 setne %ch /* Save whether %edx is non-zero */
498 xorl %edx,%edx
499 shrd %cl,%ebx,%edx /* bits shifted out of %ebx become the new extension */
500 shrd %cl,%eax,%ebx
501 shr %cl,%eax
502 orb %ch,%dl /* fold the old extension in as a sticky bit */
503 popl %ecx /* restore the control word */
504 jmp Denorm_done
505
506/* Shift by [32..63] bits */
507Denorm_shift_more_than_32:
508 addw %cx,EXP(%edi) /* exponent + shift count = EXP_UNDER+1 */
509 subb $32,%cl /* the word move at the end supplies the other 32 bits of shift */
510 orl %edx,%edx
511 setne %ch /* %ch = 1 if the old extension was non-zero */
512 orb %ch,%bl
513 xorl %edx,%edx
514 shrd %cl,%ebx,%edx
515 shrd %cl,%eax,%ebx
516 shr %cl,%eax
517 orl %edx,%edx /* test these 32 bits */
518 setne %cl
519 orb %ch,%bl /* sticky bits: old extension ... */
520 orb %cl,%bl /* ... and the bits just shifted out of %ebx */
521 movl %ebx,%edx /* move everything down one word */
522 movl %eax,%ebx
523 xorl %eax,%eax
524 popl %ecx /* restore the control word */
525 jmp Denorm_done
526
527/* Shift by [64..) bits */
528Denorm_shift_more_than_63:
529 cmpw $64,%cx
530 jne Denorm_shift_more_than_64
531
532/* Exactly 64 bit shift */
533 addw %cx,EXP(%edi) /* exponent + shift count = EXP_UNDER+1 */
534 xorl %ecx,%ecx
535 orl %edx,%edx
536 setne %cl /* %cl = 1 if the old extension was non-zero */
537 orl %ebx,%ebx
538 setne %ch /* %ch = 1 if the ls word was non-zero */
539 orb %ch,%cl
540 orb %cl,%al /* sticky bit for everything below the ms word */
541 movl %eax,%edx /* the ms word becomes the extension */
542 xorl %eax,%eax
543 xorl %ebx,%ebx
544 popl %ecx /* restore the control word */
545 jmp Denorm_done
546
547Denorm_shift_more_than_64:
548 movw EXP_UNDER+1,EXP(%edi)
549/* This is easy, %eax must be non-zero, so.. */
550 movl $1,%edx /* significand becomes zero with a small non-zero extension */
551 xorl %eax,%eax
552 xorl %ebx,%ebx
553 popl %ecx /* restore the control word */
554 jmp Denorm_done
555
556
557Unmasked_underflow: /* underflow is unmasked: round the number as it stands, without shifting */
558 movb UNMASKED_UNDERFLOW,FPU_denormal
559 jmp Denorm_done
560
561
562/* Undo the de-normalisation. Reached from L_Re_normalise when FPU_denormal is non-zero. */
563Normalise_result:
564 cmpb UNMASKED_UNDERFLOW,FPU_denormal
565 je Signal_underflow
566
567/* The number must be a denormal if we got here. */
568#ifdef PARANOID
569 /* But check it... just in case. */
570 cmpw EXP_UNDER+1,EXP(%edi)
571 jne L_norm_bugged
572#endif /* PARANOID */
573
574#ifdef PECULIAR_486
575 /*
576 * This implements a special feature of 80486 behaviour.
577 * Underflow will be signalled even if the number is
578 * not a denormal after rounding.
579 * This difference occurs only for masked underflow, and not
580 * in the unmasked case.
581 * Actual 80486 behaviour differs from this in some circumstances.
582 */
583 orl %eax,%eax /* ms bits */
584 js LPseudoDenormal /* Will be masked underflow */
585#else
586 orl %eax,%eax /* ms bits */
587 js L_Normalised /* No longer a denormal */
588#endif /* PECULIAR_486 */
589
590 jnz LDenormal_adj_exponent /* still uses the flags of the orl above */
591
592 orl %ebx,%ebx
593 jz L_underflow_to_zero /* The contents are zero */
594
595LDenormal_adj_exponent:
596 decw EXP(%edi) /* L_Make_denorm left the exponent at EXP_UNDER+1; this makes it EXP_UNDER */
597
598LPseudoDenormal:
599 testb $0xff,FPU_bits_lost /* bits lost == underflow */
600 movl TAG_Special,%edx /* movl leaves the flags of the testb intact for the jz */
601 jz L_deNormalised
602
603 /* There must be a masked underflow */
604 push %eax /* preserve the ms word of the significand across the call */
605 pushl EX_Underflow
606 call EXCEPTION
607 popl %eax /* discard the argument */
608 popl %eax /* restore the ms word */
609 movl TAG_Special,%edx /* set the tag again after the call */
610 jmp L_deNormalised
611
612
613/*
614 * The operations resulted in a number too small to represent.
615 * Masked response.
616 */
617L_underflow_to_zero: /* reached only when both %eax and %ebx are zero */
618 push %eax
619 call set_precision_flag_down
620 popl %eax
621
622 push %eax
623 pushl EX_Underflow
624 call EXCEPTION
625 popl %eax /* discard the argument */
626 popl %eax /* restore the ms word */
627
628/* Reduce the exponent to EXP_UNDER */
629 movw EXP_UNDER,EXP(%edi)
630 movl TAG_Zero,%edx
631 jmp L_Store_significand
632
633
634/* The operations resulted in a number too large to represent. */
635L_overflow:
636 addw EXTENDED_Ebias,EXP(%edi) /* Set for unmasked response. */
637 push %edi
638 call arith_overflow /* %eax is not touched afterwards, so its value here is what gets returned */
639 pop %edi
640 jmp fpu_reg_round_signed_special_exit
641
642
643Signal_underflow: /* unmasked underflow: the number was rounded without being de-normalised */
644 /* The number may have been changed to a non-denormal */
645 /* by the rounding operations. */
646 cmpw EXP_UNDER,EXP(%edi)
647 jle Do_unmasked_underflow
648
649 jmp L_Normalised
650
651Do_unmasked_underflow:
652 /* Increase the exponent by the magic number */
653 addw $(3*(1<<13)),EXP(%edi) /* 3*(1<<13) = 24576; NOTE(review): presumably the x87 exponent adjustment for unmasked underflow - confirm */
654 push %eax /* preserve the ms word of the significand across the call */
655 pushl EX_Underflow
656 call EXCEPTION
657 popl %eax /* discard the argument */
658 popl %eax /* restore the ms word */
659 jmp L_Normalised
660
661
662#ifdef PARANOID
663#ifdef PECULIAR_486
664L_bugged_denorm_486: /* precision control field matched none of the tested values */
665 pushl EX_INTERNAL|0x236
666 call EXCEPTION
667 popl %ebx /* discard the argument */
668 jmp L_exception_exit
669#else
670L_bugged_denorm: /* precision control field matched none of the tested values */
671 pushl EX_INTERNAL|0x230
672 call EXCEPTION
673 popl %ebx /* discard the argument */
674 jmp L_exception_exit
675#endif /* PECULIAR_486 */
676
677L_bugged_round24: /* rounding control field matched none of the tested values (24 bit path) */
678 pushl EX_INTERNAL|0x231
679 call EXCEPTION
680 popl %ebx /* discard the argument */
681 jmp L_exception_exit
682
683L_bugged_round53: /* rounding control field matched none of the tested values (53 bit path) */
684 pushl EX_INTERNAL|0x232
685 call EXCEPTION
686 popl %ebx /* discard the argument */
687 jmp L_exception_exit
688
689L_bugged_round64: /* rounding control field matched none of the tested values (64 bit path) */
690 pushl EX_INTERNAL|0x233
691 call EXCEPTION
692 popl %ebx /* discard the argument */
693 jmp L_exception_exit
694
695L_norm_bugged: /* exponent was not EXP_UNDER+1 when undoing a de-normalisation */
696 pushl EX_INTERNAL|0x234
697 call EXCEPTION
698 popl %ebx /* discard the argument */
699 jmp L_exception_exit
700
701L_entry_bugged: /* only referenced from the commented-out check at fpu_reg_round */
702 pushl EX_INTERNAL|0x235
703 call EXCEPTION
704 popl %ebx /* discard the argument */
705L_exception_exit:
706 mov $-1,%eax /* return -1: internal error */
707 jmp fpu_reg_round_special_exit
708#endif /* PARANOID */
diff --git a/arch/i386/math-emu/reg_u_add.S b/arch/i386/math-emu/reg_u_add.S
new file mode 100644
index 000000000000..47c4c2434d85
--- /dev/null
+++ b/arch/i386/math-emu/reg_u_add.S
@@ -0,0 +1,167 @@
1 .file "reg_u_add.S"
2/*---------------------------------------------------------------------------+
3 | reg_u_add.S |
4 | |
5 | Add two valid (TAG_Valid) FPU_REG numbers, of the same sign, and put the |
6 | result in a destination FPU_REG. |
7 | |
8 | Copyright (C) 1992,1993,1995,1997 |
9 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
10 | E-mail billm@suburbia.net |
11 | |
12 | Call from C as: |
13 | int FPU_u_add(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, |
14 | int control_w, u_char sign, int expa, int expb) |
15 | Return value is the tag of the answer, or-ed with FPU_Exception if |
16 | one was raised, or -1 on internal error. |
17 | |
18 +---------------------------------------------------------------------------*/
19
20/*
21 | Kernel addition routine FPU_u_add(reg *arg1, reg *arg2, reg *answ).
22 | Takes two valid reg f.p. numbers (TAG_Valid), which are
23 | treated as unsigned numbers,
24 | and returns their sum as a TAG_Valid or TAG_Special f.p. number.
25 | The returned number is normalized.
26 | Basic checks are performed if PARANOID is defined.
27 */
28
29#include "exception.h"
30#include "fpu_emu.h"
31#include "control_w.h"
32
33.text
34ENTRY(FPU_u_add)
35 pushl %ebp
36 movl %esp,%ebp
37 pushl %esi
38 pushl %edi
39 pushl %ebx
40
41 movl PARAM1,%esi /* source 1 */
42 movl PARAM2,%edi /* source 2 */
43
44 movl PARAM6,%ecx /* PARAM6 = exponent of source 1, PARAM7 = exponent of source 2 */
45 movl %ecx,%edx
46 subl PARAM7,%ecx /* exp1 - exp2 */
47 jge L_arg1_larger
48
49 /* num1 is smaller */
50 movl SIGL(%esi),%ebx
51 movl SIGH(%esi),%eax
52
53 movl %edi,%esi /* %esi -> the number with the larger exponent */
54 movl PARAM7,%edx /* %edx = the larger exponent */
55 negw %cx /* make the exponent difference positive */
56 jmp L_accum_loaded
57
58L_arg1_larger:
59 /* num1 has larger or equal exponent */
60 movl SIGL(%edi),%ebx
61 movl SIGH(%edi),%eax
62
63L_accum_loaded: /* %eax:%ebx = significand to shift, %esi -> other operand, %cx = shift count, %edx = result exponent */
64 movl PARAM3,%edi /* destination */
65 movw %dx,EXP(%edi) /* Copy exponent to destination */
66
67 xorl %edx,%edx /* clear the extension */
68
69#ifdef PARANOID
70 testl $0x80000000,%eax
71 je L_bugged
72
73 testl $0x80000000,SIGH(%esi)
74 je L_bugged
75#endif /* PARANOID */
76
77/* The number to be shifted is in %eax:%ebx:%edx */
78 cmpw $32,%cx /* shrd only works for 0..31 bits */
79 jnc L_more_than_31
80
81/* less than 32 bits */
82 shrd %cl,%ebx,%edx
83 shrd %cl,%eax,%ebx
84 shr %cl,%eax
85 jmp L_shift_done
86
87L_more_than_31:
88 cmpw $64,%cx
89 jnc L_more_than_63
90
91 subb $32,%cl
92 jz L_exactly_32
93
94 shrd %cl,%eax,%edx /* shift by 33..63 bits: the ms word supplies the extension */
95 shr %cl,%eax
96 orl %ebx,%ebx
97 jz L_more_31_no_low /* none of the lowest bits is set */
98
99 orl $1,%edx /* record the fact in the extension */
100
101L_more_31_no_low:
102 movl %eax,%ebx
103 xorl %eax,%eax
104 jmp L_shift_done
105
106L_exactly_32: /* shift by exactly one word */
107 movl %ebx,%edx
108 movl %eax,%ebx
109 xorl %eax,%eax
110 jmp L_shift_done
111
112L_more_than_63:
113 cmpw $65,%cx
114 jnc L_more_than_64
115
116 movl %eax,%edx /* shift by exactly 64 bits: the ms word becomes the extension */
117 orl %ebx,%ebx
118 jz L_more_63_no_low
119
120 orl $1,%edx /* record the lost non-zero ls word in the extension */
121 jmp L_more_63_no_low
122
123L_more_than_64:
124 movl $1,%edx /* The shifted number always has at least one '1' bit */
125
126L_more_63_no_low:
127 xorl %ebx,%ebx
128 xorl %eax,%eax
129
130L_shift_done:
131 /* Now do the addition */
132 addl SIGL(%esi),%ebx
133 adcl SIGH(%esi),%eax
134 jnc L_round_the_result
135
136 /* Overflow, adjust the result */
137 rcrl $1,%eax /* the carry out of the addition is rotated into the ms bit */
138 rcrl $1,%ebx
139 rcrl $1,%edx
140 jnc L_no_bit_lost
141
142 orl $1,%edx /* a '1' fell off the bottom of the extension: keep it as a sticky bit */
143
144L_no_bit_lost:
145 incw EXP(%edi)
146
147L_round_the_result:
148 jmp fpu_reg_round /* Round the result */
149
150
151
152#ifdef PARANOID
153/* If we ever get here then we have problems! */
154L_bugged:
155 pushl EX_INTERNAL|0x201
156 call EXCEPTION
157 pop %ebx /* discard the argument */
158 movl $-1,%eax /* return -1: internal error */
159 jmp L_exit
160
161L_exit:
162 popl %ebx
163 popl %edi
164 popl %esi
165 leave
166 ret
167#endif /* PARANOID */
diff --git a/arch/i386/math-emu/reg_u_div.S b/arch/i386/math-emu/reg_u_div.S
new file mode 100644
index 000000000000..cc00654b6f9a
--- /dev/null
+++ b/arch/i386/math-emu/reg_u_div.S
@@ -0,0 +1,471 @@
1 .file "reg_u_div.S"
2/*---------------------------------------------------------------------------+
3 | reg_u_div.S |
4 | |
5 | Divide one FPU_REG by another and put the result in a destination FPU_REG.|
6 | |
7 | Copyright (C) 1992,1993,1995,1997 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
9 | E-mail billm@suburbia.net |
10 | |
11 | |
12 +---------------------------------------------------------------------------*/
13
14/*---------------------------------------------------------------------------+
15 | Call from C as: |
16 | int FPU_u_div(FPU_REG *a, FPU_REG *b, FPU_REG *dest, |
17 | unsigned int control_word, u_char sign) |
18 | |
19 | Sets the destination exponent to exp(a) - exp(b) + EXP_BIAS (never lower than EXP_WAY_UNDER), then adjusts it. |
20 | |
21 | Return value is the tag of the answer, or-ed with FPU_Exception if |
22 | one was raised, or -1 on internal error. |
23 +---------------------------------------------------------------------------*/
24
25#include "exception.h"
26#include "fpu_emu.h"
27#include "control_w.h"
28
29
30/* #define dSIGL(x) (x) */
31/* #define dSIGH(x) 4(x) */
32
33
34#ifndef NON_REENTRANT_FPU
35/*
36 Local storage on the stack (the 28 bytes below %ebp reserved at FPU_u_div entry):
37 Working numerator/remainder: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
38 Quotient: FPU_result_2:FPU_result_1 Overflow flag: FPU_ovfl_flag
39 */
40#define FPU_accum_3 -4(%ebp)
41#define FPU_accum_2 -8(%ebp)
42#define FPU_accum_1 -12(%ebp)
43#define FPU_accum_0 -16(%ebp)
44#define FPU_result_1 -20(%ebp)
45#define FPU_result_2 -24(%ebp)
46#define FPU_ovfl_flag -28(%ebp)
47
48#else
49.data
50/*
51 Local storage in a static area:
52 Working numerator/remainder: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
53 Quotient: FPU_result_2:FPU_result_1 Overflow flag: FPU_ovfl_flag
54 */
55 .align 4,0
56FPU_accum_3:
57 .long 0
58FPU_accum_2:
59 .long 0
60FPU_accum_1:
61 .long 0
62FPU_accum_0:
63 .long 0
64FPU_result_1:
65 .long 0
66FPU_result_2:
67 .long 0
68FPU_ovfl_flag:
69 .byte 0
70#endif /* NON_REENTRANT_FPU */
71
72#define REGA PARAM1
73#define REGB PARAM2
74#define DEST PARAM3
75
76.text
77ENTRY(FPU_u_div)
78 pushl %ebp
79 movl %esp,%ebp
80#ifndef NON_REENTRANT_FPU
81 subl $28,%esp
82#endif /* NON_REENTRANT_FPU */
83
84 pushl %esi
85 pushl %edi
86 pushl %ebx
87
88 movl REGA,%esi /* %esi -> dividend */
89 movl REGB,%ebx /* %ebx -> divisor */
90 movl DEST,%edi /* %edi -> destination */
91
92 movswl EXP(%esi),%edx /* exponents are loaded sign-extended */
93 movswl EXP(%ebx),%eax
94 subl %eax,%edx
95 addl EXP_BIAS,%edx /* %edx = exp(a) - exp(b) + EXP_BIAS */
96
97 /* A denormal and a large number can cause an exponent underflow */
98 cmpl EXP_WAY_UNDER,%edx
99 jg xExp_not_underflow
100
101 /* Set to a really low value to allow correct handling */
102 movl EXP_WAY_UNDER,%edx
103
104xExp_not_underflow:
105
106 movw %dx,EXP(%edi)
107
108#ifdef PARANOID
109/* testl $0x80000000, SIGH(%esi) // Dividend */
110/* je L_bugged */
111 testl $0x80000000, SIGH(%ebx) /* Divisor */
112 je L_bugged
113#endif /* PARANOID */
114
115/* Check if the divisor can be treated as having just 32 bits */
116 cmpl $0,SIGL(%ebx)
117 jnz L_Full_Division /* Can't do a quick divide */
118
119/* We should be able to zip through the division here */
120 movl SIGH(%ebx),%ecx /* The divisor */
121 movl SIGH(%esi),%edx /* Dividend */
122 movl SIGL(%esi),%eax /* Dividend */
123
124 cmpl %ecx,%edx
125 setaeb FPU_ovfl_flag /* Keep a record */
126 jb L_no_adjust /* setaeb does not change the flags of the cmpl */
127
128 subl %ecx,%edx /* Prevent the overflow */
129
130L_no_adjust:
131 /* Divide the 64 bit number by the 32 bit denominator */
132 divl %ecx
133 movl %eax,FPU_result_2 /* ms 32 bits of the quotient */
134
135 /* Work on the remainder of the first division */
136 xorl %eax,%eax
137 divl %ecx
138 movl %eax,FPU_result_1 /* ls 32 bits of the quotient */
139
140 /* Work on the remainder of the 64 bit division */
141 xorl %eax,%eax
142 divl %ecx /* %eax = a further 32 quotient bits, used as the extension */
143
144 testb $255,FPU_ovfl_flag /* was the num > denom ? */
145 je L_no_overflow
146
147 /* Do the shifting here */
148 /* increase the exponent */
149 incw EXP(%edi)
150
151 /* shift the mantissa right one bit */
152 stc /* To set the ms bit */
153 rcrl FPU_result_2
154 rcrl FPU_result_1
155 rcrl %eax
156
157L_no_overflow:
158 jmp LRound_precision /* Do the rounding as required */
159
160
161/*---------------------------------------------------------------------------+
162 | Divide: Return arg1/arg2 to arg3. |
163 | |
164 | This routine does not use the exponents of arg1 and arg2, but does |
165 | adjust the exponent of arg3. |
166 | |
167 | The maximum returned value is (ignoring exponents) |
168 | .ffffffff ffffffff |
169 | ------------------ = 1.ffffffff fffffffe |
170 | .80000000 00000000 |
171 | and the minimum is |
172 | .80000000 00000000 |
173 | ------------------ = .80000000 00000001 (rounded) |
174 | .ffffffff ffffffff |
175 | |
176 +---------------------------------------------------------------------------*/
177
178
179L_Full_Division:
180 /* Save extended dividend in local register */
181 movl SIGL(%esi),%eax
182 movl %eax,FPU_accum_2
183 movl SIGH(%esi),%eax
184 movl %eax,FPU_accum_3
185 xorl %eax,%eax
186 movl %eax,FPU_accum_1 /* zero the extension */
187 movl %eax,FPU_accum_0 /* zero the extension */
188
189 movl SIGL(%esi),%eax /* Get the current num */
190 movl SIGH(%esi),%edx
191
192/*----------------------------------------------------------------------*/
193/* Initialization done.
194 Do the first 32 bits. */
195
196 movb $0,FPU_ovfl_flag
197 cmpl SIGH(%ebx),%edx /* Test for imminent overflow */
198 jb LLess_than_1
199 ja LGreater_than_1
200
201 cmpl SIGL(%ebx),%eax
202 jb LLess_than_1
203
204LGreater_than_1:
205/* The dividend is greater or equal, would cause overflow */
206 setaeb FPU_ovfl_flag /* Keep a record (carry is clear on both paths into here, so this stores 1) */
207
208 subl SIGL(%ebx),%eax
209 sbbl SIGH(%ebx),%edx /* Prevent the overflow */
210 movl %eax,FPU_accum_2
211 movl %edx,FPU_accum_3
212
213LLess_than_1:
214/* At this point, we have a dividend < divisor, with a record of
215 adjustment in FPU_ovfl_flag */
216
217 /* We will divide by a number which is too large */
218 movl SIGH(%ebx),%ecx
219 addl $1,%ecx /* %ecx = ms word of the divisor + 1; carry means it wrapped to 0 */
220 jnc LFirst_div_not_1
221
222 /* here we need to divide by 100000000h,
223 i.e., no division at all.. */
224 mov %edx,%eax /* the quotient is just the ms word of the numerator */
225 jmp LFirst_div_done
226
227LFirst_div_not_1:
228 divl %ecx /* Divide the numerator by the augmented
229 denom ms dw */
230
231LFirst_div_done:
232 movl %eax,FPU_result_2 /* Put the result in the answer */
233
234 mull SIGH(%ebx) /* mul by the ms dw of the denom */
235
236 subl %eax,FPU_accum_2 /* Subtract from the num local reg */
237 sbbl %edx,FPU_accum_3
238
239 movl FPU_result_2,%eax /* Get the result back */
240 mull SIGL(%ebx) /* now mul the ls dw of the denom */
241
242 subl %eax,FPU_accum_1 /* Subtract from the num local reg */
243 sbbl %edx,FPU_accum_2
244 sbbl $0,FPU_accum_3
245 je LDo_2nd_32_bits /* Must check for non-zero result here */
246
247#ifdef PARANOID
248 jb L_bugged_1
249#endif /* PARANOID */
250
251 /* need to subtract the denom once more */
252 incl FPU_result_2 /* Correct the answer */
253
254 movl SIGL(%ebx),%eax
255 movl SIGH(%ebx),%edx
256 subl %eax,FPU_accum_1 /* Subtract from the num local reg */
257 sbbl %edx,FPU_accum_2 /* the final borrow into FPU_accum_3 is only applied for the PARANOID check below */
258
259#ifdef PARANOID
260 sbbl $0,FPU_accum_3
261 jne L_bugged_1 /* Must check for non-zero result here */
262#endif /* PARANOID */
263
264/*----------------------------------------------------------------------*/
265/* Half of the main problem is done, there is just a reduced numerator
266 to handle now.
267 Work with the second 32 bits, FPU_accum_3 not used from now on */
268LDo_2nd_32_bits:
269 movl FPU_accum_2,%edx /* get the reduced num */
270 movl FPU_accum_1,%eax
271
272 /* need to check for possible subsequent overflow */
273 cmpl SIGH(%ebx),%edx
274 jb LDo_2nd_div
275 ja LPrevent_2nd_overflow
276
277 cmpl SIGL(%ebx),%eax
278 jb LDo_2nd_div
279
280LPrevent_2nd_overflow:
281/* The numerator is greater or equal, would cause overflow */
282 /* prevent overflow */
283 subl SIGL(%ebx),%eax
284 sbbl SIGH(%ebx),%edx
285 movl %edx,FPU_accum_2
286 movl %eax,FPU_accum_1
287
288 incl FPU_result_2 /* Reflect the subtraction in the answer */
289
290#ifdef PARANOID
291 je L_bugged_2 /* Can't bump the result to 1.0 */
292#endif /* PARANOID */
293
294LDo_2nd_div:
295 cmpl $0,%ecx /* augmented denom msw */
296 jnz LSecond_div_not_1
297
298 /* %ecx == 0, we are dividing by 1.0 */
299 mov %edx,%eax /* the quotient is just the ms word of the numerator */
300 jmp LSecond_div_done
301
302LSecond_div_not_1:
303 divl %ecx /* Divide the numerator by the denom ms dw */
304
305LSecond_div_done:
306 movl %eax,FPU_result_1 /* Put the result in the answer */
307
308 mull SIGH(%ebx) /* mul by the ms dw of the denom */
309
310 subl %eax,FPU_accum_1 /* Subtract from the num local reg */
311 sbbl %edx,FPU_accum_2
312
313#ifdef PARANOID
314 jc L_bugged_2
315#endif /* PARANOID */
316
317 movl FPU_result_1,%eax /* Get the result back */
318 mull SIGL(%ebx) /* now mul the ls dw of the denom */
319
320 subl %eax,FPU_accum_0 /* Subtract from the num local reg */
321 sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */
322 sbbl $0,FPU_accum_2
323
324#ifdef PARANOID
325 jc L_bugged_2
326#endif /* PARANOID */
327
328 jz LDo_3rd_32_bits /* FPU_accum_2 is zero: no further correction needed */
329
330#ifdef PARANOID
331 cmpl $1,FPU_accum_2
332 jne L_bugged_2
333#endif /* PARANOID */
334
335 /* need to subtract the denom once more */
336 movl SIGL(%ebx),%eax
337 movl SIGH(%ebx),%edx
338 subl %eax,FPU_accum_0 /* Subtract from the num local reg */
339 sbbl %edx,FPU_accum_1
340 sbbl $0,FPU_accum_2
341
342#ifdef PARANOID
343 jc L_bugged_2
344 jne L_bugged_2
345#endif /* PARANOID */
346
347 addl $1,FPU_result_1 /* Correct the answer */
348 adcl $0,FPU_result_2
349
350#ifdef PARANOID
351 jc L_bugged_2 /* Must check for non-zero result here */
352#endif /* PARANOID */
353
354/*----------------------------------------------------------------------*/
355/* The division is essentially finished here, we just need to perform
356 tidying operations.
357 Deal with the 3rd 32 bits */
358LDo_3rd_32_bits:
359 movl FPU_accum_1,%edx /* get the reduced num */
360 movl FPU_accum_0,%eax
361
362 /* need to check for possible subsequent overflow */
363 cmpl SIGH(%ebx),%edx /* denom */
364 jb LRound_prep
365 ja LPrevent_3rd_overflow
366
367 cmpl SIGL(%ebx),%eax /* denom */
368 jb LRound_prep
369
370LPrevent_3rd_overflow:
371 /* prevent overflow */
372 subl SIGL(%ebx),%eax
373 sbbl SIGH(%ebx),%edx
374 movl %edx,FPU_accum_1
375 movl %eax,FPU_accum_0
376
377 addl $1,FPU_result_1 /* Reflect the subtraction in the answer */
378 adcl $0,FPU_result_2
379 jne LRound_prep /* FPU_result_2 is non-zero: the quotient did not wrap */
380 jnc LRound_prep
381
382 /* This is a tricky spot, there is an overflow of the answer */
383 movb $255,FPU_ovfl_flag /* Overflow -> 1.000 */
384
385LRound_prep:
386/*
387 * Prepare for rounding.
388 * To test for rounding, we just need to compare 2*accum with the
389 * denom.
390 */
391 movl FPU_accum_0,%ecx
392 movl FPU_accum_1,%edx
393 movl %ecx,%eax
394 orl %edx,%eax
395 jz LRound_ovfl /* The accumulator contains zero. */
396
397 /* Multiply by 2 */
398 clc
399 rcll $1,%ecx
400 rcll $1,%edx
401 jc LRound_large /* No need to compare, denom smaller */
402
403 subl SIGL(%ebx),%ecx
404 sbbl SIGH(%ebx),%edx
405 jnc LRound_not_small
406
407 movl $0x70000000,%eax /* Denom was larger */
408 jmp LRound_ovfl
409
410LRound_not_small:
411 jnz LRound_large
412
413 movl $0x80000000,%eax /* Remainder was exactly 1/2 denom */
414 jmp LRound_ovfl
415
416LRound_large:
417 movl $0xff000000,%eax /* Denom was smaller */
418
419LRound_ovfl: /* %eax = 0 (exact), below, equal to, or above 0x80000000: the extension for fpu_reg_round */
420/* We are now ready to deal with rounding, but first we must get
421 the bits properly aligned */
422 testb $255,FPU_ovfl_flag /* was the num > denom ? */
423 je LRound_precision
424
425 incw EXP(%edi)
426
427 /* shift the mantissa right one bit */
428 stc /* Will set the ms bit */
429 rcrl FPU_result_2
430 rcrl FPU_result_1
431 rcrl %eax
432
433/* Round the result as required */
434LRound_precision:
435 decw EXP(%edi) /* binary point between 1st & 2nd bits */
436
437 movl %eax,%edx /* %edx = extension */
438 movl FPU_result_1,%ebx /* %eax:%ebx = 64 bit quotient */
439 movl FPU_result_2,%eax
440 jmp fpu_reg_round /* rounds, stores through %edi and returns to our caller */
441
442
443#ifdef PARANOID
444/* The logic is wrong if we got here */
445L_bugged: /* the ms bit of the divisor was not set */
446 pushl EX_INTERNAL|0x202
447 call EXCEPTION
448 pop %ebx /* discard the argument */
449 jmp L_exit
450
451L_bugged_1: /* consistency check failed while producing the first 32 quotient bits */
452 pushl EX_INTERNAL|0x203
453 call EXCEPTION
454 pop %ebx /* discard the argument */
455 jmp L_exit
456
457L_bugged_2: /* consistency check failed while producing the second 32 quotient bits */
458 pushl EX_INTERNAL|0x204
459 call EXCEPTION
460 pop %ebx /* discard the argument */
461 jmp L_exit
462
463L_exit:
464 movl $-1,%eax /* return -1: internal error */
465 popl %ebx
466 popl %edi
467 popl %esi
468
469 leave
470 ret
471#endif /* PARANOID */
diff --git a/arch/i386/math-emu/reg_u_mul.S b/arch/i386/math-emu/reg_u_mul.S
new file mode 100644
index 000000000000..973f12af97df
--- /dev/null
+++ b/arch/i386/math-emu/reg_u_mul.S
@@ -0,0 +1,148 @@
1 .file "reg_u_mul.S"
2/*---------------------------------------------------------------------------+
3 | reg_u_mul.S |
4 | |
5 | Core multiplication routine |
6 | |
7 | Copyright (C) 1992,1993,1995,1997 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
9 | E-mail billm@suburbia.net |
10 | |
11 | |
12 +---------------------------------------------------------------------------*/
13
14/*---------------------------------------------------------------------------+
15 | Basic multiplication routine. |
16 | Does not check the resulting exponent for overflow/underflow |
17 | |
18 | FPU_u_mul(FPU_REG *a, FPU_REG *b, FPU_REG *c, unsigned int cw); |
19 | |
20 | Internal working is at approx 128 bits. |
21 | Result is rounded to nearest 53 or 64 bits, using "nearest or even". |
22 +---------------------------------------------------------------------------*/
23
24#include "exception.h"
25#include "fpu_emu.h"
26#include "control_w.h"
27
28
29
30#ifndef NON_REENTRANT_FPU
31/* Local storage on the stack: */
32#define FPU_accum_0 -4(%ebp) /* ms word */
33#define FPU_accum_1 -8(%ebp)
34
35#else
36/* Local storage in a static area: */
37.data
38 .align 4,0
39FPU_accum_0:
40 .long 0
41FPU_accum_1:
42 .long 0
43#endif /* NON_REENTRANT_FPU */
44
45
46.text
47ENTRY(FPU_u_mul)
48 pushl %ebp
49 movl %esp,%ebp
50#ifndef NON_REENTRANT_FPU
51 subl $8,%esp
52#endif /* NON_REENTRANT_FPU */
53
54 pushl %esi
55 pushl %edi
56 pushl %ebx
57 /* %esi = first source, %edi = second source */
58 movl PARAM1,%esi
59 movl PARAM2,%edi
60
61#ifdef PARANOID
62 testl $0x80000000,SIGH(%esi)
63 jz L_bugged
64 testl $0x80000000,SIGH(%edi)
65 jz L_bugged
66#endif /* PARANOID */
67 /* %ecx:%ebx will collect the upper 64 bits of the 128 bit product */
68 xorl %ecx,%ecx
69 xorl %ebx,%ebx
70 /* low x low -> accum_1:accum_0 (accum_0 is the ls word here) */
71 movl SIGL(%esi),%eax
72 mull SIGL(%edi)
73 movl %eax,FPU_accum_0
74 movl %edx,FPU_accum_1
75 /* low x high, added in 32 bits up */
76 movl SIGL(%esi),%eax
77 mull SIGH(%edi)
78 addl %eax,FPU_accum_1
79 adcl %edx,%ebx
80/* adcl $0,%ecx // overflow here is not possible */
81 /* high x low, added in 32 bits up */
82 movl SIGH(%esi),%eax
83 mull SIGL(%edi)
84 addl %eax,FPU_accum_1
85 adcl %edx,%ebx
86 adcl $0,%ecx
87 /* high x high, added in 64 bits up */
88 movl SIGH(%esi),%eax
89 mull SIGH(%edi)
90 addl %eax,%ebx
91 adcl %edx,%ecx
92
93 /* Get the sum of the exponents. */
94 movl PARAM6,%eax
95 subl EXP_BIAS-1,%eax
96
97 /* Two denormals can cause an exponent underflow */
98 cmpl EXP_WAY_UNDER,%eax
99 jg Exp_not_underflow
100
101 /* Set to a really low value allow correct handling */
102 movl EXP_WAY_UNDER,%eax
103
104Exp_not_underflow:
105
106/* Have now finished with the sources */
107 movl PARAM3,%edi /* Point to the destination */
108 movw %ax,EXP(%edi)
109
110/* Now make sure that the result is normalized */
111 testl $0x80000000,%ecx
112 jnz LResult_Normalised
113
114 /* Normalize by shifting left one bit */
115 shll $1,FPU_accum_0
116 rcll $1,FPU_accum_1
117 rcll $1,%ebx
118 rcll $1,%ecx
119 decw EXP(%edi)
120
121LResult_Normalised:
122 movl FPU_accum_0,%eax
123 movl FPU_accum_1,%edx
124 orl %eax,%eax
125 jz L_extent_zero
126 /* The ls word is non-zero: record that in bit 0 of the extension */
127 orl $1,%edx
128
129L_extent_zero:
130 movl %ecx,%eax
131 jmp fpu_reg_round
132 /* NOTE(review): the normal path leaves via fpu_reg_round, which */
133 /* presumably restores the saved registers and returns -- confirm. */
134#ifdef PARANOID
135L_bugged:
136 pushl EX_INTERNAL|0x205
137 call EXCEPTION
138 pop %ebx
139 movl $-1,%eax /* report the internal error to the caller */
140 /* fall through to the common exit */
141L_exit:
142 popl %ebx
143 popl %edi
144 popl %esi
145 leave
146 ret
147#endif /* PARANOID */
148
diff --git a/arch/i386/math-emu/reg_u_sub.S b/arch/i386/math-emu/reg_u_sub.S
new file mode 100644
index 000000000000..1b6c24801d22
--- /dev/null
+++ b/arch/i386/math-emu/reg_u_sub.S
@@ -0,0 +1,272 @@
1 .file "reg_u_sub.S"
2/*---------------------------------------------------------------------------+
3 | reg_u_sub.S |
4 | |
5 | Core floating point subtraction routine. |
6 | |
7 | Copyright (C) 1992,1993,1995,1997 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
9 | E-mail billm@suburbia.net |
10 | |
11 | Call from C as: |
12 | int FPU_u_sub(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, |
13 | int control_w) |
14 | Return value is the tag of the answer, or-ed with FPU_Exception if |
15 | one was raised, or -1 on internal error. |
16 | |
17 +---------------------------------------------------------------------------*/
18
19/*
20 | Kernel subtraction routine FPU_u_sub(reg *arg1, reg *arg2, reg *answ).
21 | Takes two valid reg f.p. numbers (TAG_Valid), which are
22 | treated as unsigned numbers,
23 | and returns their difference as a TAG_Valid or TAG_Zero f.p.
24 | number.
25 | The first number (arg1) must be the larger.
26 | The returned number is normalized.
27 | Basic checks are performed if PARANOID is defined.
28 */
29
30#include "exception.h"
31#include "fpu_emu.h"
32#include "control_w.h"
33
34.text
35ENTRY(FPU_u_sub)
36 pushl %ebp
37 movl %esp,%ebp
38 pushl %esi
39 pushl %edi
40 pushl %ebx
41 /* Besides the three pointers, PARAM6 and PARAM7 are read as the two exponents */
42 movl PARAM1,%esi /* source 1 */
43 movl PARAM2,%edi /* source 2 */
44
45 movl PARAM6,%ecx
46 subl PARAM7,%ecx /* exp1 - exp2 */
47
48#ifdef PARANOID
49 /* source 2 is always smaller than source 1 */
50 js L_bugged_1
51
52 testl $0x80000000,SIGH(%edi) /* The args are assumed to be normalized */
53 je L_bugged_2
54
55 testl $0x80000000,SIGH(%esi)
56 je L_bugged_2
57#endif /* PARANOID */
58
59/*--------------------------------------+
60 | Form a register holding the |
61 | smaller number |
62 +--------------------------------------*/
63 movl SIGH(%edi),%eax /* register ms word */
64 movl SIGL(%edi),%ebx /* register ls word */
65
66 movl PARAM3,%edi /* destination */
67 movl PARAM6,%edx
68 movw %dx,EXP(%edi) /* Copy exponent to destination */
69
70 xorl %edx,%edx /* register extension */
71
72/*--------------------------------------+
73 | Shift the temporary register |
74 | right the required number of |
75 | places. |
76 +--------------------------------------*/
77
78 cmpw $32,%cx /* shrd only works for 0..31 bits */
79 jnc L_more_than_31
80
81/* less than 32 bits */
82 shrd %cl,%ebx,%edx
83 shrd %cl,%eax,%ebx
84 shr %cl,%eax
85 jmp L_shift_done
86
87L_more_than_31:
88 cmpw $64,%cx
89 jnc L_more_than_63
90
91 subb $32,%cl
92 jz L_exactly_32
93 /* 33 .. 63 bits: shift by (nr - 32) here, keeping every bit which stays in range */
94 shrd %cl,%ebx,%edx /* %edx = the bits which are lost completely */
95 shrd %cl,%eax,%ebx /* %ebx = what will become the extension */
96 shr %cl,%eax /* %eax = what will become the ls word */
97 orl %edx,%edx
98 jz L_exactly_32 /* none of the lost bits is set */
99
100 orl $1,%ebx /* record the fact in the extension */
101
102 /* Fall through: moving each word down one place completes
103 the shift. */
104
105
106L_exactly_32:
107 movl %ebx,%edx
108 movl %eax,%ebx
109 xorl %eax,%eax
110 jmp L_shift_done
111
112L_more_than_63:
113 cmpw $65,%cx
114 jnc L_more_than_64
115
116 /* Shift right by 64 bits */
117 movl %eax,%edx
118 orl %ebx,%ebx
119 jz L_more_63_no_low
120
121 orl $1,%edx
122 jmp L_more_63_no_low
123
124L_more_than_64:
125 jne L_more_than_65
126
127 /* Shift right by 65 bits */
128 /* Carry is clear if we get here */
129 movl %eax,%edx
130 rcrl %edx
131 jnc L_shift_65_nc
132
133 orl $1,%edx
134 jmp L_more_63_no_low
135
136L_shift_65_nc:
137 orl %ebx,%ebx
138 jz L_more_63_no_low
139
140 orl $1,%edx
141 jmp L_more_63_no_low
142
143L_more_than_65:
144 movl $1,%edx /* The shifted nr always at least one '1' */
145
146L_more_63_no_low:
147 xorl %ebx,%ebx
148 xorl %eax,%eax
149 /* Shifted smaller number is now in %eax:%ebx, extension in %edx */
150L_shift_done:
151L_subtr:
152/*------------------------------+
153 | Do the subtraction |
154 +------------------------------*/
155 xorl %ecx,%ecx
156 subl %edx,%ecx
157 movl %ecx,%edx
158 movl SIGL(%esi),%ecx
159 sbbl %ebx,%ecx
160 movl %ecx,%ebx
161 movl SIGH(%esi),%ecx
162 sbbl %eax,%ecx
163 movl %ecx,%eax
164
165#ifdef PARANOID
166 /* We can never get a borrow */
167 jc L_bugged
168#endif /* PARANOID */
169
170/*--------------------------------------+
171 | Normalize the result |
172 +--------------------------------------*/
173 testl $0x80000000,%eax
174 jnz L_round /* no shifting needed */
175
176 orl %eax,%eax
177 jnz L_shift_1 /* shift left 1 - 31 bits */
178
179 orl %ebx,%ebx
180 jnz L_shift_32 /* shift left 32 - 63 bits */
181
182/*
183 * A rare case, the only one which is non-zero if we got here
184 * is: 1000000 .... 0000
185 * -0111111 .... 1111 1
186 * --------------------
187 * 0000000 .... 0000 1
188 */
189
190 cmpl $0x80000000,%edx
191 jnz L_must_be_zero
192
193 /* Shift left 64 bits */
194 subw $64,EXP(%edi)
195 xchg %edx,%eax
196 jmp fpu_reg_round
197
198L_must_be_zero:
199#ifdef PARANOID
200 orl %edx,%edx
201 jnz L_bugged_3
202#endif /* PARANOID */
203
204 /* The result is zero */
205 movw $0,EXP(%edi) /* exponent */
206 movl $0,SIGL(%edi)
207 movl $0,SIGH(%edi)
208 movl TAG_Zero,%eax
209 jmp L_exit
210
211L_shift_32:
212 movl %ebx,%eax
213 movl %edx,%ebx
214 movl $0,%edx
215 subw $32,EXP(%edi) /* Can get underflow here */
216
217/* We need to shift left by 1 - 31 bits */
218L_shift_1:
219 bsrl %eax,%ecx /* get the required shift in %ecx */
220 subl $31,%ecx
221 negl %ecx
222 shld %cl,%ebx,%eax
223 shld %cl,%edx,%ebx
224 shl %cl,%edx
225 subw %cx,EXP(%edi) /* Can get underflow here */
226
227L_round:
228 jmp fpu_reg_round /* Round the result */
229
230
231#ifdef PARANOID
232L_bugged_1:
233 pushl EX_INTERNAL|0x206
234 call EXCEPTION
235 pop %ebx
236 jmp L_error_exit
237
238L_bugged_2:
239 pushl EX_INTERNAL|0x209
240 call EXCEPTION
241 pop %ebx
242 jmp L_error_exit
243
244L_bugged_3:
245 pushl EX_INTERNAL|0x210
246 call EXCEPTION
247 pop %ebx
248 jmp L_error_exit
249 /* NOTE(review): nothing in this routine jumps to L_bugged_4 */
250L_bugged_4:
251 pushl EX_INTERNAL|0x211
252 call EXCEPTION
253 pop %ebx
254 jmp L_error_exit
255
256L_bugged:
257 pushl EX_INTERNAL|0x212
258 call EXCEPTION
259 pop %ebx
260 jmp L_error_exit
261
262L_error_exit:
263 movl $-1,%eax
264
265#endif /* PARANOID */
266
267L_exit:
268 popl %ebx
269 popl %edi
270 popl %esi
271 leave
272 ret
diff --git a/arch/i386/math-emu/round_Xsig.S b/arch/i386/math-emu/round_Xsig.S
new file mode 100644
index 000000000000..bbe0e87718e4
--- /dev/null
+++ b/arch/i386/math-emu/round_Xsig.S
@@ -0,0 +1,141 @@
1/*---------------------------------------------------------------------------+
2 | round_Xsig.S |
3 | |
4 | Copyright (C) 1992,1993,1994,1995 |
5 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
6 | Australia. E-mail billm@jacobi.maths.monash.edu.au |
7 | |
8 | Normalize and round a 12 byte quantity. |
9 | Call from C as: |
10 | int round_Xsig(Xsig *n) |
11 | |
12 | Normalize a 12 byte quantity. |
13 | Call from C as: |
14 | int norm_Xsig(Xsig *n) |
15 | |
16 | Each function returns the size of the shift (nr of bits). |
17 | |
18 +---------------------------------------------------------------------------*/
19 .file "round_Xsig.S"
20
21#include "fpu_emu.h"
22
23
24.text
25ENTRY(round_Xsig)
26 pushl %ebp
27 movl %esp,%ebp
28 pushl %ebx /* Reserve some space */
29 pushl %ebx /* this push is the one matched by the popl %ebx on exit */
30 pushl %esi
31
32 movl PARAM1,%esi
33 /* Load the 12 byte value into %edx:%ebx:%eax */
34 movl 8(%esi),%edx
35 movl 4(%esi),%ebx
36 movl (%esi),%eax
37 /* -4(%ebp) accumulates the value which is returned */
38 movl $0,-4(%ebp)
39
40 orl %edx,%edx /* ms bits */
41 js L_round /* Already normalized */
42 jnz L_shift_1 /* Shift left 1 - 31 bits */
43 /* The ms word is zero: move each word up one place (32 bits) */
44 movl %ebx,%edx
45 movl %eax,%ebx
46 xorl %eax,%eax
47 movl $-32,-4(%ebp)
48 /* NOTE(review): bsrl gives an undefined result if %edx is still zero here -- confirm callers never pass that */
49/* We need to shift left by 1 - 31 bits */
50L_shift_1:
51 bsrl %edx,%ecx /* get the required shift in %ecx */
52 subl $31,%ecx
53 negl %ecx
54 subl %ecx,-4(%ebp)
55 shld %cl,%ebx,%edx
56 shld %cl,%eax,%ebx
57 shl %cl,%eax
58 /* Round the upper 64 bits up if the ms bit of the ls word is set */
59L_round:
60 testl $0x80000000,%eax
61 jz L_exit
62
63 addl $1,%ebx
64 adcl $0,%edx
65 jnz L_exit
66 /* The increment wrapped %edx to zero: use 0x80000000 and add one to the count */
67 movl $0x80000000,%edx
68 incl -4(%ebp)
69
70L_exit:
71 movl %edx,8(%esi)
72 movl %ebx,4(%esi)
73 movl %eax,(%esi)
74 /* Returns minus the left shift applied, plus one if the rounding wrapped */
75 movl -4(%ebp),%eax
76
77 popl %esi
78 popl %ebx
79 leave
80 ret
81
82
83
84
85ENTRY(norm_Xsig)
86 pushl %ebp
87 movl %esp,%ebp
88 pushl %ebx /* Reserve some space */
89 pushl %ebx /* this push is the one matched by the popl %ebx on exit */
90 pushl %esi
91
92 movl PARAM1,%esi
93 /* Load the 12 byte value into %edx:%ebx:%eax */
94 movl 8(%esi),%edx
95 movl 4(%esi),%ebx
96 movl (%esi),%eax
97 /* -4(%ebp) accumulates the value which is returned */
98 movl $0,-4(%ebp)
99
100 orl %edx,%edx /* ms bits */
101 js L_n_exit /* Already normalized */
102 jnz L_n_shift_1 /* Shift left 1 - 31 bits */
103 /* The ms word is zero: move each word up one place (32 bits) */
104 movl %ebx,%edx
105 movl %eax,%ebx
106 xorl %eax,%eax
107 movl $-32,-4(%ebp)
108
109 orl %edx,%edx /* ms bits */
110 js L_n_exit /* Normalized now */
111 jnz L_n_shift_1 /* Shift left 1 - 31 bits */
112 /* Still zero: move up a second word, -64 in total */
113 movl %ebx,%edx
114 movl %eax,%ebx
115 xorl %eax,%eax
116 addl $-32,-4(%ebp)
117 jmp L_n_exit /* Might not be normalized,
118 but shift no more. */
119
120/* We need to shift left by 1 - 31 bits */
121L_n_shift_1:
122 bsrl %edx,%ecx /* get the required shift in %ecx */
123 subl $31,%ecx
124 negl %ecx
125 subl %ecx,-4(%ebp)
126 shld %cl,%ebx,%edx
127 shld %cl,%eax,%ebx
128 shl %cl,%eax
129 /* Store the result back; no rounding is done in this routine */
130L_n_exit:
131 movl %edx,8(%esi)
132 movl %ebx,4(%esi)
133 movl %eax,(%esi)
134 /* Returns minus the number of bits shifted left */
135 movl -4(%ebp),%eax
136
137 popl %esi
138 popl %ebx
139 leave
140 ret
141
diff --git a/arch/i386/math-emu/shr_Xsig.S b/arch/i386/math-emu/shr_Xsig.S
new file mode 100644
index 000000000000..31cdd118e918
--- /dev/null
+++ b/arch/i386/math-emu/shr_Xsig.S
@@ -0,0 +1,87 @@
1 .file "shr_Xsig.S"
2/*---------------------------------------------------------------------------+
3 | shr_Xsig.S |
4 | |
5 | 12 byte right shift function |
6 | |
7 | Copyright (C) 1992,1994,1995 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
9 | Australia. E-mail billm@jacobi.maths.monash.edu.au |
10 | |
11 | Call from C as: |
12 | void shr_Xsig(Xsig *arg, unsigned nr) |
13 | |
14 | Extended shift right function. |
15 | Fastest for small shifts. |
16 | Shifts the 12 byte quantity pointed to by the first arg (arg) |
17 | right by the number of bits specified by the second arg (nr). |
18 | |
19 +---------------------------------------------------------------------------*/
20
21#include "fpu_emu.h"
22
23.text
24ENTRY(shr_Xsig)
25 push %ebp
26 movl %esp,%ebp
27 pushl %esi
28 movl PARAM2,%ecx
29 movl PARAM1,%esi
30 cmpl $32,%ecx /* shrd only works for 0..31 bits */
31 jnc L_more_than_31
32 /* %ebx is only needed (and only saved) on this path */
33/* less than 32 bits */
34 pushl %ebx
35 movl (%esi),%eax /* lsl */
36 movl 4(%esi),%ebx /* midl */
37 movl 8(%esi),%edx /* msl */
38 shrd %cl,%ebx,%eax
39 shrd %cl,%edx,%ebx
40 shr %cl,%edx
41 movl %eax,(%esi)
42 movl %ebx,4(%esi)
43 movl %edx,8(%esi)
44 popl %ebx
45 popl %esi
46 leave
47 ret
48 /* 32 .. 63 bits: drop lsl, shift the upper two words by (nr - 32) */
49L_more_than_31:
50 cmpl $64,%ecx
51 jnc L_more_than_63
52
53 subb $32,%cl
54 movl 4(%esi),%eax /* midl */
55 movl 8(%esi),%edx /* msl */
56 shrd %cl,%edx,%eax
57 shr %cl,%edx
58 movl %eax,(%esi)
59 movl %edx,4(%esi)
60 movl $0,8(%esi)
61 popl %esi
62 leave
63 ret
64 /* 64 .. 95 bits: only msl shifted by (nr - 64) survives, in the ls word */
65L_more_than_63:
66 cmpl $96,%ecx
67 jnc L_more_than_95
68
69 subb $64,%cl
70 movl 8(%esi),%eax /* msl */
71 shr %cl,%eax
72 xorl %edx,%edx
73 movl %eax,(%esi)
74 movl %edx,4(%esi)
75 movl %edx,8(%esi)
76 popl %esi
77 leave
78 ret
79 /* 96 bits or more: the result is zero */
80L_more_than_95:
81 xorl %eax,%eax
82 movl %eax,(%esi)
83 movl %eax,4(%esi)
84 movl %eax,8(%esi)
85 popl %esi
86 leave
87 ret
diff --git a/arch/i386/math-emu/status_w.h b/arch/i386/math-emu/status_w.h
new file mode 100644
index 000000000000..78d7b7689dd6
--- /dev/null
+++ b/arch/i386/math-emu/status_w.h
@@ -0,0 +1,65 @@
1/*---------------------------------------------------------------------------+
2 | status_w.h |
3 | |
4 | Copyright (C) 1992,1993 |
5 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
6 | Australia. E-mail billm@vaxc.cc.monash.edu.au |
7 | |
8 +---------------------------------------------------------------------------*/
9
10#ifndef _STATUS_H_
11#define _STATUS_H_
12
13#include "fpu_emu.h" /* for definition of PECULIAR_486 */
14
15#ifdef __ASSEMBLY__
16#define Const__(x) $##x
17#else
18#define Const__(x) x
19#endif
20
21#define SW_Backward Const__(0x8000) /* backward compatibility */
22#define SW_C3 Const__(0x4000) /* condition bit 3 */
23#define SW_Top Const__(0x3800) /* top of stack */
24#define SW_Top_Shift Const__(11) /* shift for top of stack bits */
25#define SW_C2 Const__(0x0400) /* condition bit 2 */
26#define SW_C1 Const__(0x0200) /* condition bit 1 */
27#define SW_C0 Const__(0x0100) /* condition bit 0 */
28#define SW_Summary Const__(0x0080) /* exception summary */
29#define SW_Stack_Fault Const__(0x0040) /* stack fault */
30#define SW_Precision Const__(0x0020) /* loss of precision */
31#define SW_Underflow Const__(0x0010) /* underflow */
32#define SW_Overflow Const__(0x0008) /* overflow */
33#define SW_Zero_Div Const__(0x0004) /* divide by zero */
34#define SW_Denorm_Op Const__(0x0002) /* denormalized operand */
35#define SW_Invalid Const__(0x0001) /* invalid operation */
36
37#define SW_Exc_Mask Const__(0x27f) /* Status word exception bit mask */
38
39#ifndef __ASSEMBLY__
40
41#define COMP_A_gt_B 1
42#define COMP_A_eq_B 2
43#define COMP_A_lt_B 3
44#define COMP_No_Comp 4
45#define COMP_Denormal 0x20
46#define COMP_NaN 0x40
47#define COMP_SNaN 0x80
48/* status_word(): partial_status with its SW_Top field replaced by the current value of top. */
49#define status_word() \
50 ((partial_status & ~SW_Top & 0xffff) | ((top << SW_Top_Shift) & SW_Top))
51#define setcc(cc) ({ /* replace the four condition bits C0..C3 with those of cc */ \
52 partial_status &= ~(SW_C0|SW_C1|SW_C2|SW_C3); \
53 partial_status |= (cc) & (SW_C0|SW_C1|SW_C2|SW_C3); })
54
55#ifdef PECULIAR_486
56 /* Default, this conveys no information, but an 80486 does it. */
57 /* Clear the SW_C1 bit, "other bits undefined". */
58# define clear_C1() { partial_status &= ~SW_C1; }
59# else
60# define clear_C1() /* expands to nothing when PECULIAR_486 is not defined */
61#endif /* PECULIAR_486 */
62
63#endif /* __ASSEMBLY__ */
64
65#endif /* _STATUS_H_ */
diff --git a/arch/i386/math-emu/version.h b/arch/i386/math-emu/version.h
new file mode 100644
index 000000000000..a0d73a1d2b67
--- /dev/null
+++ b/arch/i386/math-emu/version.h
@@ -0,0 +1,12 @@
1/*---------------------------------------------------------------------------+
2 | version.h |
3 | |
4 | |
5 | Copyright (C) 1992,1993,1994,1996,1997,1999 |
6 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
7 | E-mail billm@melbpc.org.au |
8 | |
9 | |
10 +---------------------------------------------------------------------------*/
11
12#define FPU_VERSION "wm-FPU-emu version 2.01"
diff --git a/arch/i386/math-emu/wm_shrx.S b/arch/i386/math-emu/wm_shrx.S
new file mode 100644
index 000000000000..518428317985
--- /dev/null
+++ b/arch/i386/math-emu/wm_shrx.S
@@ -0,0 +1,204 @@
1 .file "wm_shrx.S"
2/*---------------------------------------------------------------------------+
3 | wm_shrx.S |
4 | |
5 | 64 bit right shift functions |
6 | |
7 | Copyright (C) 1992,1995 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
9 | Australia. E-mail billm@jacobi.maths.monash.edu.au |
10 | |
11 | Call from C as: |
12 | unsigned FPU_shrx(void *arg1, unsigned arg2) |
13 | and |
14 | unsigned FPU_shrxs(void *arg1, unsigned arg2) |
15 | |
16 +---------------------------------------------------------------------------*/
17
18#include "fpu_emu.h"
19
20.text
21/*---------------------------------------------------------------------------+
22 | unsigned FPU_shrx(void *arg1, unsigned arg2) |
23 | |
24 | Extended shift right function. |
25 | Fastest for small shifts. |
26 | Shifts the 64 bit quantity pointed to by the first arg (arg1) |
27 | right by the number of bits specified by the second arg (arg2). |
28 | Forms a 96 bit quantity from the 64 bit arg and eax: |
29 | [ 64 bit arg ][ eax ] |
30 | shift right ---------> |
31 | The eax register is initialized to 0 before the shifting. |
32 | Results returned in the 64 bit arg and eax. |
33 +---------------------------------------------------------------------------*/
34
35ENTRY(FPU_shrx)
36 push %ebp
37 movl %esp,%ebp
38 pushl %esi
39 movl PARAM2,%ecx
40 movl PARAM1,%esi
41 cmpl $32,%ecx /* shrd only works for 0..31 bits */
42 jnc L_more_than_31
43 /* %ebx is only needed (and only saved) on this path */
44/* less than 32 bits */
45 pushl %ebx
46 movl (%esi),%ebx /* lsl */
47 movl 4(%esi),%edx /* msl */
48 xorl %eax,%eax /* extension */
49 shrd %cl,%ebx,%eax
50 shrd %cl,%edx,%ebx
51 shr %cl,%edx
52 movl %ebx,(%esi)
53 movl %edx,4(%esi)
54 popl %ebx
55 popl %esi
56 leave
57 ret
58 /* 32 .. 63 bits: lsl becomes the extension, then shift by (nr - 32) */
59L_more_than_31:
60 cmpl $64,%ecx
61 jnc L_more_than_63
62
63 subb $32,%cl
64 movl (%esi),%eax /* lsl */
65 movl 4(%esi),%edx /* msl */
66 shrd %cl,%edx,%eax
67 shr %cl,%edx
68 movl %edx,(%esi)
69 movl $0,4(%esi)
70 popl %esi
71 leave
72 ret
73 /* 64 .. 95 bits: msl shifted by (nr - 64) is returned in %eax, the arg becomes zero */
74L_more_than_63:
75 cmpl $96,%ecx
76 jnc L_more_than_95
77
78 subb $64,%cl
79 movl 4(%esi),%eax /* msl */
80 shr %cl,%eax
81 xorl %edx,%edx
82 movl %edx,(%esi)
83 movl %edx,4(%esi)
84 popl %esi
85 leave
86 ret
87 /* 96 bits or more: both the arg and the returned extension are zero */
88L_more_than_95:
89 xorl %eax,%eax
90 movl %eax,(%esi)
91 movl %eax,4(%esi)
92 popl %esi
93 leave
94 ret
95
96
97/*---------------------------------------------------------------------------+
98 | unsigned FPU_shrxs(void *arg1, unsigned arg2) |
99 | |
100 | Extended shift right function (optimized for small floating point |
101 | integers). |
102 | Shifts the 64 bit quantity pointed to by the first arg (arg1) |
103 | right by the number of bits specified by the second arg (arg2). |
104 | Forms a 96 bit quantity from the 64 bit arg and eax: |
105 | [ 64 bit arg ][ eax ] |
106 | shift right ---------> |
107 | The eax register is initialized to 0 before the shifting. |
108 | The lower 8 bits of eax are lost and replaced by a flag which is |
109 | set (to 0x01) if any bit, apart from the first one, is set in the |
110 | part which has been shifted out of the arg. |
111 | Results returned in the 64 bit arg and eax. |
112 +---------------------------------------------------------------------------*/
113ENTRY(FPU_shrxs)
114 push %ebp
115 movl %esp,%ebp
116 pushl %esi
117 pushl %ebx
118 movl PARAM2,%ecx
119 movl PARAM1,%esi
120 cmpl $64,%ecx /* shifts of 64 bits or more are handled separately */
121 jnc Ls_more_than_63
122
123 cmpl $32,%ecx /* shrd only works for 0..31 bits */
124 jc Ls_less_than_32
125
126/* We got here without jumps by assuming that the most common requirement
127 is for small integers */
128/* Shift by [32..63] bits */
129 subb $32,%cl
130 movl (%esi),%eax /* lsl */
131 movl 4(%esi),%edx /* msl */
132 xorl %ebx,%ebx
133 shrd %cl,%eax,%ebx
134 shrd %cl,%edx,%eax
135 shr %cl,%edx
136 orl %ebx,%ebx /* test these 32 bits */
137 setne %bl
138 test $0x7fffffff,%eax /* and 31 bits here */
139 setne %bh
140 orw %bx,%bx /* Any of the 63 bit set ? */
141 setne %al
142 movl %edx,(%esi)
143 movl $0,4(%esi)
144 popl %ebx
145 popl %esi
146 leave
147 ret
148
149/* Shift by [0..31] bits */
150Ls_less_than_32:
151 movl (%esi),%ebx /* lsl */
152 movl 4(%esi),%edx /* msl */
153 xorl %eax,%eax /* extension */
154 shrd %cl,%ebx,%eax
155 shrd %cl,%edx,%ebx
156 shr %cl,%edx
157 test $0x7fffffff,%eax /* only need to look at eax here */
158 setne %al
159 movl %ebx,(%esi)
160 movl %edx,4(%esi)
161 popl %ebx
162 popl %esi
163 leave
164 ret
165
166/* Shift by [64..95] bits */
167Ls_more_than_63:
168 cmpl $96,%ecx
169 jnc Ls_more_than_95
170
171 subb $64,%cl
172 movl (%esi),%ebx /* lsl */
173 movl 4(%esi),%eax /* msl */
174 xorl %edx,%edx /* extension */
175 shrd %cl,%ebx,%edx
176 shrd %cl,%eax,%ebx
177 shr %cl,%eax
178 orl %ebx,%edx /* any bit set below the returned word ? */
179 setne %bl
180 test $0x7fffffff,%eax /* only need to look at eax here */
181 setne %bh
182 orw %bx,%bx
183 setne %al
184 xorl %edx,%edx
185 movl %edx,(%esi) /* set to zero */
186 movl %edx,4(%esi) /* set to zero */
187 popl %ebx
188 popl %esi
189 leave
190 ret
191
192Ls_more_than_95:
193/* Shift by [96..inf) bits */
194 xorl %eax,%eax
195 movl (%esi),%ebx
196 orl 4(%esi),%ebx
197 setne %al /* flag is set if the arg was non-zero */
198 xorl %ebx,%ebx
199 movl %ebx,(%esi)
200 movl %ebx,4(%esi)
201 popl %ebx
202 popl %esi
203 leave
204 ret
diff --git a/arch/i386/math-emu/wm_sqrt.S b/arch/i386/math-emu/wm_sqrt.S
new file mode 100644
index 000000000000..d258f59564e1
--- /dev/null
+++ b/arch/i386/math-emu/wm_sqrt.S
@@ -0,0 +1,470 @@
1 .file "wm_sqrt.S"
2/*---------------------------------------------------------------------------+
3 | wm_sqrt.S |
4 | |
5 | Fixed point arithmetic square root evaluation. |
6 | |
7 | Copyright (C) 1992,1993,1995,1997 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
9 | Australia. E-mail billm@suburbia.net |
10 | |
11 | Call from C as: |
12 | int wm_sqrt(FPU_REG *n, unsigned int control_word) |
13 | |
14 +---------------------------------------------------------------------------*/
15
16/*---------------------------------------------------------------------------+
17 | wm_sqrt(FPU_REG *n, unsigned int control_word) |
18 | returns the square root of n in n. |
19 | |
20 | Use Newton's method to compute the square root of a number, which must |
21 | be in the range [1.0 .. 4.0), to 64 bits accuracy. |
22 | Does not check the sign or tag of the argument. |
23 | Sets the exponent, but not the sign or tag of the result. |
24 | |
25 | The guess is kept in %esi:%edi |
26 +---------------------------------------------------------------------------*/
27
28#include "exception.h"
29#include "fpu_emu.h"
30
31
32#ifndef NON_REENTRANT_FPU
33/* Local storage on the stack: */
34#define FPU_accum_3 -4(%ebp) /* ms word */
35#define FPU_accum_2 -8(%ebp)
36#define FPU_accum_1 -12(%ebp)
37#define FPU_accum_0 -16(%ebp)
38
39/*
40 * The de-normalised argument:
41 * sq_2 sq_1 sq_0
42 * b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
43 * ^ binary point here
44 */
45#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */
46#define FPU_fsqrt_arg_1 -24(%ebp)
47#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */
48
49#else
50/* Local storage in a static area: */
51.data
52 .align 4,0
53FPU_accum_3:
54 .long 0 /* ms word */
55FPU_accum_2:
56 .long 0
57FPU_accum_1:
58 .long 0
59FPU_accum_0:
60 .long 0
61
62/* The de-normalised argument:
63 sq_2 sq_1 sq_0
64 b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
65 ^ binary point here
66 */
67FPU_fsqrt_arg_2:
68 .long 0 /* ms word */
69FPU_fsqrt_arg_1:
70 .long 0
71FPU_fsqrt_arg_0:
72 .long 0 /* ls word, at most the ms bit is set */
73#endif /* NON_REENTRANT_FPU */
74
75
76.text
77ENTRY(wm_sqrt)
78 pushl %ebp
79 movl %esp,%ebp
80#ifndef NON_REENTRANT_FPU
81 subl $28,%esp
82#endif /* NON_REENTRANT_FPU */
83 pushl %esi
84 pushl %edi
85 pushl %ebx
86
87 movl PARAM1,%esi
88
89 movl SIGH(%esi),%eax
90 movl SIGL(%esi),%ecx
91 xorl %edx,%edx
92
93/* We use a rough linear estimate for the first guess.. */
94
95 cmpw EXP_BIAS,EXP(%esi)
96 jnz sqrt_arg_ge_2
97
98 shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */
99 rcrl $1,%ecx
100 rcrl $1,%edx
101
102sqrt_arg_ge_2:
103/* From here on, n is never accessed directly again until it is
104 replaced by the answer. */
105
106 movl %eax,FPU_fsqrt_arg_2 /* ms word of n */
107 movl %ecx,FPU_fsqrt_arg_1
108 movl %edx,FPU_fsqrt_arg_0
109
110/* Make a linear first estimate */
111 shrl $1,%eax
112 addl $0x40000000,%eax
113 movl $0xaaaaaaaa,%ecx
114 mull %ecx
115 shll %edx /* max result was 7fff... */
116 testl $0x80000000,%edx /* but min was 3fff... */
117 jnz sqrt_prelim_no_adjust
118
119 movl $0x80000000,%edx /* round up */
120
121sqrt_prelim_no_adjust:
122 movl %edx,%esi /* Our first guess */
123
124/* We have now computed (approx) (2 + x) / 3, which forms the basis
125 for a few iterations of Newton's method */
126
127 movl FPU_fsqrt_arg_2,%ecx /* ms word */
128
129/*
130 * From our initial estimate, three iterations are enough to get us
131 * to 30 bits or so. This will then allow two iterations at better
132 * precision to complete the process.
133 */
134
135/* Compute (g + n/g)/2 at each iteration (g is the guess). */
136 shrl %ecx /* Doing this first will prevent a divide */
137 /* overflow later. */
138
139 movl %ecx,%edx /* msw of the arg / 2 */
140 divl %esi /* current estimate */
141 shrl %esi /* divide by 2 */
142 addl %eax,%esi /* the new estimate */
143
144 movl %ecx,%edx
145 divl %esi
146 shrl %esi
147 addl %eax,%esi
148
149 movl %ecx,%edx
150 divl %esi
151 shrl %esi
152 addl %eax,%esi
153
154/*
155 * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
156 * we improve it to 60 bits or so.
157 *
158 * The strategy from now on is to compute new estimates from
159 * guess := guess + (n - guess^2) / (2 * guess)
160 */
161
162/* First, find the square of the guess */
163 movl %esi,%eax
164 mull %esi
165/* guess^2 now in %edx:%eax */
166
167 movl FPU_fsqrt_arg_1,%ecx
168 subl %ecx,%eax
169 movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */
170 sbbl %ecx,%edx
171 jnc sqrt_stage_2_positive
172
173/* Subtraction gives a negative result,
174 negate the result before division. */
175 notl %edx
176 notl %eax
177 addl $1,%eax
178 adcl $0,%edx
179
180 divl %esi
181 movl %eax,%ecx
182
183 movl %edx,%eax
184 divl %esi
185 jmp sqrt_stage_2_finish
186
187sqrt_stage_2_positive:
188 divl %esi
189 movl %eax,%ecx
190
191 movl %edx,%eax
192 divl %esi
193
194 notl %ecx
195 notl %eax
196 addl $1,%eax
197 adcl $0,%ecx
198
199sqrt_stage_2_finish:
200 sarl $1,%ecx /* divide by 2 */
201 rcrl $1,%eax
202
203 /* Form the new estimate in %esi:%edi */
204 movl %eax,%edi
205 addl %ecx,%esi
206
207 jnz sqrt_stage_2_done /* result should be [1..2) */
208
209#ifdef PARANOID
210/* It should be possible to get here only if the arg is ffff....ffff */
211 cmp $0xffffffff,FPU_fsqrt_arg_1
212 jnz sqrt_stage_2_error
213#endif /* PARANOID */
214
215/* The best rounded result. */
216 xorl %eax,%eax
217 decl %eax
218 movl %eax,%edi
219 movl %eax,%esi
220 movl $0x7fffffff,%eax
221 jmp sqrt_round_result
222
223#ifdef PARANOID
224sqrt_stage_2_error:
225 pushl EX_INTERNAL|0x213
226 call EXCEPTION
227#endif /* PARANOID */
228
229sqrt_stage_2_done:
230
231/* Now the square root has been computed to better than 60 bits. */
232
233/* Find the square of the guess. */
234 movl %edi,%eax /* ls word of guess */
235 mull %edi
236 movl %edx,FPU_accum_1
237
238 movl %esi,%eax
239 mull %esi
240 movl %edx,FPU_accum_3
241 movl %eax,FPU_accum_2
242
243 movl %edi,%eax
244 mull %esi
245 addl %eax,FPU_accum_1
246 adcl %edx,FPU_accum_2
247 adcl $0,FPU_accum_3
248
249/* movl %esi,%eax */
250/* mull %edi */
251 addl %eax,FPU_accum_1
252 adcl %edx,FPU_accum_2
253 adcl $0,FPU_accum_3
254
255/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
256
257 movl FPU_fsqrt_arg_0,%eax /* get normalized n */
258 subl %eax,FPU_accum_1
259 movl FPU_fsqrt_arg_1,%eax
260 sbbl %eax,FPU_accum_2
261 movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */
262 sbbl %eax,FPU_accum_3
263 jnc sqrt_stage_3_positive
264
265/* Subtraction gives a negative result,
266 negate the result before division */
267 notl FPU_accum_1
268 notl FPU_accum_2
269 notl FPU_accum_3
270 addl $1,FPU_accum_1
271 adcl $0,FPU_accum_2
272
273#ifdef PARANOID
274 adcl $0,FPU_accum_3 /* This must be zero */
275 jz sqrt_stage_3_no_error
276
277sqrt_stage_3_error:
278 pushl EX_INTERNAL|0x207
279 call EXCEPTION
280
281sqrt_stage_3_no_error:
282#endif /* PARANOID */
283
284 movl FPU_accum_2,%edx
285 movl FPU_accum_1,%eax
286 divl %esi
287 movl %eax,%ecx
288
289 movl %edx,%eax
290 divl %esi
291
292 sarl $1,%ecx /* divide by 2 */
293 rcrl $1,%eax
294
295 /* prepare to round the result */
296
297 addl %ecx,%edi
298 adcl $0,%esi
299
300 jmp sqrt_stage_3_finished
301
302sqrt_stage_3_positive:
303 movl FPU_accum_2,%edx
304 movl FPU_accum_1,%eax
305 divl %esi
306 movl %eax,%ecx
307
308 movl %edx,%eax
309 divl %esi
310
311 sarl $1,%ecx /* divide by 2 */
312 rcrl $1,%eax
313
314 /* prepare to round the result */
315
316 notl %eax /* Negate the correction term */
317 notl %ecx
318 addl $1,%eax
319 adcl $0,%ecx /* carry here ==> correction == 0 */
320 adcl $0xffffffff,%esi
321
322 addl %ecx,%edi
323 adcl $0,%esi
324
325sqrt_stage_3_finished:
326
327/*
328 * The result in %esi:%edi:%eax should be good to about 90 bits here,
329 * and the rounding information here does not have sufficient accuracy
330 * in a few rare cases.
331 */
332 cmpl $0xffffffe0,%eax
333 ja sqrt_near_exact_x
334
335 cmpl $0x00000020,%eax
336 jb sqrt_near_exact
337
338 cmpl $0x7fffffe0,%eax
339 jb sqrt_round_result
340
341 cmpl $0x80000020,%eax
342 jb sqrt_get_more_precision
343
344sqrt_round_result:
345/* Set up for rounding operations */
346 movl %eax,%edx
347 movl %esi,%eax
348 movl %edi,%ebx
349 movl PARAM1,%edi
350 movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */
351 jmp fpu_reg_round
352
353
354sqrt_near_exact_x:
355/* First, the estimate must be rounded up. */
356 addl $1,%edi
357 adcl $0,%esi
358
359sqrt_near_exact:
360/*
361 * This is an easy case because x^(1/2) is monotonic.
362 * We need just find the square of our estimate, compare it
363 * with the argument, and deduce whether our estimate is
364 * above, below, or exact. We use the fact that the estimate
365 * is known to be accurate to about 90 bits.
 *
 * Only the low 64 bits of the square of %esi:%edi are formed, in
 * %ebx:%ecx: %ecx is the ls word of edi*edi, and %ebx is the ms word
 * of edi*edi plus twice the ls word of edi*esi. The outcome is:
 *   %ebx negative       -> sqrt_near_exact_small
 *   %ebx:%ecx non-zero  -> sqrt_near_exact_large
 *   %ebx:%ecx zero      -> exact, %eax = 0, then sqrt_round_result
 * NOTE(review): this presumes the argument contributes nothing to
 * these 64 bits -- confirm against the scaling done earlier.
366 */
367 movl %edi,%eax /* ls word of guess */
368 mull %edi
369 movl %edx,%ebx /* 2nd ls word of square */
370 movl %eax,%ecx /* ls word of square */
371
372 movl %edi,%eax
373 mull %esi /* cross product; its ms word in %edx is not part of the low 64 bits */
374 addl %eax,%ebx /* the cross term appears twice in the square */
375 addl %eax,%ebx
376
377#ifdef PARANOID
/*
 * NOTE(review): as written, the first branch is taken whenever
 * %ebx < 0xffffffb0 and the second whenever %ebx > 0x50, so every
 * value of %ebx reaches sqrt_near_exact_ok and the EXCEPTION call
 * looks unreachable. Confirm whether the branch senses were meant to
 * be inverted before relying on this check.
 */
378 cmp $0xffffffb0,%ebx
379 jb sqrt_near_exact_ok
380
381 cmp $0x00000050,%ebx
382 ja sqrt_near_exact_ok
383
384 pushl EX_INTERNAL|0x214
385 call EXCEPTION
386
387sqrt_near_exact_ok:
388#endif /* PARANOID */
389
390 or %ebx,%ebx /* set flags from %ebx */
391 js sqrt_near_exact_small
392
393 jnz sqrt_near_exact_large
394
/*
 * %ebx is zero here, so this tests the ls word of the square. The test
 * must use %ecx: %edx holds the ms word of edi*esi from the second mull
 * and can be zero while %ecx is not (estimate 0x80000000:00000001),
 * which would misreport an inexact result as exact.
 */
395 or %ebx,%ecx
396 jnz sqrt_near_exact_large
397
398/* Our estimate is exactly the right answer */
399 xorl %eax,%eax
400 jmp sqrt_round_result
401
402sqrt_near_exact_small:
403/* Our estimate is too small */
/*
 * Reached from sqrt_near_exact when %ebx is negative. Leaves %esi:%edi
 * unchanged and sets the rounding word %eax to a small non-zero value
 * before handing over to sqrt_round_result.
 */
404 movl $0x000000ff,%eax
405 jmp sqrt_round_result
406
407sqrt_near_exact_large:
408/* Our estimate is too large, we need to decrement it */
/*
 * Reached from sqrt_near_exact when the low 64 bits of the square are
 * positive and non-zero. Subtracts one from the 64-bit estimate
 * %esi:%edi and sets the rounding word %eax to 0xffffff00 before
 * handing over to sqrt_round_result.
 */
409 subl $1,%edi
410 sbbl $0,%esi /* propagate the borrow into the ms word */
411 movl $0xffffff00,%eax
412 jmp sqrt_round_result
413
414
415sqrt_get_more_precision:
416/* This case is almost the same as the above, except we start
417 with an extra bit of precision in the estimate. */
/*
 * Entered from sqrt_stage_3_finished when
 * 0x7fffffe0 <= %eax < 0x80000020. The estimate %esi:%edi is shifted
 * left one bit with a 1 inserted at the bottom, the low 64 bits of its
 * square are formed in %ebx:%ecx, and the estimate is then shifted
 * back. The outcome is:
 *   %ebx negative       -> sqrt_more_prec_small
 *   %ebx:%ecx non-zero  -> sqrt_more_prec_large
 *   %ebx:%ecx zero      -> exact, %eax = 0x80000000, sqrt_round_result
 */
418 stc /* The extra bit. */
419 rcll $1,%edi /* Shift the estimate left one bit */
420 rcll $1,%esi /* the top bit of %esi is shifted out here */
421
422 movl %edi,%eax /* ls word of guess */
423 mull %edi
424 movl %edx,%ebx /* 2nd ls word of square */
425 movl %eax,%ecx /* ls word of square */
426
427 movl %edi,%eax
428 mull %esi
429 addl %eax,%ebx /* the cross term appears twice in the square */
430 addl %eax,%ebx
431
432/* Put our estimate back to its original value */
/* NOTE(review): restoring with stc assumes the bit shifted out of %esi above was 1 -- confirm the estimate always has its ms bit set here. */
433 stc /* The ms bit. */
434 rcrl $1,%esi /* Shift the estimate right one bit */
435 rcrl $1,%edi /* the extra bit drops out of the bottom */
436
437#ifdef PARANOID
/*
 * NOTE(review): as written, the first branch is taken whenever
 * %ebx < 0xffffff60 and the second whenever %ebx > 0xa0, so every
 * value of %ebx reaches sqrt_more_prec_ok and the EXCEPTION call
 * looks unreachable. Confirm whether the branch senses were meant to
 * be inverted before relying on this check.
 */
438 cmp $0xffffff60,%ebx
439 jb sqrt_more_prec_ok
440
441 cmp $0x000000a0,%ebx
442 ja sqrt_more_prec_ok
443
444 pushl EX_INTERNAL|0x215
445 call EXCEPTION
446
447sqrt_more_prec_ok:
448#endif /* PARANOID */
449
450 or %ebx,%ebx /* set flags from %ebx */
451 js sqrt_more_prec_small
452
453 jnz sqrt_more_prec_large
454
455 or %ebx,%ecx /* %ebx is zero here, so this tests the ls word of the square */
456 jnz sqrt_more_prec_large
457
458/* Our estimate is exactly the right answer */
459 movl $0x80000000,%eax
460 jmp sqrt_round_result
461
462sqrt_more_prec_small:
463/* Our estimate is too small */
/*
 * Reached from sqrt_get_more_precision when %ebx is negative. Leaves
 * %esi:%edi unchanged and sets the rounding word %eax to 0x800000ff
 * before handing over to sqrt_round_result.
 */
464 movl $0x800000ff,%eax
465 jmp sqrt_round_result
466
467sqrt_more_prec_large:
468/* Our estimate is too large */
/*
 * Reached from sqrt_get_more_precision when the low 64 bits of the
 * square are positive and non-zero. Unlike sqrt_near_exact_large,
 * %esi:%edi is not decremented; only the rounding word %eax is set,
 * to 0x7fffff00, before handing over to sqrt_round_result.
 */
469 movl $0x7fffff00,%eax
470 jmp sqrt_round_result