diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2007-10-11 05:16:31 -0400 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2007-10-11 05:16:31 -0400 |
commit | da957e111bb0c189a4a3bf8a00caaecb59ed94ca (patch) | |
tree | 6916075fdd3e28869dcd3dfa2cf160a74d1cb02e /arch/x86/math-emu | |
parent | 2ec1df4130c60d1eb49dc0fa0ed15858fede6b05 (diff) |
i386: move math-emu
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/math-emu')
47 files changed, 13419 insertions, 0 deletions
diff --git a/arch/x86/math-emu/Makefile b/arch/x86/math-emu/Makefile new file mode 100644 index 000000000000..9c943fa6ce6b --- /dev/null +++ b/arch/x86/math-emu/Makefile | |||
@@ -0,0 +1,30 @@ | |||
1 | # | ||
2 | # Makefile for wm-FPU-emu | ||
3 | # Per-directory flags are handed to kbuild via EXTRA_CFLAGS / EXTRA_AFLAGS. | ||
4 | |||
5 | #DEBUG = -DDEBUGGING | ||
6 | DEBUG = | ||
7 | PARANOID = -DPARANOID | ||
8 | EXTRA_CFLAGS := $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION) | ||
9 | |||
10 | EXTRA_AFLAGS := $(PARANOID) | ||
11 | |||
12 | # From 'C' language sources: | ||
13 | C_OBJS =fpu_entry.o errors.o \ | ||
14 | fpu_arith.o fpu_aux.o fpu_etc.o fpu_tags.o fpu_trig.o \ | ||
15 | load_store.o get_address.o \ | ||
16 | poly_atan.o poly_l2.o poly_2xm1.o poly_sin.o poly_tan.o \ | ||
17 | reg_add_sub.o reg_compare.o reg_constant.o reg_convert.o \ | ||
18 | reg_ld_str.o reg_divide.o reg_mul.o | ||
19 | |||
20 | # From 80x86 assembler sources: | ||
21 | A_OBJS =reg_u_add.o reg_u_div.o reg_u_mul.o reg_u_sub.o \ | ||
22 | div_small.o reg_norm.o reg_round.o \ | ||
23 | wm_shrx.o wm_sqrt.o \ | ||
24 | div_Xsig.o polynom_Xsig.o round_Xsig.o \ | ||
25 | shr_Xsig.o mul_Xsig.o | ||
26 | |||
27 | obj-y =$(C_OBJS) $(A_OBJS) | ||
28 | |||
29 | proto: | ||
30 | cproto -e -DMAKING_PROTO *.c >fpu_proto.h | ||
diff --git a/arch/x86/math-emu/README b/arch/x86/math-emu/README new file mode 100644 index 000000000000..e6235491d6eb --- /dev/null +++ b/arch/x86/math-emu/README | |||
@@ -0,0 +1,427 @@ | |||
1 | +---------------------------------------------------------------------------+ | ||
2 | | wm-FPU-emu an FPU emulator for 80386 and 80486SX microprocessors. | | ||
3 | | | | ||
4 | | Copyright (C) 1992,1993,1994,1995,1996,1997,1999 | | ||
5 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
6 | | Australia. E-mail billm@melbpc.org.au | | ||
7 | | | | ||
8 | | This program is free software; you can redistribute it and/or modify | | ||
9 | | it under the terms of the GNU General Public License version 2 as | | ||
10 | | published by the Free Software Foundation. | | ||
11 | | | | ||
12 | | This program is distributed in the hope that it will be useful, | | ||
13 | | but WITHOUT ANY WARRANTY; without even the implied warranty of | | ||
14 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | ||
15 | | GNU General Public License for more details. | | ||
16 | | | | ||
17 | | You should have received a copy of the GNU General Public License | | ||
18 | | along with this program; if not, write to the Free Software | | ||
19 | | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | | ||
20 | | | | ||
21 | +---------------------------------------------------------------------------+ | ||
22 | |||
23 | |||
24 | |||
25 | wm-FPU-emu is an FPU emulator for Linux. It is derived from wm-emu387 | ||
26 | which was my 80387 emulator for early versions of djgpp (gcc under | ||
27 | msdos); wm-emu387 was in turn based upon emu387 which was written by | ||
28 | DJ Delorie for djgpp. The interface to the Linux kernel is based upon | ||
29 | the original Linux math emulator by Linus Torvalds. | ||
30 | |||
31 | My target FPU for wm-FPU-emu is that described in the Intel486 | ||
32 | Programmer's Reference Manual (1992 edition). Unfortunately, numerous | ||
33 | facets of the functioning of the FPU are not well covered in the | ||
34 | Reference Manual. The information in the manual has been supplemented | ||
35 | with measurements on real 80486's. Unfortunately, it is simply not | ||
36 | possible to be sure that all of the peculiarities of the 80486 have | ||
37 | been discovered, so there are always likely to be obscure differences | ||
38 | in the detailed behaviour of the emulator and a real 80486. | ||
39 | |||
40 | wm-FPU-emu does not implement all of the behaviour of the 80486 FPU, | ||
41 | but is very close. See "Limitations" later in this file for a list of | ||
42 | some differences. | ||
43 | |||
44 | Please report bugs, etc to me at: | ||
45 | billm@melbpc.org.au | ||
46 | or b.metzenthen@medoto.unimelb.edu.au | ||
47 | |||
48 | For more information on the emulator and on floating point topics, see | ||
49 | my web pages, currently at http://www.suburbia.net/~billm/ | ||
50 | |||
51 | |||
52 | --Bill Metzenthen | ||
53 | December 1999 | ||
54 | |||
55 | |||
56 | ----------------------- Internals of wm-FPU-emu ----------------------- | ||
57 | |||
58 | Numeric algorithms: | ||
59 | (1) Add, subtract, and multiply. Nothing remarkable in these. | ||
60 | (2) Divide has been tuned to get reasonable performance. The algorithm | ||
61 | is not the obvious one which most people seem to use, but is designed | ||
62 | to take advantage of the characteristics of the 80386. I expect that | ||
63 | it has been invented many times before I discovered it, but I have not | ||
64 | seen it. It is based upon one of those ideas which one carries around | ||
65 | for years without ever bothering to check it out. | ||
66 | (3) The sqrt function has been tuned to get good performance. It is based | ||
67 | upon Newton's classic method. Performance was improved by capitalizing | ||
68 | upon the properties of Newton's method, and the code is once again | ||
69 | structured taking account of the 80386 characteristics. | ||
70 | (4) The trig, log, and exp functions are based in each case upon quasi- | ||
71 | "optimal" polynomial approximations. My definition of "optimal" was | ||
72 | based upon getting good accuracy with reasonable speed. | ||
73 | (5) The argument reducing code for the trig function effectively uses | ||
74 | a value of pi which is accurate to more than 128 bits. As a consequence, | ||
75 | the reduced argument is accurate to more than 64 bits for arguments up | ||
76 | to a few pi, and accurate to more than 64 bits for most arguments, | ||
77 | even for arguments approaching 2^63. This is far superior to an | ||
78 | 80486, which uses a value of pi which is accurate to 66 bits. | ||
79 | |||
80 | The code of the emulator is complicated slightly by the need to | ||
81 | account for a limited form of re-entrancy. Normally, the emulator will | ||
82 | emulate each FPU instruction to completion without interruption. | ||
83 | However, it may happen that when the emulator is accessing the user | ||
84 | memory space, swapping may be needed. In this case the emulator may be | ||
85 | temporarily suspended while disk i/o takes place. During this time | ||
86 | another process may use the emulator, thereby perhaps changing static | ||
87 | variables. The code which accesses user memory is confined to five | ||
88 | files: | ||
89 | fpu_entry.c | ||
90 | reg_ld_str.c | ||
91 | load_store.c | ||
92 | get_address.c | ||
93 | errors.c | ||
94 | As from version 1.12 of the emulator, no static variables are used | ||
95 | (apart from those in the kernel's per-process tables). The emulator is | ||
96 | therefore now fully re-entrant, rather than having just the restricted | ||
97 | form of re-entrancy which is required by the Linux kernel. | ||
98 | |||
99 | ----------------------- Limitations of wm-FPU-emu ----------------------- | ||
100 | |||
101 | There are a number of differences between the current wm-FPU-emu | ||
102 | (version 2.01) and the 80486 FPU (apart from bugs). The differences | ||
103 | are fewer than those which applied to the 1.xx series of the emulator. | ||
104 | Some of the more important differences are listed below: | ||
105 | |||
106 | The Roundup flag does not have much meaning for the transcendental | ||
107 | functions and its 80486 value with these functions is likely to differ | ||
108 | from its emulator value. | ||
109 | |||
110 | In a few rare cases the Underflow flag obtained with the emulator will | ||
111 | be different from that obtained with an 80486. This occurs when the | ||
112 | following conditions apply simultaneously: | ||
113 | (a) the operands have a higher precision than the current setting of the | ||
114 | precision control (PC) flags. | ||
115 | (b) the underflow exception is masked. | ||
116 | (c) the magnitude of the exact result (before rounding) is less than 2^-16382. | ||
117 | (d) the magnitude of the final result (after rounding) is exactly 2^-16382. | ||
118 | (e) the magnitude of the exact result would be exactly 2^-16382 if the | ||
119 | operands were rounded to the current precision before the arithmetic | ||
120 | operation was performed. | ||
121 | If all of these apply, the emulator will set the Underflow flag but a real | ||
122 | 80486 will not. | ||
123 | |||
124 | NOTE: Certain formats of Extended Real are UNSUPPORTED. They are | ||
125 | unsupported by the 80486. They are the Pseudo-NaNs, Pseudoinfinities, | ||
126 | and Unnormals. None of these will be generated by an 80486 or by the | ||
127 | emulator. Do not use them. The emulator treats them differently in | ||
128 | detail from the way an 80486 does. | ||
129 | |||
130 | Self modifying code can cause the emulator to fail. An example of such | ||
131 | code is: | ||
132 | movl %esp,[%ebx] | ||
133 | fld1 | ||
134 | The FPU instruction may be (usually will be) loaded into the pre-fetch | ||
135 | queue of the CPU before the mov instruction is executed. If the | ||
136 | destination of the 'movl' overlaps the FPU instruction then the bytes | ||
137 | in the prefetch queue and memory will be inconsistent when the FPU | ||
138 | instruction is executed. The emulator will be invoked but will not be | ||
139 | able to find the instruction which caused the device-not-present | ||
140 | exception. For this case, the emulator cannot emulate the behaviour of | ||
141 | an 80486DX. | ||
142 | |||
143 | Handling of the address size override prefix byte (0x67) has not been | ||
144 | extensively tested yet. A major problem exists because using it in | ||
145 | vm86 mode can cause a general protection fault. Address offsets | ||
146 | greater than 0xffff appear to be illegal in vm86 mode but are quite | ||
147 | acceptable (and work) in real mode. A small test program developed to | ||
148 | check the addressing, and which runs successfully in real mode, | ||
149 | crashes dosemu under Linux and also brings Windows down with a general | ||
150 | protection fault message when run under the MS-DOS prompt of Windows | ||
151 | 3.1. (The program simply reads data from a valid address). | ||
152 | |||
153 | The emulator supports 16-bit protected mode, with one difference from | ||
154 | an 80486DX. An 80486DX will allow some floating point instructions to | ||
155 | write a few bytes below the lowest address of the stack. The emulator | ||
156 | will not allow this in 16-bit protected mode: no instructions are | ||
157 | allowed to write outside the bounds set by the protection. | ||
158 | |||
159 | ----------------------- Performance of wm-FPU-emu ----------------------- | ||
160 | |||
161 | Speed. | ||
162 | ----- | ||
163 | |||
164 | The speed of floating point computation with the emulator will depend | ||
165 | upon instruction mix. Relative performance is best for the instructions | ||
166 | which require most computation. The simple instructions are adversely | ||
167 | affected by the FPU instruction trap overhead. | ||
168 | |||
169 | |||
170 | Timing: Some simple timing tests have been made on the emulator functions. | ||
171 | The times include load/store instructions. All times are in microseconds | ||
172 | measured on a 33MHz 386 with 64k cache. The Turbo C tests were under | ||
173 | ms-dos, the next two columns are for emulators running with the djgpp | ||
174 | ms-dos extender. The final column is for wm-FPU-emu in Linux 0.97, | ||
175 | using libm4.0 (hard). | ||
176 | |||
177 | function Turbo C djgpp 1.06 WM-emu387 wm-FPU-emu | ||
178 | |||
179 | + 60.5 154.8 76.5 139.4 | ||
180 | - 61.1-65.5 157.3-160.8 76.2-79.5 142.9-144.7 | ||
181 | * 71.0 190.8 79.6 146.6 | ||
182 | / 61.2-75.0 261.4-266.9 75.3-91.6 142.2-158.1 | ||
183 | |||
184 | sin() 310.8 4692.0 319.0 398.5 | ||
185 | cos() 284.4 4855.2 308.0 388.7 | ||
186 | tan() 495.0 8807.1 394.9 504.7 | ||
187 | atan() 328.9 4866.4 601.1 419.5-491.9 | ||
188 | |||
189 | sqrt() 128.7 crashed 145.2 227.0 | ||
190 | log() 413.1-419.1 5103.4-5354.21 254.7-282.2 409.4-437.1 | ||
191 | exp() 479.1 6619.2 469.1 850.8 | ||
192 | |||
193 | |||
194 | The performance under Linux is improved by the use of look-ahead code. | ||
195 | The following results show the improvement which is obtained under | ||
196 | Linux due to the look-ahead code. Also given are the times for the | ||
197 | original Linux emulator with the 4.1 'soft' lib. | ||
198 | |||
199 | [ Linus' note: I changed look-ahead to be the default under linux, as | ||
200 | there was no reason not to use it after I had edited it to be | ||
201 | disabled during tracing ] | ||
202 | |||
203 | wm-FPU-emu w original w | ||
204 | look-ahead 'soft' lib | ||
205 | + 106.4 190.2 | ||
206 | - 108.6-111.6 192.4-216.2 | ||
207 | * 113.4 193.1 | ||
208 | / 108.8-124.4 700.1-706.2 | ||
209 | |||
210 | sin() 390.5 2642.0 | ||
211 | cos() 381.5 2767.4 | ||
212 | tan() 496.5 3153.3 | ||
213 | atan() 367.2-435.5 2439.4-3396.8 | ||
214 | |||
215 | sqrt() 195.1 4732.5 | ||
216 | log() 358.0-387.5 3359.2-3390.3 | ||
217 | exp() 619.3 4046.4 | ||
218 | |||
219 | |||
220 | These figures are now somewhat out-of-date. The emulator has become | ||
221 | progressively slower for most functions as more of the 80486 features | ||
222 | have been implemented. | ||
223 | |||
224 | |||
225 | ----------------------- Accuracy of wm-FPU-emu ----------------------- | ||
226 | |||
227 | |||
228 | The accuracy of the emulator is in almost all cases equal to or better | ||
229 | than that of an Intel 80486 FPU. | ||
230 | |||
231 | The results of the basic arithmetic functions (+,-,*,/), and fsqrt | ||
232 | match those of an 80486 FPU. They are the best possible; the error for | ||
233 | these never exceeds 1/2 an lsb. The fprem and fprem1 instructions | ||
234 | return exact results; they have no error. | ||
235 | |||
236 | |||
237 | The following table compares the emulator accuracy for the sqrt(), | ||
238 | trig and log functions against the Turbo C "emulator". For this table, | ||
239 | each function was tested at about 400 points. Ideal worst-case results | ||
240 | would be 64 bits. The reduced Turbo C accuracy of cos() and tan() for | ||
241 | arguments greater than pi/4 can be thought of as being related to the | ||
242 | precision of the argument x; e.g. an argument of pi/2-(1e-10) which is | ||
243 | accurate to 64 bits can result in a relative accuracy in cos() of | ||
244 | about 64 + log2(cos(x)) = 31 bits. | ||
245 | |||
246 | |||
247 | Function Tested x range Worst result Turbo C | ||
248 | (relative bits) | ||
249 | |||
250 | sqrt(x) 1 .. 2 64.1 63.2 | ||
251 | atan(x) 1e-10 .. 200 64.2 62.8 | ||
252 | cos(x) 0 .. pi/2-(1e-10) 64.4 (x <= pi/4) 62.4 | ||
253 | 64.1 (x = pi/2-(1e-10)) 31.9 | ||
254 | sin(x) 1e-10 .. pi/2 64.0 62.8 | ||
255 | tan(x) 1e-10 .. pi/2-(1e-10) 64.0 (x <= pi/4) 62.1 | ||
256 | 64.1 (x = pi/2-(1e-10)) 31.9 | ||
257 | exp(x) 0 .. 1 63.1 ** 62.9 | ||
258 | log(x) 1+1e-6 .. 2 63.8 ** 62.1 | ||
259 | |||
260 | ** The accuracy for exp() and log() is low because the FPU (emulator) | ||
261 | does not compute them directly; two operations are required. | ||
262 | |||
263 | |||
264 | The emulator passes the "paranoia" tests (compiled with gcc 2.3.3 or | ||
265 | later) for 'float' variables (24 bit precision numbers) when precision | ||
266 | control is set to 24, 53 or 64 bits, and for 'double' variables (53 | ||
267 | bit precision numbers) when precision control is set to 53 bits (a | ||
268 | properly performing FPU cannot pass the 'paranoia' tests for 'double' | ||
269 | variables when precision control is set to 64 bits). | ||
270 | |||
271 | The code for reducing the argument for the trig functions (fsin, fcos, | ||
272 | fptan and fsincos) has been improved and now effectively uses a value | ||
273 | for pi which is accurate to more than 128 bits precision. As a | ||
274 | consequence, the accuracy of these functions for large arguments has | ||
275 | been dramatically improved (and is now very much better than an 80486 | ||
276 | FPU). There is also now no degradation of accuracy for fcos and fptan | ||
277 | for operands close to pi/2. Measured results are (note that the | ||
278 | definition of accuracy has changed slightly from that used for the | ||
279 | above table): | ||
280 | |||
281 | Function Tested x range Worst result | ||
282 | (absolute bits) | ||
283 | |||
284 | cos(x) 0 .. 9.22e+18 62.0 | ||
285 | sin(x) 1e-16 .. 9.22e+18 62.1 | ||
286 | tan(x) 1e-16 .. 9.22e+18 61.8 | ||
287 | |||
288 | It is possible with some effort to find very large arguments which | ||
289 | give much degraded precision. For example, the integer number | ||
290 | 8227740058411162616.0 | ||
291 | is within about 10e-7 of a multiple of pi. To find the tan (for | ||
292 | example) of this number to 64 bits precision it would be necessary to | ||
293 | have a value of pi which had about 150 bits precision. The FPU | ||
294 | emulator computes the result to about 42.6 bits precision (the correct | ||
295 | result is about -9.739715e-8). On the other hand, an 80486 FPU returns | ||
296 | 0.01059, which in relative terms is hopelessly inaccurate. | ||
297 | |||
298 | For arguments close to critical angles (which occur at multiples of | ||
299 | pi/2) the emulator is more accurate than an 80486 FPU. For very large | ||
300 | arguments, the emulator is far more accurate. | ||
301 | |||
302 | |||
303 | Prior to version 1.20 of the emulator, the accuracy of the results for | ||
304 | the transcendental functions (in their principal range) was not as | ||
305 | good as the results from an 80486 FPU. From version 1.20, the accuracy | ||
306 | has been considerably improved and these functions now give measured | ||
307 | worst-case results which are better than the worst-case results given | ||
308 | by an 80486 FPU. | ||
309 | |||
310 | The following table gives the measured results for the emulator. The | ||
311 | number of randomly selected arguments in each case is about half a | ||
312 | million. The group of three columns gives the frequency of the given | ||
313 | accuracy in number of times per million, thus the second of these | ||
314 | columns shows that an accuracy of between 63.80 and 63.89 bits was | ||
315 | found at a rate of 133 times per one million measurements for fsin. | ||
316 | The results show that the fsin, fcos and fptan instructions return | ||
317 | results which are in error (i.e. less accurate than the best possible | ||
318 | result (which is 64 bits)) for about one per cent of all arguments | ||
319 | between -pi/2 and +pi/2. The other instructions have a lower | ||
320 | frequency of results which are in error. The last two columns give | ||
321 | the worst accuracy which was found (in bits) and the approximate value | ||
322 | of the argument which produced it. | ||
323 | |||
324 | frequency (per M) | ||
325 | ------------------- --------------- | ||
326 | instr arg range # tests 63.7 63.8 63.9 worst at arg | ||
327 | bits bits bits bits | ||
328 | ----- ------------ ------- ---- ---- ----- ----- -------- | ||
329 | fsin (0,pi/2) 547756 0 133 10673 63.89 0.451317 | ||
330 | fcos (0,pi/2) 547563 0 126 10532 63.85 0.700801 | ||
331 | fptan (0,pi/2) 536274 11 267 10059 63.74 0.784876 | ||
332 | fpatan 4 quadrants 517087 0 8 1855 63.88 0.435121 (4q) | ||
333 | fyl2x (0,20) 541861 0 0 1323 63.94 1.40923 (x) | ||
334 | fyl2xp1 (-.293,.414) 520256 0 0 5678 63.93 0.408542 (x) | ||
335 | f2xm1 (-1,1) 538847 4 481 6488 63.79 0.167709 | ||
336 | |||
337 | |||
338 | Tests performed on an 80486 FPU showed results of lower accuracy. The | ||
339 | following table gives the results which were obtained with an AMD | ||
340 | 486DX2/66 (other tests indicate that an Intel 486DX produces | ||
341 | identical results). The tests were basically the same as those used | ||
342 | to measure the emulator (the values, being random, were in general not | ||
343 | the same). The total number of tests for each instruction is given | ||
344 | at the end of the table; in each case about 100k tests were performed. | ||
345 | Another line of figures at the end of the table shows that most of the | ||
346 | instructions return results which are in error for more than 10 | ||
347 | percent of the arguments tested. | ||
348 | |||
349 | The numbers in the body of the table give the approx number of times a | ||
350 | result of the given accuracy in bits (given in the left-most column) | ||
351 | was obtained per one million arguments. For three of the instructions, | ||
352 | two columns of results are given: * The second column for f2xm1 gives | ||
353 | the number of cases where the results of the first column were for a | ||
354 | positive argument, this shows that this instruction gives better | ||
355 | results for positive arguments than it does for negative. * In the | ||
356 | cases of fcos and fptan, the first column gives the results when all | ||
357 | cases where arguments greater than 1.5 were removed from the results | ||
358 | given in the second column. Unlike the emulator, an 80486 FPU returns | ||
359 | results of relatively poor accuracy for these instructions when the | ||
360 | argument approaches pi/2. The table does not show those cases when the | ||
361 | accuracy of the results was less than 62 bits, which occurs quite | ||
362 | often for fcos and fptan when the argument approaches pi/2. This poor | ||
363 | accuracy is discussed above in relation to the Turbo C "emulator", and | ||
364 | the accuracy of the value of pi. | ||
365 | |||
366 | |||
367 | bits f2xm1 f2xm1 fpatan fcos fcos fyl2x fyl2xp1 fsin fptan fptan | ||
368 | 62.0 0 0 0 0 437 0 0 0 0 925 | ||
369 | 62.1 0 0 10 0 894 0 0 0 0 1023 | ||
370 | 62.2 14 0 0 0 1033 0 0 0 0 945 | ||
371 | 62.3 57 0 0 0 1202 0 0 0 0 1023 | ||
372 | 62.4 385 0 0 10 1292 0 23 0 0 1178 | ||
373 | 62.5 1140 0 0 119 1649 0 39 0 0 1149 | ||
374 | 62.6 2037 0 0 189 1620 0 16 0 0 1169 | ||
375 | 62.7 5086 14 0 646 2315 10 101 35 39 1402 | ||
376 | 62.8 8818 86 0 984 3050 59 287 131 224 2036 | ||
377 | 62.9 11340 1355 0 2126 4153 79 605 357 321 1948 | ||
378 | 63.0 15557 4750 0 3319 5376 246 1281 862 808 2688 | ||
379 | 63.1 20016 8288 0 4620 6628 511 2569 1723 1510 3302 | ||
380 | 63.2 24945 11127 10 6588 8098 1120 4470 2968 2990 4724 | ||
381 | 63.3 25686 12382 69 8774 10682 1906 6775 4482 5474 7236 | ||
382 | 63.4 29219 14722 79 11109 12311 3094 9414 7259 8912 10587 | ||
383 | 63.5 30458 14936 393 13802 15014 5874 12666 9609 13762 15262 | ||
384 | 63.6 32439 16448 1277 17945 19028 10226 15537 14657 19158 20346 | ||
385 | 63.7 35031 16805 4067 23003 23947 18910 20116 21333 25001 26209 | ||
386 | 63.8 33251 15820 7673 24781 25675 24617 25354 24440 29433 30329 | ||
387 | 63.9 33293 16833 18529 28318 29233 31267 31470 27748 29676 30601 | ||
388 | |||
389 | Per cent with error: | ||
390 | 30.9 3.2 18.5 9.8 13.1 11.6 17.4 | ||
391 | Total arguments tested: | ||
392 | 70194 70099 101784 100641 100641 101799 128853 114893 102675 102675 | ||
393 | |||
394 | |||
395 | ------------------------- Contributors ------------------------------- | ||
396 | |||
397 | A number of people have contributed to the development of the | ||
398 | emulator, often by just reporting bugs, sometimes with suggested | ||
399 | fixes, and a few kind people have provided me with access in one way | ||
400 | or another to an 80486 machine. Contributors include (to those people | ||
401 | who I may have forgotten, please forgive me): | ||
402 | |||
403 | Linus Torvalds | ||
404 | Tommy.Thorn@daimi.aau.dk | ||
405 | Andrew.Tridgell@anu.edu.au | ||
406 | Nick Holloway, alfie@dcs.warwick.ac.uk | ||
407 | Hermano Moura, moura@dcs.gla.ac.uk | ||
408 | Jon Jagger, J.Jagger@scp.ac.uk | ||
409 | Lennart Benschop | ||
410 | Brian Gallew, geek+@CMU.EDU | ||
411 | Thomas Staniszewski, ts3v+@andrew.cmu.edu | ||
412 | Martin Howell, mph@plasma.apana.org.au | ||
413 | M Saggaf, alsaggaf@athena.mit.edu | ||
414 | Peter Barker, PETER@socpsy.sci.fau.edu | ||
415 | tom@vlsivie.tuwien.ac.at | ||
416 | Dan Russel, russed@rpi.edu | ||
417 | Daniel Carosone, danielce@ee.mu.oz.au | ||
418 | cae@jpmorgan.com | ||
419 | Hamish Coleman, t933093@minyos.xx.rmit.oz.au | ||
420 | Bruce Evans, bde@kralizec.zeta.org.au | ||
421 | Timo Korvola, Timo.Korvola@hut.fi | ||
422 | Rick Lyons, rick@razorback.brisnet.org.au | ||
423 | Rick, jrs@world.std.com | ||
424 | |||
425 | ...and numerous others who responded to my request for help with | ||
426 | a real 80486. | ||
427 | |||
diff --git a/arch/x86/math-emu/control_w.h b/arch/x86/math-emu/control_w.h new file mode 100644 index 000000000000..ae2274dbd305 --- /dev/null +++ b/arch/x86/math-emu/control_w.h | |||
@@ -0,0 +1,45 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | control_w.h | | ||
3 | | | | ||
4 | | Copyright (C) 1992,1993 | | ||
5 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
6 | | Australia. E-mail billm@vaxc.cc.monash.edu.au | | ||
7 | | | | ||
8 | +---------------------------------------------------------------------------*/ | ||
9 | |||
10 | #ifndef _CONTROLW_H_ | ||
11 | #define _CONTROLW_H_ | ||
12 | |||
13 | #ifdef __ASSEMBLY__ | ||
14 | #define _Const_(x) $##x | ||
15 | #else | ||
16 | #define _Const_(x) x | ||
17 | #endif | ||
18 | |||
19 | #define CW_RC _Const_(0x0C00) /* rounding control */ | ||
20 | #define CW_PC _Const_(0x0300) /* precision control */ | ||
21 | |||
22 | #define CW_Precision _Const_(0x0020) /* loss of precision mask */ | ||
23 | #define CW_Underflow _Const_(0x0010) /* underflow mask */ | ||
24 | #define CW_Overflow _Const_(0x0008) /* overflow mask */ | ||
25 | #define CW_ZeroDiv _Const_(0x0004) /* divide by zero mask */ | ||
26 | #define CW_Denormal _Const_(0x0002) /* denormalized operand mask */ | ||
27 | #define CW_Invalid _Const_(0x0001) /* invalid operation mask */ | ||
28 | |||
29 | #define CW_Exceptions _Const_(0x003f) /* all masks */ | ||
30 | |||
31 | #define RC_RND _Const_(0x0000) | ||
32 | #define RC_DOWN _Const_(0x0400) | ||
33 | #define RC_UP _Const_(0x0800) | ||
34 | #define RC_CHOP _Const_(0x0C00) | ||
35 | |||
36 | /* p 15-5: Precision control bits affect only the following: | ||
37 | ADD, SUB(R), MUL, DIV(R), and SQRT */ | ||
38 | #define PR_24_BITS _Const_(0x000) | ||
39 | #define PR_53_BITS _Const_(0x200) | ||
40 | #define PR_64_BITS _Const_(0x300) | ||
41 | #define PR_RESERVED_BITS _Const_(0x100) | ||
42 | /* FULL_PRECISION simulates all exceptions masked */ | ||
43 | #define FULL_PRECISION (PR_64_BITS | RC_RND | 0x3f) | ||
44 | |||
45 | #endif /* _CONTROLW_H_ */ | ||
diff --git a/arch/x86/math-emu/div_Xsig.S b/arch/x86/math-emu/div_Xsig.S new file mode 100644 index 000000000000..f77ba3058b31 --- /dev/null +++ b/arch/x86/math-emu/div_Xsig.S | |||
@@ -0,0 +1,365 @@ | |||
1 | .file "div_Xsig.S" | ||
2 | /*---------------------------------------------------------------------------+ | ||
3 | | div_Xsig.S | | ||
4 | | | | ||
5 | | Division subroutine for 96 bit quantities | | ||
6 | | | | ||
7 | | Copyright (C) 1994,1995 | | ||
8 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
9 | | Australia. E-mail billm@jacobi.maths.monash.edu.au | | ||
10 | | | | ||
11 | | | | ||
12 | +---------------------------------------------------------------------------*/ | ||
13 | |||
14 | /*---------------------------------------------------------------------------+ | ||
15 | | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and | | ||
16 | | put the 96 bit result at the location d. | | ||
17 | | | | ||
18 | | The result may not be accurate to 96 bits. It is intended for use where | | ||
19 | | a result better than 64 bits is required. The result should usually be | | ||
20 | | good to at least 94 bits. | | ||
21 | | The returned result is actually the quotient multiplied by one half | | ||
22 | | (the dividend is halved first). This is done to prevent overflow. | | ||
23 | | | | ||
24 | | .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb -> .dddddddddddd | | ||
25 | | | | ||
26 | | void div_Xsig(Xsig *a, Xsig *b, Xsig *dest) | | ||
27 | | | | ||
28 | +---------------------------------------------------------------------------*/ | ||
29 | |||
30 | #include "exception.h" | ||
31 | #include "fpu_emu.h" | ||
32 | |||
33 | |||
34 | #define XsigLL(x) (x) /* offset 0: least significant dword */ | ||
35 | #define XsigL(x) 4(x) /* offset 4: middle dword */ | ||
36 | #define XsigH(x) 8(x) /* offset 8: most significant dword */ | ||
37 | |||
38 | |||
39 | #ifndef NON_REENTRANT_FPU | ||
40 | /* | ||
41 | Local storage on the stack (7 dwords below %ebp): | ||
42 | Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 | ||
43 | Quotient: FPU_result_3:FPU_result_2:FPU_result_1 */ | ||
44 | #define FPU_accum_3 -4(%ebp) | ||
45 | #define FPU_accum_2 -8(%ebp) | ||
46 | #define FPU_accum_1 -12(%ebp) | ||
47 | #define FPU_accum_0 -16(%ebp) | ||
48 | #define FPU_result_3 -20(%ebp) | ||
49 | #define FPU_result_2 -24(%ebp) | ||
50 | #define FPU_result_1 -28(%ebp) | ||
51 | |||
52 | #else | ||
53 | .data | ||
54 | /* | ||
55 | Local storage in a static area (one copy in .data, not per call): | ||
56 | Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 | ||
57 | Quotient: FPU_result_3:FPU_result_2:FPU_result_1 */ | ||
58 | .align 4,0 | ||
59 | FPU_accum_3: | ||
60 | .long 0 | ||
61 | FPU_accum_2: | ||
62 | .long 0 | ||
63 | FPU_accum_1: | ||
64 | .long 0 | ||
65 | FPU_accum_0: | ||
66 | .long 0 | ||
67 | FPU_result_3: | ||
68 | .long 0 | ||
69 | FPU_result_2: | ||
70 | .long 0 | ||
71 | FPU_result_1: | ||
72 | .long 0 | ||
73 | #endif /* NON_REENTRANT_FPU */ | ||
74 | |||
75 | |||
76 | .text | ||
77 | ENTRY(div_Xsig) | ||
78 | pushl %ebp | ||
79 | movl %esp,%ebp | ||
80 | #ifndef NON_REENTRANT_FPU | ||
81 | subl $28,%esp /* 7 local dwords: FPU_accum_0..3, FPU_result_1..3 */ | ||
82 | #endif /* NON_REENTRANT_FPU */ | ||
83 | /* These three registers are popped again at L_exit */ | ||
84 | pushl %esi | ||
85 | pushl %edi | ||
86 | pushl %ebx | ||
87 | |||
88 | movl PARAM1,%esi /* pointer to num */ | ||
89 | movl PARAM2,%ebx /* pointer to denom */ | ||
90 | |||
91 | #ifdef PARANOID | ||
92 | testl $0x80000000, XsigH(%ebx) /* Divisor: top bit must be set */ | ||
93 | je L_bugged /* top bit clear: internal error 0x240 */ | ||
94 | #endif /* PARANOID */ | ||
95 | |||
96 | |||
97 | /*---------------------------------------------------------------------------+ | ||
98 | | Divide: Return arg1/arg2 to arg3. | | ||
99 | | | | ||
100 | | The maximum returned value is (ignoring exponents) | | ||
101 | | .ffffffff ffffffff | | ||
102 | | ------------------ = 1.ffffffff fffffffe | | ||
103 | | .80000000 00000000 | | ||
104 | | and the minimum is | | ||
105 | | .80000000 00000000 | | ||
106 | | ------------------ = .80000000 00000001 (rounded) | | ||
107 | | .ffffffff ffffffff | | ||
108 | | | | ||
109 | +---------------------------------------------------------------------------*/ | ||
110 | |||
111 | /* Save extended dividend in local register */ | ||
112 | |||
113 | /* Divide by 2 to prevent overflow */ | ||
114 | clc /* the first rcrl shifts a zero into bit 31 */ | ||
115 | movl XsigH(%esi),%eax | ||
116 | rcrl %eax /* bit 0 leaves through CF into the next word */ | ||
117 | movl %eax,FPU_accum_3 | ||
118 | movl XsigL(%esi),%eax | ||
119 | rcrl %eax | ||
120 | movl %eax,FPU_accum_2 | ||
121 | movl XsigLL(%esi),%eax | ||
122 | rcrl %eax | ||
123 | movl %eax,FPU_accum_1 | ||
124 | movl $0,%eax /* mov (not xor) so CF from the last rcrl survives */ | ||
125 | rcrl %eax /* the bit shifted out above lands in bit 31 */ | ||
126 | movl %eax,FPU_accum_0 | ||
127 | |||
128 | movl FPU_accum_2,%eax /* Get the current num */ | ||
129 | movl FPU_accum_3,%edx /* edx:eax = FPU_accum_3:FPU_accum_2 */ | ||
130 | |||
131 | /*----------------------------------------------------------------------*/ | ||
132 | /* Initialization done. | ||
133 | Do the first 32 bits. */ | ||
134 | |||
135 | /* We will divide by a number which is too large */ | ||
136 | movl XsigH(%ebx),%ecx | ||
137 | addl $1,%ecx /* augmented denom ms dw; %ecx is reused at LDo_2nd_div */ | ||
138 | jnc LFirst_div_not_1 /* carry set only if the ms dw was 0xffffffff */ | ||
139 | |||
140 | /* here we need to divide by 100000000h, | ||
141 | i.e., no division at all.. */ | ||
142 | mov %edx,%eax /* quotient is simply the high dword of edx:eax */ | ||
143 | jmp LFirst_div_done | ||
144 | |||
145 | LFirst_div_not_1: | ||
146 | divl %ecx /* Divide the numerator by the augmented | ||
147 | denom ms dw */ | ||
148 | |||
149 | LFirst_div_done: | ||
150 | movl %eax,FPU_result_3 /* Put the result in the answer */ | ||
151 | |||
152 | mull XsigH(%ebx) /* mul by the ms dw of the denom */ | ||
153 | |||
154 | subl %eax,FPU_accum_2 /* Subtract from the num local reg */ | ||
155 | sbbl %edx,FPU_accum_3 | ||
156 | |||
157 | movl FPU_result_3,%eax /* Get the result back */ | ||
158 | mull XsigL(%ebx) /* now mul the ls dw of the denom */ | ||
159 | |||
160 | subl %eax,FPU_accum_1 /* Subtract from the num local reg */ | ||
161 | sbbl %edx,FPU_accum_2 | ||
162 | sbbl $0,FPU_accum_3 | ||
163 | je LDo_2nd_32_bits /* Must check for non-zero result here */ | ||
164 | |||
165 | #ifdef PARANOID | ||
166 | jb L_bugged_1 /* borrow out of FPU_accum_3: internal error 0x241 */ | ||
167 | #endif /* PARANOID */ | ||
168 | |||
169 | /* need to subtract the denom once more */ | ||
170 | incl FPU_result_3 /* Correct the answer */ | ||
171 | |||
172 | movl XsigL(%ebx),%eax | ||
173 | movl XsigH(%ebx),%edx | ||
174 | subl %eax,FPU_accum_1 /* Subtract from the num local reg */ | ||
175 | sbbl %edx,FPU_accum_2 | ||
176 | |||
177 | #ifdef PARANOID | ||
178 | sbbl $0,FPU_accum_3 | ||
179 | jne L_bugged_1 /* Must check for non-zero result here */ | ||
180 | #endif /* PARANOID */ | ||
181 | |||
182 | /*----------------------------------------------------------------------*/ | ||
183 | /* Half of the main problem is done, there is just a reduced numerator | ||
184 | to handle now. | ||
185 | Work with the second 32 bits, FPU_accum_0 not used from now on */ | ||
186 | LDo_2nd_32_bits: | ||
187 | movl FPU_accum_2,%edx /* get the reduced num */ | ||
188 | movl FPU_accum_1,%eax | ||
189 | |||
190 | /* need to check for possible subsequent overflow */ | ||
191 | cmpl XsigH(%ebx),%edx /* unsigned 64 bit compare, high dwords first */ | ||
192 | jb LDo_2nd_div | ||
193 | ja LPrevent_2nd_overflow | ||
194 | |||
195 | cmpl XsigL(%ebx),%eax /* high dwords equal: decide on the low dwords */ | ||
196 | jb LDo_2nd_div | ||
197 | |||
198 | LPrevent_2nd_overflow: | ||
199 | /* The numerator is greater or equal, would cause overflow */ | ||
200 | /* prevent overflow */ | ||
201 | subl XsigL(%ebx),%eax | ||
202 | sbbl XsigH(%ebx),%edx | ||
203 | movl %edx,FPU_accum_2 | ||
204 | movl %eax,FPU_accum_1 | ||
205 | |||
206 | incl FPU_result_3 /* Reflect the subtraction in the answer */ | ||
207 | |||
208 | #ifdef PARANOID | ||
209 | je L_bugged_2 /* Can't bump the result to 1.0 */ | ||
210 | #endif /* PARANOID */ | ||
211 | |||
212 | LDo_2nd_div: | ||
213 | cmpl $0,%ecx /* augmented denom msw */ | ||
214 | jnz LSecond_div_not_1 | ||
215 | |||
216 | /* %ecx == 0, we are dividing by 1.0 */ | ||
217 | mov %edx,%eax /* quotient is simply the high dword of edx:eax */ | ||
218 | jmp LSecond_div_done | ||
219 | |||
220 | LSecond_div_not_1: | ||
221 | divl %ecx /* Divide the numerator by the denom ms dw */ | ||
222 | |||
223 | LSecond_div_done: | ||
224 | movl %eax,FPU_result_2 /* Put the result in the answer */ | ||
225 | |||
226 | mull XsigH(%ebx) /* mul by the ms dw of the denom */ | ||
227 | |||
228 | subl %eax,FPU_accum_1 /* Subtract from the num local reg */ | ||
229 | sbbl %edx,FPU_accum_2 | ||
230 | |||
231 | #ifdef PARANOID | ||
232 | jc L_bugged_2 /* borrow here: internal error 0x242 */ | ||
233 | #endif /* PARANOID */ | ||
234 | |||
235 | movl FPU_result_2,%eax /* Get the result back */ | ||
236 | mull XsigL(%ebx) /* now mul the ls dw of the denom */ | ||
237 | |||
238 | subl %eax,FPU_accum_0 /* Subtract from the num local reg */ | ||
239 | sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */ | ||
240 | sbbl $0,FPU_accum_2 | ||
241 | |||
242 | #ifdef PARANOID | ||
243 | jc L_bugged_2 | ||
244 | #endif /* PARANOID */ | ||
245 | |||
246 | jz LDo_3rd_32_bits /* ZF comes from the sbbl $0,FPU_accum_2 above */ | ||
247 | |||
248 | #ifdef PARANOID | ||
249 | cmpl $1,FPU_accum_2 /* any leftover other than 1 is an internal error */ | ||
250 | jne L_bugged_2 | ||
251 | #endif /* PARANOID */ | ||
252 | |||
253 | /* need to subtract the denom once more */ | ||
254 | movl XsigL(%ebx),%eax | ||
255 | movl XsigH(%ebx),%edx | ||
256 | subl %eax,FPU_accum_0 /* Subtract from the num local reg */ | ||
257 | sbbl %edx,FPU_accum_1 | ||
258 | sbbl $0,FPU_accum_2 | ||
259 | |||
260 | #ifdef PARANOID | ||
261 | jc L_bugged_2 | ||
262 | jne L_bugged_2 | ||
263 | #endif /* PARANOID */ | ||
264 | |||
265 | addl $1,FPU_result_2 /* Correct the answer */ | ||
266 | adcl $0,FPU_result_3 | ||
267 | |||
268 | #ifdef PARANOID | ||
269 | jc L_bugged_2 /* Must check for non-zero result here */ | ||
270 | #endif /* PARANOID */ | ||
271 | |||
272 | /*----------------------------------------------------------------------*/ | ||
273 | /* The division is essentially finished here, we just need to perform | ||
274 | tidying operations. | ||
275 | Deal with the 3rd 32 bits */ | ||
276 | LDo_3rd_32_bits: | ||
277 | /* We use an approximation for the third 32 bits. | ||
278 | To take account of the 3rd 32 bits of the divisor | ||
279 | (call them del), we subtract del * (a/b) */ | ||
280 | |||
281 | movl FPU_result_3,%eax /* a/b */ | ||
282 | mull XsigLL(%ebx) /* del */ | ||
283 | |||
284 | subl %edx,FPU_accum_1 /* only the high dword of the product is used */ | ||
285 | |||
286 | /* A borrow indicates that the result is negative */ | ||
287 | jnb LTest_over | ||
288 | |||
289 | movl XsigH(%ebx),%edx | ||
290 | addl %edx,FPU_accum_1 /* add the denom ms dw back in */ | ||
291 | |||
292 | subl $1,FPU_result_2 /* Adjust the answer */ | ||
293 | sbbl $0,FPU_result_3 | ||
294 | |||
295 | /* The above addition might not have been enough, check again. */ | ||
296 | movl FPU_accum_1,%edx /* get the reduced num */ | ||
297 | cmpl XsigH(%ebx),%edx /* denom */ | ||
298 | jb LDo_3rd_div | ||
299 | |||
300 | movl XsigH(%ebx),%edx | ||
301 | addl %edx,FPU_accum_1 | ||
302 | |||
303 | subl $1,FPU_result_2 /* Adjust the answer */ | ||
304 | sbbl $0,FPU_result_3 | ||
305 | jmp LDo_3rd_div | ||
306 | |||
307 | LTest_over: | ||
308 | movl FPU_accum_1,%edx /* get the reduced num */ | ||
309 | |||
310 | /* need to check for possible subsequent overflow */ | ||
311 | cmpl XsigH(%ebx),%edx /* denom */ | ||
312 | jb LDo_3rd_div | ||
313 | |||
314 | /* prevent overflow */ | ||
315 | subl XsigH(%ebx),%edx | ||
316 | movl %edx,FPU_accum_1 | ||
317 | |||
318 | addl $1,FPU_result_2 /* Reflect the subtraction in the answer */ | ||
319 | adcl $0,FPU_result_3 | ||
320 | |||
321 | LDo_3rd_div: | ||
322 | movl FPU_accum_0,%eax | ||
323 | movl FPU_accum_1,%edx /* edx:eax = FPU_accum_1:FPU_accum_0 */ | ||
324 | divl XsigH(%ebx) /* divide by the (un-augmented) denom ms dw */ | ||
325 | |||
326 | movl %eax,FPU_result_1 /* Rough estimate of third word */ | ||
327 | |||
328 | movl PARAM3,%esi /* pointer to answer */ | ||
329 | |||
330 | movl FPU_result_1,%eax | ||
331 | movl %eax,XsigLL(%esi) | ||
332 | movl FPU_result_2,%eax | ||
333 | movl %eax,XsigL(%esi) | ||
334 | movl FPU_result_3,%eax | ||
335 | movl %eax,XsigH(%esi) | ||
336 | |||
337 | L_exit: | ||
338 | popl %ebx | ||
339 | popl %edi | ||
340 | popl %esi | ||
341 | |||
342 | leave | ||
343 | ret | ||
344 | |||
345 | |||
346 | #ifdef PARANOID | ||
347 | /* The logic is wrong if we got here; the answer is left unwritten */ | ||
348 | L_bugged: | ||
349 | pushl EX_INTERNAL|0x240 | ||
350 | call EXCEPTION | ||
351 | pop %ebx /* drop the pushed argument; L_exit reloads %ebx */ | ||
352 | jmp L_exit | ||
353 | |||
354 | L_bugged_1: | ||
355 | pushl EX_INTERNAL|0x241 | ||
356 | call EXCEPTION | ||
357 | pop %ebx /* drop the pushed argument; L_exit reloads %ebx */ | ||
358 | jmp L_exit | ||
359 | |||
360 | L_bugged_2: | ||
361 | pushl EX_INTERNAL|0x242 | ||
362 | call EXCEPTION | ||
363 | pop %ebx /* drop the pushed argument; L_exit reloads %ebx */ | ||
364 | jmp L_exit | ||
365 | #endif /* PARANOID */ | ||
diff --git a/arch/x86/math-emu/div_small.S b/arch/x86/math-emu/div_small.S new file mode 100644 index 000000000000..47099628fa4c --- /dev/null +++ b/arch/x86/math-emu/div_small.S | |||
@@ -0,0 +1,47 @@ | |||
1 | .file "div_small.S" | ||
2 | /*---------------------------------------------------------------------------+ | ||
3 | | div_small.S | | ||
4 | | | | ||
5 | | Divide a 64 bit integer by a 32 bit integer & return remainder. | | ||
6 | | | | ||
7 | | Copyright (C) 1992,1995 | | ||
8 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
9 | | Australia. E-mail billm@jacobi.maths.monash.edu.au | | ||
10 | | | | ||
11 | | | | ||
12 | +---------------------------------------------------------------------------*/ | ||
13 | |||
14 | /*---------------------------------------------------------------------------+ | ||
15 | | unsigned long FPU_div_small(unsigned long long *x, unsigned long y) | | ||
16 | +---------------------------------------------------------------------------*/ | ||
17 | |||
18 | #include "fpu_emu.h" | ||
19 | |||
20 | .text | ||
21 | ENTRY(FPU_div_small) | ||
22 | pushl %ebp | ||
23 | movl %esp,%ebp | ||
24 | /* Divides *x by y in place (schoolbook, msw first); returns the remainder */ | ||
25 | pushl %esi | ||
26 | |||
27 | movl PARAM1,%esi /* pointer to num */ | ||
28 | movl PARAM2,%ecx /* The denominator */ | ||
29 | /* NOTE(review): no check for a zero denominator before the divl below */ | ||
30 | movl 4(%esi),%eax /* Get the current num msw */ | ||
31 | xorl %edx,%edx /* first dividend is 0:msw */ | ||
32 | divl %ecx /* eax = quotient msw, edx = remainder */ | ||
33 | |||
34 | movl %eax,4(%esi) /* store the quotient msw back into *x */ | ||
35 | |||
36 | movl (%esi),%eax /* Get the num lsw */ | ||
37 | divl %ecx /* dividend is remainder:lsw, edx carried over from above */ | ||
38 | |||
39 | movl %eax,(%esi) /* store the quotient lsw back into *x */ | ||
40 | |||
41 | movl %edx,%eax /* Return the remainder in eax */ | ||
42 | |||
43 | popl %esi | ||
44 | |||
45 | leave | ||
46 | ret | ||
47 | |||
diff --git a/arch/x86/math-emu/errors.c b/arch/x86/math-emu/errors.c new file mode 100644 index 000000000000..a1b0d22f6978 --- /dev/null +++ b/arch/x86/math-emu/errors.c | |||
@@ -0,0 +1,739 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | errors.c | | ||
3 | | | | ||
4 | | The error handling functions for wm-FPU-emu | | ||
5 | | | | ||
6 | | Copyright (C) 1992,1993,1994,1996 | | ||
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | ||
8 | | E-mail billm@jacobi.maths.monash.edu.au | | ||
9 | | | | ||
10 | | | | ||
11 | +---------------------------------------------------------------------------*/ | ||
12 | |||
13 | /*---------------------------------------------------------------------------+ | ||
14 | | Note: | | ||
15 | | The file contains code which accesses user memory. | | ||
16 | | Emulator static data may change when user memory is accessed, due to | | ||
17 | | other processes using the emulator while swapping is in progress. | | ||
18 | +---------------------------------------------------------------------------*/ | ||
19 | |||
20 | #include <linux/signal.h> | ||
21 | |||
22 | #include <asm/uaccess.h> | ||
23 | |||
24 | #include "fpu_emu.h" | ||
25 | #include "fpu_system.h" | ||
26 | #include "exception.h" | ||
27 | #include "status_w.h" | ||
28 | #include "control_w.h" | ||
29 | #include "reg_constant.h" | ||
30 | #include "version.h" | ||
31 | |||
32 | /* */ | ||
33 | #undef PRINT_MESSAGES | ||
34 | /* */ | ||
35 | |||
36 | |||
37 | #if 0 /* Un_impl() is compiled out; nothing down to the #endif is built */ | ||
38 | void Un_impl(void) | ||
39 | { | ||
40 | u_char byte1, FPU_modrm; | ||
41 | unsigned long address = FPU_ORIG_EIP; | ||
42 | |||
43 | RE_ENTRANT_CHECK_OFF; | ||
44 | /* No need to check access_ok(), we have previously fetched these bytes. */ | ||
45 | printk("Unimplemented FPU Opcode at eip=%p : ", (void __user *) address); | ||
46 | if ( FPU_CS == __USER_CS ) | ||
47 | { | ||
48 | while ( 1 ) /* unbounded, unlike the capped loop in FPU_printall() */ | ||
49 | { | ||
50 | FPU_get_user(byte1, (u_char __user *) address); | ||
51 | if ( (byte1 & 0xf8) == 0xd8 ) break; /* byte in 0xd8..0xdf: stop */ | ||
52 | printk("[%02x]", byte1); /* bytes before it are shown bracketed */ | ||
53 | address++; | ||
54 | } | ||
55 | printk("%02x ", byte1); | ||
56 | FPU_get_user(FPU_modrm, 1 + (u_char __user *) address); | ||
57 | |||
58 | if (FPU_modrm >= 0300) /* octal 0300 == 0xc0: both top bits set */ | ||
59 | printk("%02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7); | ||
60 | else | ||
61 | printk("/%d\n", (FPU_modrm >> 3) & 7); /* bits 5..3 of the byte */ | ||
62 | } | ||
63 | else | ||
64 | { | ||
65 | printk("cs selector = %04x\n", FPU_CS); | ||
66 | } | ||
67 | |||
68 | RE_ENTRANT_CHECK_ON; | ||
69 | |||
70 | EXCEPTION(EX_Invalid); | ||
71 | |||
72 | } | ||
73 | #endif /* 0 */ | ||
74 | |||
75 | |||
76 | /* | ||
77 | Called for opcodes which are illegal and which are known to result in a | ||
78 | SIGILL with a real 80486. | ||
79 | Hands FPU_info and SIGILL to math_abort(). */ | ||
80 | void FPU_illegal(void) | ||
81 | { | ||
82 | math_abort(FPU_info,SIGILL); | ||
83 | } | ||
84 | |||
85 | |||
86 | /* printk() the instruction bytes, status/control words and non-empty regs */ | ||
87 | void FPU_printall(void) | ||
88 | { | ||
89 | int i; | ||
90 | static const char *tag_desc[] = { "Valid", "Zero", "ERROR", "Empty", | ||
91 | "DeNorm", "Inf", "NaN" }; /* indexed by tagi at the end of the loop */ | ||
92 | u_char byte1, FPU_modrm; | ||
93 | unsigned long address = FPU_ORIG_EIP; | ||
94 | |||
95 | RE_ENTRANT_CHECK_OFF; | ||
96 | /* No need to check access_ok(), we have previously fetched these bytes. */ | ||
97 | printk("At %p:", (void *) address); | ||
98 | if ( FPU_CS == __USER_CS ) | ||
99 | { | ||
100 | #define MAX_PRINTED_BYTES 20 | ||
101 | for ( i = 0; i < MAX_PRINTED_BYTES; i++ ) /* capped scan of leading bytes */ | ||
102 | { | ||
103 | FPU_get_user(byte1, (u_char __user *) address); | ||
104 | if ( (byte1 & 0xf8) == 0xd8 ) /* byte in 0xd8..0xdf: print it and stop */ | ||
105 | { | ||
106 | printk(" %02x", byte1); | ||
107 | break; | ||
108 | } | ||
109 | printk(" [%02x]", byte1); /* bytes before it are shown bracketed */ | ||
110 | address++; | ||
111 | } | ||
112 | if ( i == MAX_PRINTED_BYTES ) /* loop ran out without finding such a byte */ | ||
113 | printk(" [more..]\n"); | ||
114 | else | ||
115 | { | ||
116 | FPU_get_user(FPU_modrm, 1 + (u_char __user *) address); | ||
117 | |||
118 | if (FPU_modrm >= 0300) /* octal 0300 == 0xc0: both top bits set */ | ||
119 | printk(" %02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7); | ||
120 | else | ||
121 | printk(" /%d, mod=%d rm=%d\n", | ||
122 | (FPU_modrm >> 3) & 7, (FPU_modrm >> 6) & 3, FPU_modrm & 7); | ||
123 | } | ||
124 | } | ||
125 | else | ||
126 | { | ||
127 | printk("%04x\n", FPU_CS); | ||
128 | } | ||
129 | |||
130 | partial_status = status_word(); /* note: partial_status is overwritten here */ | ||
131 | |||
132 | #ifdef DEBUGGING | ||
133 | if ( partial_status & SW_Backward ) printk("SW: backward compatibility\n"); | ||
134 | if ( partial_status & SW_C3 ) printk("SW: condition bit 3\n"); | ||
135 | if ( partial_status & SW_C2 ) printk("SW: condition bit 2\n"); | ||
136 | if ( partial_status & SW_C1 ) printk("SW: condition bit 1\n"); | ||
137 | if ( partial_status & SW_C0 ) printk("SW: condition bit 0\n"); | ||
138 | if ( partial_status & SW_Summary ) printk("SW: exception summary\n"); | ||
139 | if ( partial_status & SW_Stack_Fault ) printk("SW: stack fault\n"); | ||
140 | if ( partial_status & SW_Precision ) printk("SW: loss of precision\n"); | ||
141 | if ( partial_status & SW_Underflow ) printk("SW: underflow\n"); | ||
142 | if ( partial_status & SW_Overflow ) printk("SW: overflow\n"); | ||
143 | if ( partial_status & SW_Zero_Div ) printk("SW: divide by zero\n"); | ||
144 | if ( partial_status & SW_Denorm_Op ) printk("SW: denormalized operand\n"); | ||
145 | if ( partial_status & SW_Invalid ) printk("SW: invalid operation\n"); | ||
146 | #endif /* DEBUGGING */ | ||
147 | |||
148 | printk(" SW: b=%d st=%ld es=%d sf=%d cc=%d%d%d%d ef=%d%d%d%d%d%d\n", | ||
149 | partial_status & 0x8000 ? 1 : 0, /* busy */ | ||
150 | (partial_status & 0x3800) >> 11, /* stack top pointer */ | ||
151 | partial_status & 0x80 ? 1 : 0, /* Error summary status */ | ||
152 | partial_status & 0x40 ? 1 : 0, /* Stack flag */ | ||
153 | partial_status & SW_C3?1:0, partial_status & SW_C2?1:0, /* cc */ | ||
154 | partial_status & SW_C1?1:0, partial_status & SW_C0?1:0, /* cc */ | ||
155 | partial_status & SW_Precision?1:0, partial_status & SW_Underflow?1:0, | ||
156 | partial_status & SW_Overflow?1:0, partial_status & SW_Zero_Div?1:0, | ||
157 | partial_status & SW_Denorm_Op?1:0, partial_status & SW_Invalid?1:0); | ||
158 | /* NOTE(review): the ef= fields below test control_word with SW_* masks; presumably the bit positions match CW_* - confirm */ | ||
159 | printk(" CW: ic=%d rc=%ld%ld pc=%ld%ld iem=%d ef=%d%d%d%d%d%d\n", | ||
160 | control_word & 0x1000 ? 1 : 0, | ||
161 | (control_word & 0x800) >> 11, (control_word & 0x400) >> 10, | ||
162 | (control_word & 0x200) >> 9, (control_word & 0x100) >> 8, | ||
163 | control_word & 0x80 ? 1 : 0, | ||
164 | control_word & SW_Precision?1:0, control_word & SW_Underflow?1:0, | ||
165 | control_word & SW_Overflow?1:0, control_word & SW_Zero_Div?1:0, | ||
166 | control_word & SW_Denorm_Op?1:0, control_word & SW_Invalid?1:0); | ||
167 | |||
168 | for ( i = 0; i < 8; i++ ) /* one output line per non-empty st(i) */ | ||
169 | { | ||
170 | FPU_REG *r = &st(i); | ||
171 | u_char tagi = FPU_gettagi(i); | ||
172 | switch (tagi) | ||
173 | { | ||
174 | case TAG_Empty: | ||
175 | continue; /* empty registers are not printed */ | ||
176 | break; | ||
177 | case TAG_Zero: | ||
178 | case TAG_Special: | ||
179 | tagi = FPU_Special(r); /* replace tagi; no break, falls through */ | ||
180 | case TAG_Valid: | ||
181 | printk("st(%d) %c .%04lx %04lx %04lx %04lx e%+-6d ", i, | ||
182 | getsign(r) ? '-' : '+', | ||
183 | (long)(r->sigh >> 16), | ||
184 | (long)(r->sigh & 0xFFFF), | ||
185 | (long)(r->sigl >> 16), | ||
186 | (long)(r->sigl & 0xFFFF), | ||
187 | exponent(r) - EXP_BIAS + 1); | ||
188 | break; | ||
189 | default: | ||
190 | printk("Whoops! Error in errors.c: tag%d is %d ", i, tagi); | ||
191 | continue; /* skips the tag_desc[] lookup below */ | ||
192 | break; | ||
193 | } | ||
194 | printk("%s\n", tag_desc[(int) (unsigned) tagi]); | ||
195 | } | ||
196 | |||
197 | RE_ENTRANT_CHECK_ON; | ||
198 | |||
199 | } | ||
200 | |||
201 | static struct { /* name table scanned in order by FPU_exception() */ | ||
202 | int type; /* bits which must all be set in the exception code */ | ||
203 | const char *name; | ||
204 | } exception_names[] = { | ||
205 | { EX_StackOver, "stack overflow" }, | ||
206 | { EX_StackUnder, "stack underflow" }, | ||
207 | { EX_Precision, "loss of precision" }, | ||
208 | { EX_Underflow, "underflow" }, | ||
209 | { EX_Overflow, "overflow" }, | ||
210 | { EX_ZeroDiv, "divide by zero" }, | ||
211 | { EX_Denormal, "denormalized operand" }, | ||
212 | { EX_Invalid, "invalid operation" }, | ||
213 | { EX_INTERNAL, "INTERNAL BUG in "FPU_VERSION }, | ||
214 | { 0, NULL } /* terminator: type == 0 ends the scan */ | ||
215 | }; | ||
216 | |||
217 | /* | ||
218 | EX_INTERNAL is always given with a code which indicates where the | ||
219 | error was detected. | ||
220 | |||
221 | Internal error types: | ||
222 | 0x14 in fpu_etc.c | ||
223 | 0x1nn in a *.c file: | ||
224 | 0x101 in reg_add_sub.c | ||
225 | 0x102 in reg_mul.c | ||
226 | 0x104 in poly_atan.c | ||
227 | 0x105 in reg_mul.c | ||
228 | 0x107 in fpu_trig.c | ||
229 | 0x108 in reg_compare.c | ||
230 | 0x109 in reg_compare.c | ||
231 | 0x110 in reg_add_sub.c | ||
232 | 0x111 in fpu_entry.c | ||
233 | 0x112 in fpu_trig.c | ||
234 | 0x113 in errors.c | ||
235 | 0x115 in fpu_trig.c | ||
236 | 0x116 in fpu_trig.c | ||
237 | 0x117 in fpu_trig.c | ||
238 | 0x118 in fpu_trig.c | ||
239 | 0x119 in fpu_trig.c | ||
240 | 0x120 in poly_atan.c | ||
241 | 0x121 in reg_compare.c | ||
242 | 0x122 in reg_compare.c | ||
243 | 0x123 in reg_compare.c | ||
244 | 0x125 in fpu_trig.c | ||
245 | 0x126 in fpu_entry.c | ||
246 | 0x127 in poly_2xm1.c | ||
247 | 0x128 in fpu_entry.c | ||
248 | 0x129 in fpu_entry.c | ||
249 | 0x130 in get_address.c | ||
250 | 0x131 in get_address.c | ||
251 | 0x132 in get_address.c | ||
252 | 0x133 in get_address.c | ||
253 | 0x140 in load_store.c | ||
254 | 0x141 in load_store.c | ||
255 | 0x150 in poly_sin.c | ||
256 | 0x151 in poly_sin.c | ||
257 | 0x160 in reg_ld_str.c | ||
258 | 0x161 in reg_ld_str.c | ||
259 | 0x162 in reg_ld_str.c | ||
260 | 0x163 in reg_ld_str.c | ||
261 | 0x164 in reg_ld_str.c | ||
262 | 0x170 in fpu_tags.c | ||
263 | 0x171 in fpu_tags.c | ||
264 | 0x172 in fpu_tags.c | ||
265 | 0x180 in reg_convert.c | ||
266 | 0x2nn in an *.S file: | ||
267 | 0x201 in reg_u_add.S | ||
268 | 0x202 in reg_u_div.S | ||
269 | 0x203 in reg_u_div.S | ||
270 | 0x204 in reg_u_div.S | ||
271 | 0x205 in reg_u_mul.S | ||
272 | 0x206 in reg_u_sub.S | ||
273 | 0x207 in wm_sqrt.S | ||
274 | 0x208 in reg_div.S | ||
275 | 0x209 in reg_u_sub.S | ||
276 | 0x210 in reg_u_sub.S | ||
277 | 0x211 in reg_u_sub.S | ||
278 | 0x212 in reg_u_sub.S | ||
279 | 0x213 in wm_sqrt.S | ||
280 | 0x214 in wm_sqrt.S | ||
281 | 0x215 in wm_sqrt.S | ||
282 | 0x220 in reg_norm.S | ||
283 | 0x221 in reg_norm.S | ||
284 | 0x230 in reg_round.S | ||
285 | 0x231 in reg_round.S | ||
286 | 0x232 in reg_round.S | ||
287 | 0x233 in reg_round.S | ||
288 | 0x234 in reg_round.S | ||
289 | 0x235 in reg_round.S | ||
290 | 0x236 in reg_round.S | ||
291 | 0x240 in div_Xsig.S | ||
292 | 0x241 in div_Xsig.S | ||
293 | 0x242 in div_Xsig.S | ||
294 | */ | ||
295 | /* Record exception n in partial_status and report it via printk() */ | ||
296 | asmlinkage void FPU_exception(int n) | ||
297 | { | ||
298 | int i, int_type; | ||
299 | |||
300 | int_type = 0; /* Needed only to stop compiler warnings */ | ||
301 | if ( n & EX_INTERNAL ) | ||
302 | { | ||
303 | int_type = n - EX_INTERNAL; /* what is left is the location code */ | ||
304 | n = EX_INTERNAL; | ||
305 | /* Set lots of exception bits! */ | ||
306 | partial_status |= (SW_Exc_Mask | SW_Summary | SW_Backward); | ||
307 | } | ||
308 | else | ||
309 | { | ||
310 | /* Extract only the bits which we use to set the status word */ | ||
311 | n &= (SW_Exc_Mask); | ||
312 | /* Set the corresponding exception bit */ | ||
313 | partial_status |= n; | ||
314 | /* Set summary bits iff exception isn't masked */ | ||
315 | if ( partial_status & ~control_word & CW_Exceptions ) | ||
316 | partial_status |= (SW_Summary | SW_Backward); | ||
317 | if ( n & (SW_Stack_Fault | EX_Precision) ) | ||
318 | { | ||
319 | if ( !(n & SW_C1) ) | ||
320 | /* This bit distinguishes over- from underflow for a stack fault, | ||
321 | and roundup from round-down for precision loss. */ | ||
322 | partial_status &= ~SW_C1; | ||
323 | } | ||
324 | } | ||
325 | |||
326 | RE_ENTRANT_CHECK_OFF; | ||
327 | if ( (~control_word & n & CW_Exceptions) || (n == EX_INTERNAL) ) | ||
328 | { /* reached only for an unmasked exception or an internal error */ | ||
329 | #ifdef PRINT_MESSAGES | ||
330 | /* My message from the sponsor */ | ||
331 | printk(FPU_VERSION" "__DATE__" (C) W. Metzenthen.\n"); | ||
332 | #endif /* PRINT_MESSAGES */ | ||
333 | |||
334 | /* Get a name string for error reporting */ | ||
335 | for (i=0; exception_names[i].type; i++) | ||
336 | if ( (exception_names[i].type & n) == exception_names[i].type ) | ||
337 | break; /* first entry whose type bits are all set in n */ | ||
338 | |||
339 | if (exception_names[i].type) | ||
340 | { /* this body is empty unless PRINT_MESSAGES is defined */ | ||
341 | #ifdef PRINT_MESSAGES | ||
342 | printk("FP Exception: %s!\n", exception_names[i].name); | ||
343 | #endif /* PRINT_MESSAGES */ | ||
344 | } | ||
345 | else | ||
346 | printk("FPU emulator: Unknown Exception: 0x%04x!\n", n); | ||
347 | |||
348 | if ( n == EX_INTERNAL ) | ||
349 | { | ||
350 | printk("FPU emulator: Internal error type 0x%04x\n", int_type); | ||
351 | FPU_printall(); | ||
352 | } | ||
353 | #ifdef PRINT_MESSAGES | ||
354 | else | ||
355 | FPU_printall(); | ||
356 | #endif /* PRINT_MESSAGES */ | ||
357 | |||
358 | /* | ||
359 | * The 80486 generates an interrupt on the next non-control FPU | ||
360 | * instruction. So we need some means of flagging it. | ||
361 | * We use the ES (Error Summary) bit for this. | ||
362 | */ | ||
363 | } | ||
364 | RE_ENTRANT_CHECK_ON; | ||
365 | |||
366 | #ifdef __DEBUG__ | ||
367 | math_abort(FPU_info,SIGFPE); /* only built when __DEBUG__ is defined */ | ||
368 | #endif /* __DEBUG__ */ | ||
369 | |||
370 | } | ||
371 | |||
372 | |||
373 | /* Real operation attempted on a NaN. */ | ||
374 | /* Returns < 0 if the exception is unmasked */ | ||
375 | int real_1op_NaN(FPU_REG *a) | ||
376 | { | ||
377 | int signalling, isNaN; | ||
378 | |||
379 | isNaN = (exponent(a) == EXP_OVER) && (a->sigh & 0x80000000); | ||
380 | |||
381 | /* A signalling NaN is a NaN whose quiet bit (0x40000000 in sigh) | ||
382 | is clear */ | ||
383 | signalling = isNaN && !(a->sigh & 0x40000000); | ||
384 | |||
385 | if ( !signalling ) | ||
386 | { | ||
387 | if ( !isNaN ) /* pseudo-NaN, or other unsupported? */ | ||
388 | { | ||
389 | if ( control_word & CW_Invalid ) | ||
390 | { | ||
391 | /* Masked response */ | ||
392 | reg_copy(&CONST_QNaN, a); | ||
393 | } | ||
394 | EXCEPTION(EX_Invalid); | ||
395 | return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special; | ||
396 | } | ||
397 | return TAG_Special; /* quiet NaN: left untouched, no exception raised */ | ||
398 | } | ||
399 | /* From here on the operand is a signalling NaN */ | ||
400 | if ( control_word & CW_Invalid ) | ||
401 | { | ||
402 | /* The masked response */ | ||
403 | if ( !(a->sigh & 0x80000000) ) /* pseudo-NaN ? NOTE(review): looks unreachable, isNaN implies this bit is set */ | ||
404 | { | ||
405 | reg_copy(&CONST_QNaN, a); | ||
406 | } | ||
407 | /* ensure a Quiet NaN */ | ||
408 | a->sigh |= 0x40000000; | ||
409 | } | ||
410 | |||
411 | EXCEPTION(EX_Invalid); | ||
412 | |||
413 | return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special; | ||
414 | } | ||
415 | |||
416 | |||
417 | /* Real operation attempted on two operands, one a NaN. */ | ||
418 | /* Returns < 0 if the exception is unmasked */ | ||
419 | int real_2op_NaN(FPU_REG const *b, u_char tagb, | ||
420 | int deststnr, | ||
421 | FPU_REG const *defaultNaN) | ||
422 | { | ||
423 | FPU_REG *dest = &st(deststnr); | ||
424 | FPU_REG const *a = dest; /* the first operand is the destination register */ | ||
425 | u_char taga = FPU_gettagi(deststnr); | ||
426 | FPU_REG const *x; /* the operand chosen as the result */ | ||
427 | int signalling, unsupported; | ||
428 | |||
429 | if ( taga == TAG_Special ) | ||
430 | taga = FPU_Special(a); | ||
431 | if ( tagb == TAG_Special ) | ||
432 | tagb = FPU_Special(b); | ||
433 | |||
434 | /* TW_NaN is also used for unsupported data types. */ | ||
435 | unsupported = ((taga == TW_NaN) | ||
436 | && !((exponent(a) == EXP_OVER) && (a->sigh & 0x80000000))) | ||
437 | || ((tagb == TW_NaN) | ||
438 | && !((exponent(b) == EXP_OVER) && (b->sigh & 0x80000000))); | ||
439 | if ( unsupported ) | ||
440 | { | ||
441 | if ( control_word & CW_Invalid ) | ||
442 | { | ||
443 | /* Masked response */ | ||
444 | FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr); | ||
445 | } | ||
446 | EXCEPTION(EX_Invalid); | ||
447 | return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special; | ||
448 | } | ||
449 | |||
450 | if (taga == TW_NaN) | ||
451 | { | ||
452 | x = a; | ||
453 | if (tagb == TW_NaN) | ||
454 | { | ||
455 | signalling = !(a->sigh & b->sigh & 0x40000000); /* either one lacks the quiet bit */ | ||
456 | if ( significand(b) > significand(a) ) | ||
457 | x = b; /* the NaN with the larger significand is chosen */ | ||
458 | else if ( significand(b) == significand(a) ) | ||
459 | { | ||
460 | /* The default result for the case of two "equal" NaNs (signs may | ||
461 | differ) is chosen to reproduce 80486 behaviour */ | ||
462 | x = defaultNaN; | ||
463 | } | ||
464 | } | ||
465 | else | ||
466 | { | ||
467 | /* return the quiet version of the NaN in a */ | ||
468 | signalling = !(a->sigh & 0x40000000); | ||
469 | } | ||
470 | } | ||
471 | else | ||
472 | #ifdef PARANOID | ||
473 | if (tagb == TW_NaN) | ||
474 | #endif /* PARANOID */ | ||
475 | { | ||
476 | signalling = !(b->sigh & 0x40000000); | ||
477 | x = b; | ||
478 | } | ||
479 | #ifdef PARANOID | ||
480 | else | ||
481 | { /* neither operand is tagged as a NaN: internal error 0x113 */ | ||
482 | signalling = 0; | ||
483 | EXCEPTION(EX_INTERNAL|0x113); | ||
484 | x = &CONST_QNaN; | ||
485 | } | ||
486 | #endif /* PARANOID */ | ||
487 | |||
488 | if ( (!signalling) || (control_word & CW_Invalid) ) | ||
489 | { | ||
490 | if ( ! x ) /* only possible when defaultNaN was chosen and it is NULL */ | ||
491 | x = b; | ||
492 | |||
493 | if ( !(x->sigh & 0x80000000) ) /* pseudo-NaN ? */ | ||
494 | x = &CONST_QNaN; | ||
495 | |||
496 | FPU_copy_to_regi(x, TAG_Special, deststnr); | ||
497 | |||
498 | if ( !signalling ) | ||
499 | return TAG_Special; /* quiet operands only: no exception is raised */ | ||
500 | |||
501 | /* ensure a Quiet NaN */ | ||
502 | dest->sigh |= 0x40000000; | ||
503 | } | ||
504 | |||
505 | EXCEPTION(EX_Invalid); | ||
506 | |||
507 | return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special; | ||
508 | } | ||
509 | |||
510 | |||
511 | /* Invalid arith operation on Valid registers */ | ||
512 | /* Returns < 0 if the exception is unmasked */ | ||
513 | asmlinkage int arith_invalid(int deststnr) | ||
514 | { | ||
515 | /* The exception is raised first, whether or not it is masked */ | ||
516 | EXCEPTION(EX_Invalid); | ||
517 | |||
518 | if ( control_word & CW_Invalid ) | ||
519 | { | ||
520 | /* The masked response */ | ||
521 | FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr); | ||
522 | } | ||
523 | /* NOTE(review): TAG_Valid is returned even after a TAG_Special QNaN was stored - confirm callers expect this */ | ||
524 | return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Valid; | ||
525 | |||
526 | } | ||
527 | |||
528 | |||
529 | /* Divide a finite number by zero */ | ||
530 | asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign) | ||
531 | { | ||
532 | FPU_REG *dest = &st(deststnr); | ||
533 | int tag = TAG_Valid; | ||
534 | |||
535 | if ( control_word & CW_ZeroDiv ) | ||
536 | { | ||
537 | /* The masked response */ | ||
538 | FPU_copy_to_regi(&CONST_INF, TAG_Special, deststnr); | ||
539 | setsign(dest, sign); | ||
540 | tag = TAG_Special; | ||
541 | } | ||
542 | |||
543 | EXCEPTION(EX_ZeroDiv); | ||
544 | |||
545 | return (!(control_word & CW_ZeroDiv) ? FPU_Exception : 0) | tag; | ||
546 | |||
547 | } | ||
548 | |||
549 | |||
550 | /* This may be called often, so keep it lean */ | ||
551 | int set_precision_flag(int flags) | ||
552 | { | ||
553 | if ( control_word & CW_Precision ) | ||
554 | { | ||
555 | partial_status &= ~(SW_C1 & flags); | ||
556 | partial_status |= flags; /* The masked response */ | ||
557 | return 0; | ||
558 | } | ||
559 | else | ||
560 | { | ||
561 | EXCEPTION(flags); | ||
562 | return 1; | ||
563 | } | ||
564 | } | ||
565 | |||
566 | |||
567 | /* This may be called often, so keep it lean */ | ||
568 | asmlinkage void set_precision_flag_up(void) | ||
569 | { | ||
570 | if ( control_word & CW_Precision ) | ||
571 | partial_status |= (SW_Precision | SW_C1); /* The masked response */ | ||
572 | else | ||
573 | EXCEPTION(EX_Precision | SW_C1); | ||
574 | } | ||
575 | |||
576 | |||
577 | /* This may be called often, so keep it lean */ | ||
578 | asmlinkage void set_precision_flag_down(void) | ||
579 | { | ||
580 | if ( control_word & CW_Precision ) | ||
581 | { /* The masked response */ | ||
582 | partial_status &= ~SW_C1; | ||
583 | partial_status |= SW_Precision; | ||
584 | } | ||
585 | else | ||
586 | EXCEPTION(EX_Precision); | ||
587 | } | ||
588 | |||
589 | |||
590 | asmlinkage int denormal_operand(void) | ||
591 | { | ||
592 | if ( control_word & CW_Denormal ) | ||
593 | { /* The masked response */ | ||
594 | partial_status |= SW_Denorm_Op; | ||
595 | return TAG_Special; | ||
596 | } | ||
597 | else | ||
598 | { | ||
599 | EXCEPTION(EX_Denormal); | ||
600 | return TAG_Special | FPU_Exception; | ||
601 | } | ||
602 | } | ||
603 | |||
604 | |||
605 | asmlinkage int arith_overflow(FPU_REG *dest) | ||
606 | { | ||
607 | int tag = TAG_Valid; | ||
608 | |||
609 | if ( control_word & CW_Overflow ) | ||
610 | { | ||
611 | /* The masked response */ | ||
612 | /* ###### The response here depends upon the rounding mode */ | ||
613 | reg_copy(&CONST_INF, dest); | ||
614 | tag = TAG_Special; | ||
615 | } | ||
616 | else | ||
617 | { | ||
618 | /* Subtract the magic number from the exponent */ | ||
619 | addexponent(dest, (-3 * (1 << 13))); | ||
620 | } | ||
621 | |||
622 | EXCEPTION(EX_Overflow); | ||
623 | if ( control_word & CW_Overflow ) | ||
624 | { | ||
625 | /* The overflow exception is masked. */ | ||
626 | /* By definition, precision is lost. | ||
627 | The roundup bit (C1) is also set because we have | ||
628 | "rounded" upwards to Infinity. */ | ||
629 | EXCEPTION(EX_Precision | SW_C1); | ||
630 | return tag; | ||
631 | } | ||
632 | |||
633 | return tag; | ||
634 | |||
635 | } | ||
636 | |||
637 | |||
638 | asmlinkage int arith_underflow(FPU_REG *dest) | ||
639 | { | ||
640 | int tag = TAG_Valid; | ||
641 | |||
642 | if ( control_word & CW_Underflow ) | ||
643 | { | ||
644 | /* The masked response */ | ||
645 | if ( exponent16(dest) <= EXP_UNDER - 63 ) | ||
646 | { | ||
647 | reg_copy(&CONST_Z, dest); | ||
648 | partial_status &= ~SW_C1; /* Round down. */ | ||
649 | tag = TAG_Zero; | ||
650 | } | ||
651 | else | ||
652 | { | ||
653 | stdexp(dest); | ||
654 | } | ||
655 | } | ||
656 | else | ||
657 | { | ||
658 | /* Add the magic number to the exponent. */ | ||
659 | addexponent(dest, (3 * (1 << 13)) + EXTENDED_Ebias); | ||
660 | } | ||
661 | |||
662 | EXCEPTION(EX_Underflow); | ||
663 | if ( control_word & CW_Underflow ) | ||
664 | { | ||
665 | /* The underflow exception is masked. */ | ||
666 | EXCEPTION(EX_Precision); | ||
667 | return tag; | ||
668 | } | ||
669 | |||
670 | return tag; | ||
671 | |||
672 | } | ||
673 | |||
674 | |||
675 | void FPU_stack_overflow(void) | ||
676 | { | ||
677 | |||
678 | if ( control_word & CW_Invalid ) | ||
679 | { | ||
680 | /* The masked response */ | ||
681 | top--; | ||
682 | FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); | ||
683 | } | ||
684 | |||
685 | EXCEPTION(EX_StackOver); | ||
686 | |||
687 | return; | ||
688 | |||
689 | } | ||
690 | |||
691 | |||
692 | void FPU_stack_underflow(void) | ||
693 | { | ||
694 | |||
695 | if ( control_word & CW_Invalid ) | ||
696 | { | ||
697 | /* The masked response */ | ||
698 | FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); | ||
699 | } | ||
700 | |||
701 | EXCEPTION(EX_StackUnder); | ||
702 | |||
703 | return; | ||
704 | |||
705 | } | ||
706 | |||
707 | |||
708 | void FPU_stack_underflow_i(int i) | ||
709 | { | ||
710 | |||
711 | if ( control_word & CW_Invalid ) | ||
712 | { | ||
713 | /* The masked response */ | ||
714 | FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i); | ||
715 | } | ||
716 | |||
717 | EXCEPTION(EX_StackUnder); | ||
718 | |||
719 | return; | ||
720 | |||
721 | } | ||
722 | |||
723 | |||
724 | void FPU_stack_underflow_pop(int i) | ||
725 | { | ||
726 | |||
727 | if ( control_word & CW_Invalid ) | ||
728 | { | ||
729 | /* The masked response */ | ||
730 | FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i); | ||
731 | FPU_pop(); | ||
732 | } | ||
733 | |||
734 | EXCEPTION(EX_StackUnder); | ||
735 | |||
736 | return; | ||
737 | |||
738 | } | ||
739 | |||
diff --git a/arch/x86/math-emu/exception.h b/arch/x86/math-emu/exception.h new file mode 100644 index 000000000000..b463f21a811e --- /dev/null +++ b/arch/x86/math-emu/exception.h | |||
@@ -0,0 +1,53 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | exception.h | | ||
3 | | | | ||
4 | | Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
5 | | Australia. E-mail billm@vaxc.cc.monash.edu.au | | ||
6 | | | | ||
7 | +---------------------------------------------------------------------------*/ | ||
8 | |||
9 | #ifndef _EXCEPTION_H_ | ||
10 | #define _EXCEPTION_H_ | ||
11 | |||
12 | |||
13 | #ifdef __ASSEMBLY__ | ||
14 | #define Const_(x) $##x /* assembler build: paste a '$' in front of the value */ | ||
15 | #else | ||
16 | #define Const_(x) x /* C build: use the value unchanged */ | ||
17 | #endif | ||
18 | |||
19 | #ifndef SW_C1 | ||
20 | #include "fpu_emu.h" | ||
21 | #endif /* SW_C1 */ | ||
22 | |||
23 | #define FPU_BUSY Const_(0x8000) /* FPU busy bit (8087 compatibility) */ | ||
24 | #define EX_ErrorSummary Const_(0x0080) /* Error summary status */ | ||
25 | /* Special exceptions: */ | ||
26 | #define EX_INTERNAL Const_(0x8000) /* Internal error in wm-FPU-emu; callers or in an error number, e.g. EX_INTERNAL|0x113 */ | ||
27 | #define EX_StackOver Const_(0x0041|SW_C1) /* stack overflow: EX_Invalid (0x0001) plus bit 0x0040, with C1 set */ | ||
28 | #define EX_StackUnder Const_(0x0041) /* stack underflow: EX_Invalid (0x0001) plus bit 0x0040, C1 not set */ | ||
29 | /* Exception flags: */ | ||
30 | #define EX_Precision Const_(0x0020) /* loss of precision */ | ||
31 | #define EX_Underflow Const_(0x0010) /* underflow */ | ||
32 | #define EX_Overflow Const_(0x0008) /* overflow */ | ||
33 | #define EX_ZeroDiv Const_(0x0004) /* divide by zero */ | ||
34 | #define EX_Denormal Const_(0x0002) /* denormalized operand */ | ||
35 | #define EX_Invalid Const_(0x0001) /* invalid operation */ | ||
36 | |||
37 | |||
38 | #define PRECISION_LOST_UP Const_((EX_Precision | SW_C1)) /* precision lost, C1 (the roundup bit) set */ | ||
39 | #define PRECISION_LOST_DOWN Const_(EX_Precision) /* precision lost, C1 not included */ | ||
40 | |||
41 | |||
42 | #ifndef __ASSEMBLY__ | ||
43 | |||
44 | #ifdef DEBUG /* debug build: EXCEPTION() also logs the file and line that raised it */ | ||
45 | #define EXCEPTION(x) { printk("exception in %s at line %d\n", \ | ||
46 | __FILE__, __LINE__); FPU_exception(x); } | ||
47 | #else | ||
48 | #define EXCEPTION(x) FPU_exception(x) /* normal build: a plain call to FPU_exception() */ | ||
49 | #endif | ||
50 | |||
51 | #endif /* __ASSEMBLY__ */ | ||
52 | |||
53 | #endif /* _EXCEPTION_H_ */ | ||
diff --git a/arch/x86/math-emu/fpu_arith.c b/arch/x86/math-emu/fpu_arith.c new file mode 100644 index 000000000000..6972dec01af6 --- /dev/null +++ b/arch/x86/math-emu/fpu_arith.c | |||
@@ -0,0 +1,174 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | fpu_arith.c | | ||
3 | | | | ||
4 | | Code to implement the FPU register/register arithmetic instructions | | ||
5 | | | | ||
6 | | Copyright (C) 1992,1993,1997 | | ||
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | ||
8 | | E-mail billm@suburbia.net | | ||
9 | | | | ||
10 | | | | ||
11 | +---------------------------------------------------------------------------*/ | ||
12 | |||
13 | #include "fpu_system.h" | ||
14 | #include "fpu_emu.h" | ||
15 | #include "control_w.h" | ||
16 | #include "status_w.h" | ||
17 | |||
18 | |||
19 | void fadd__(void) | ||
20 | { | ||
21 | /* fadd st,st(i) */ | ||
22 | int i = FPU_rm; | ||
23 | clear_C1(); | ||
24 | FPU_add(&st(i), FPU_gettagi(i), 0, control_word); | ||
25 | } | ||
26 | |||
27 | |||
28 | void fmul__(void) | ||
29 | { | ||
30 | /* fmul st,st(i) */ | ||
31 | int i = FPU_rm; | ||
32 | clear_C1(); | ||
33 | FPU_mul(&st(i), FPU_gettagi(i), 0, control_word); | ||
34 | } | ||
35 | |||
36 | |||
37 | |||
38 | void fsub__(void) | ||
39 | { | ||
40 | /* fsub st,st(i) */ | ||
41 | clear_C1(); | ||
42 | FPU_sub(0, FPU_rm, control_word); | ||
43 | } | ||
44 | |||
45 | |||
46 | void fsubr_(void) | ||
47 | { | ||
48 | /* fsubr st,st(i) */ | ||
49 | clear_C1(); | ||
50 | FPU_sub(REV, FPU_rm, control_word); | ||
51 | } | ||
52 | |||
53 | |||
54 | void fdiv__(void) | ||
55 | { | ||
56 | /* fdiv st,st(i) */ | ||
57 | clear_C1(); | ||
58 | FPU_div(0, FPU_rm, control_word); | ||
59 | } | ||
60 | |||
61 | |||
62 | void fdivr_(void) | ||
63 | { | ||
64 | /* fdivr st,st(i) */ | ||
65 | clear_C1(); | ||
66 | FPU_div(REV, FPU_rm, control_word); | ||
67 | } | ||
68 | |||
69 | |||
70 | |||
71 | void fadd_i(void) | ||
72 | { | ||
73 | /* fadd st(i),st */ | ||
74 | int i = FPU_rm; | ||
75 | clear_C1(); | ||
76 | FPU_add(&st(i), FPU_gettagi(i), i, control_word); | ||
77 | } | ||
78 | |||
79 | |||
80 | void fmul_i(void) | ||
81 | { | ||
82 | /* fmul st(i),st */ | ||
83 | clear_C1(); | ||
84 | FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word); | ||
85 | } | ||
86 | |||
87 | |||
88 | void fsubri(void) | ||
89 | { | ||
90 | /* fsubr st(i),st */ | ||
91 | clear_C1(); | ||
92 | FPU_sub(DEST_RM, FPU_rm, control_word); | ||
93 | } | ||
94 | |||
95 | |||
96 | void fsub_i(void) | ||
97 | { | ||
98 | /* fsub st(i),st */ | ||
99 | clear_C1(); | ||
100 | FPU_sub(REV|DEST_RM, FPU_rm, control_word); | ||
101 | } | ||
102 | |||
103 | |||
104 | void fdivri(void) | ||
105 | { | ||
106 | /* fdivr st(i),st */ | ||
107 | clear_C1(); | ||
108 | FPU_div(DEST_RM, FPU_rm, control_word); | ||
109 | } | ||
110 | |||
111 | |||
112 | void fdiv_i(void) | ||
113 | { | ||
114 | /* fdiv st(i),st */ | ||
115 | clear_C1(); | ||
116 | FPU_div(REV|DEST_RM, FPU_rm, control_word); | ||
117 | } | ||
118 | |||
119 | |||
120 | |||
121 | void faddp_(void) | ||
122 | { | ||
123 | /* faddp st(i),st */ | ||
124 | int i = FPU_rm; | ||
125 | clear_C1(); | ||
126 | if ( FPU_add(&st(i), FPU_gettagi(i), i, control_word) >= 0 ) | ||
127 | FPU_pop(); | ||
128 | } | ||
129 | |||
130 | |||
131 | void fmulp_(void) | ||
132 | { | ||
133 | /* fmulp st(i),st */ | ||
134 | clear_C1(); | ||
135 | if ( FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word) >= 0 ) | ||
136 | FPU_pop(); | ||
137 | } | ||
138 | |||
139 | |||
140 | |||
141 | void fsubrp(void) | ||
142 | { | ||
143 | /* fsubrp st(i),st */ | ||
144 | clear_C1(); | ||
145 | if ( FPU_sub(DEST_RM, FPU_rm, control_word) >= 0 ) | ||
146 | FPU_pop(); | ||
147 | } | ||
148 | |||
149 | |||
150 | void fsubp_(void) | ||
151 | { | ||
152 | /* fsubp st(i),st */ | ||
153 | clear_C1(); | ||
154 | if ( FPU_sub(REV|DEST_RM, FPU_rm, control_word) >= 0 ) | ||
155 | FPU_pop(); | ||
156 | } | ||
157 | |||
158 | |||
159 | void fdivrp(void) | ||
160 | { | ||
161 | /* fdivrp st(i),st */ | ||
162 | clear_C1(); | ||
163 | if ( FPU_div(DEST_RM, FPU_rm, control_word) >= 0 ) | ||
164 | FPU_pop(); | ||
165 | } | ||
166 | |||
167 | |||
168 | void fdivp_(void) | ||
169 | { | ||
170 | /* fdivp st(i),st */ | ||
171 | clear_C1(); | ||
172 | if ( FPU_div(REV|DEST_RM, FPU_rm, control_word) >= 0 ) | ||
173 | FPU_pop(); | ||
174 | } | ||
diff --git a/arch/x86/math-emu/fpu_asm.h b/arch/x86/math-emu/fpu_asm.h new file mode 100644 index 000000000000..9ba12416df12 --- /dev/null +++ b/arch/x86/math-emu/fpu_asm.h | |||
@@ -0,0 +1,32 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | fpu_asm.h | | ||
3 | | | | ||
4 | | Copyright (C) 1992,1995,1997 | | ||
5 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
6 | | Australia. E-mail billm@suburbia.net | | ||
7 | | | | ||
8 | +---------------------------------------------------------------------------*/ | ||
9 | |||
10 | #ifndef _FPU_ASM_H_ | ||
11 | #define _FPU_ASM_H_ | ||
12 | |||
13 | #include <linux/linkage.h> | ||
14 | |||
15 | #define EXCEPTION FPU_exception /* lets assembler sources refer to FPU_exception as EXCEPTION */ | ||
16 | |||
17 | |||
18 | #define PARAM1 8(%ebp) /* PARAMn are %ebp-relative slots, 4 bytes apart; presumably the n-th stack argument in an %ebp frame -- confirm against the .S callers */ | ||
19 | #define PARAM2 12(%ebp) | ||
20 | #define PARAM3 16(%ebp) | ||
21 | #define PARAM4 20(%ebp) | ||
22 | #define PARAM5 24(%ebp) | ||
23 | #define PARAM6 28(%ebp) | ||
24 | #define PARAM7 32(%ebp) | ||
25 | |||
26 | #define SIGL_OFFSET 0 /* byte offset of the sigl field within an FPU_REG */ | ||
27 | #define EXP(x) 8(x) /* exp field: byte offset 8 from base register x */ | ||
28 | #define SIG(x) SIGL_OFFSET##(x) /* start of the significand, i.e. 0(x) */ | ||
29 | #define SIGL(x) SIGL_OFFSET##(x) /* sigl field (first significand word), same address as SIG(x) */ | ||
30 | #define SIGH(x) 4(x) /* sigh field (second significand word): byte offset 4 */ | ||
31 | |||
32 | #endif /* _FPU_ASM_H_ */ | ||
diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c new file mode 100644 index 000000000000..20886cfb9f76 --- /dev/null +++ b/arch/x86/math-emu/fpu_aux.c | |||
@@ -0,0 +1,204 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | fpu_aux.c | | ||
3 | | | | ||
4 | | Code to implement some of the FPU auxiliary instructions. | | ||
5 | | | | ||
6 | | Copyright (C) 1992,1993,1994,1997 | | ||
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | ||
8 | | E-mail billm@suburbia.net | | ||
9 | | | | ||
10 | | | | ||
11 | +---------------------------------------------------------------------------*/ | ||
12 | |||
13 | #include "fpu_system.h" | ||
14 | #include "exception.h" | ||
15 | #include "fpu_emu.h" | ||
16 | #include "status_w.h" | ||
17 | #include "control_w.h" | ||
18 | |||
19 | |||
/* Handler for instructions that are emulated as no-ops: does nothing. */
static void fnop(void)
{
    return;
}
23 | |||
24 | static void fclex(void) | ||
25 | { | ||
26 | partial_status &= ~(SW_Backward|SW_Summary|SW_Stack_Fault|SW_Precision| | ||
27 | SW_Underflow|SW_Overflow|SW_Zero_Div|SW_Denorm_Op| | ||
28 | SW_Invalid); | ||
29 | no_ip_update = 1; | ||
30 | } | ||
31 | |||
32 | /* Needs to be externally visible */ | ||
33 | void finit(void) | ||
34 | { | ||
35 | control_word = 0x037f; | ||
36 | partial_status = 0; | ||
37 | top = 0; /* We don't keep top in the status word internally. */ | ||
38 | fpu_tag_word = 0xffff; | ||
39 | /* The behaviour is different from that detailed in | ||
40 | Section 15.1.6 of the Intel manual */ | ||
41 | operand_address.offset = 0; | ||
42 | operand_address.selector = 0; | ||
43 | instruction_address.offset = 0; | ||
44 | instruction_address.selector = 0; | ||
45 | instruction_address.opcode = 0; | ||
46 | no_ip_update = 1; | ||
47 | } | ||
48 | |||
49 | /* | ||
50 | * These are nops on the i387.. | ||
51 | */ | ||
52 | #define feni fnop | ||
53 | #define fdisi fnop | ||
54 | #define fsetpm fnop | ||
55 | |||
56 | static FUNC const finit_table[] = { | ||
57 | feni, fdisi, fclex, finit, | ||
58 | fsetpm, FPU_illegal, FPU_illegal, FPU_illegal | ||
59 | }; | ||
60 | |||
61 | void finit_(void) | ||
62 | { | ||
63 | (finit_table[FPU_rm])(); | ||
64 | } | ||
65 | |||
66 | |||
67 | static void fstsw_ax(void) | ||
68 | { | ||
69 | *(short *) &FPU_EAX = status_word(); | ||
70 | no_ip_update = 1; | ||
71 | } | ||
72 | |||
73 | static FUNC const fstsw_table[] = { | ||
74 | fstsw_ax, FPU_illegal, FPU_illegal, FPU_illegal, | ||
75 | FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal | ||
76 | }; | ||
77 | |||
78 | void fstsw_(void) | ||
79 | { | ||
80 | (fstsw_table[FPU_rm])(); | ||
81 | } | ||
82 | |||
83 | |||
84 | static FUNC const fp_nop_table[] = { | ||
85 | fnop, FPU_illegal, FPU_illegal, FPU_illegal, | ||
86 | FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal | ||
87 | }; | ||
88 | |||
89 | void fp_nop(void) | ||
90 | { | ||
91 | (fp_nop_table[FPU_rm])(); | ||
92 | } | ||
93 | |||
94 | |||
95 | void fld_i_(void) | ||
96 | { | ||
97 | FPU_REG *st_new_ptr; | ||
98 | int i; | ||
99 | u_char tag; | ||
100 | |||
101 | if ( STACK_OVERFLOW ) | ||
102 | { FPU_stack_overflow(); return; } | ||
103 | |||
104 | /* fld st(i) */ | ||
105 | i = FPU_rm; | ||
106 | if ( NOT_EMPTY(i) ) | ||
107 | { | ||
108 | reg_copy(&st(i), st_new_ptr); | ||
109 | tag = FPU_gettagi(i); | ||
110 | push(); | ||
111 | FPU_settag0(tag); | ||
112 | } | ||
113 | else | ||
114 | { | ||
115 | if ( control_word & CW_Invalid ) | ||
116 | { | ||
117 | /* The masked response */ | ||
118 | FPU_stack_underflow(); | ||
119 | } | ||
120 | else | ||
121 | EXCEPTION(EX_StackUnder); | ||
122 | } | ||
123 | |||
124 | } | ||
125 | |||
126 | |||
127 | void fxch_i(void) | ||
128 | { | ||
129 | /* fxch st(i) */ | ||
130 | FPU_REG t; | ||
131 | int i = FPU_rm; | ||
132 | FPU_REG *st0_ptr = &st(0), *sti_ptr = &st(i); | ||
133 | long tag_word = fpu_tag_word; | ||
134 | int regnr = top & 7, regnri = ((regnr + i) & 7); | ||
135 | u_char st0_tag = (tag_word >> (regnr*2)) & 3; | ||
136 | u_char sti_tag = (tag_word >> (regnri*2)) & 3; | ||
137 | |||
138 | if ( st0_tag == TAG_Empty ) | ||
139 | { | ||
140 | if ( sti_tag == TAG_Empty ) | ||
141 | { | ||
142 | FPU_stack_underflow(); | ||
143 | FPU_stack_underflow_i(i); | ||
144 | return; | ||
145 | } | ||
146 | if ( control_word & CW_Invalid ) | ||
147 | { | ||
148 | /* Masked response */ | ||
149 | FPU_copy_to_reg0(sti_ptr, sti_tag); | ||
150 | } | ||
151 | FPU_stack_underflow_i(i); | ||
152 | return; | ||
153 | } | ||
154 | if ( sti_tag == TAG_Empty ) | ||
155 | { | ||
156 | if ( control_word & CW_Invalid ) | ||
157 | { | ||
158 | /* Masked response */ | ||
159 | FPU_copy_to_regi(st0_ptr, st0_tag, i); | ||
160 | } | ||
161 | FPU_stack_underflow(); | ||
162 | return; | ||
163 | } | ||
164 | clear_C1(); | ||
165 | |||
166 | reg_copy(st0_ptr, &t); | ||
167 | reg_copy(sti_ptr, st0_ptr); | ||
168 | reg_copy(&t, sti_ptr); | ||
169 | |||
170 | tag_word &= ~(3 << (regnr*2)) & ~(3 << (regnri*2)); | ||
171 | tag_word |= (sti_tag << (regnr*2)) | (st0_tag << (regnri*2)); | ||
172 | fpu_tag_word = tag_word; | ||
173 | } | ||
174 | |||
175 | |||
176 | void ffree_(void) | ||
177 | { | ||
178 | /* ffree st(i) */ | ||
179 | FPU_settagi(FPU_rm, TAG_Empty); | ||
180 | } | ||
181 | |||
182 | |||
183 | void ffreep(void) | ||
184 | { | ||
185 | /* ffree st(i) + pop - unofficial code */ | ||
186 | FPU_settagi(FPU_rm, TAG_Empty); | ||
187 | FPU_pop(); | ||
188 | } | ||
189 | |||
190 | |||
191 | void fst_i_(void) | ||
192 | { | ||
193 | /* fst st(i) */ | ||
194 | FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm); | ||
195 | } | ||
196 | |||
197 | |||
198 | void fstp_i(void) | ||
199 | { | ||
200 | /* fstp st(i) */ | ||
201 | FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm); | ||
202 | FPU_pop(); | ||
203 | } | ||
204 | |||
diff --git a/arch/x86/math-emu/fpu_emu.h b/arch/x86/math-emu/fpu_emu.h new file mode 100644 index 000000000000..65120f523853 --- /dev/null +++ b/arch/x86/math-emu/fpu_emu.h | |||
@@ -0,0 +1,218 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | fpu_emu.h | | ||
3 | | | | ||
4 | | Copyright (C) 1992,1993,1994,1997 | | ||
5 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
6 | | Australia. E-mail billm@suburbia.net | | ||
7 | | | | ||
8 | +---------------------------------------------------------------------------*/ | ||
9 | |||
10 | |||
11 | #ifndef _FPU_EMU_H_ | ||
12 | #define _FPU_EMU_H_ | ||
13 | |||
14 | /* | ||
15 | * Define PECULIAR_486 to get a closer approximation to 80486 behaviour, | ||
16 | * rather than behaviour which appears to be cleaner. | ||
17 | * This is a matter of opinion: for all I know, the 80486 may simply | ||
18 | * be complying with the IEEE spec. Maybe one day I'll get to see the | ||
19 | * spec... | ||
20 | */ | ||
21 | #define PECULIAR_486 | ||
22 | |||
23 | #ifdef __ASSEMBLY__ | ||
24 | #include "fpu_asm.h" | ||
25 | #define Const(x) $##x | ||
26 | #else | ||
27 | #define Const(x) x | ||
28 | #endif | ||
29 | |||
30 | #define EXP_BIAS Const(0) | ||
31 | #define EXP_OVER Const(0x4000) /* smallest invalid large exponent */ | ||
32 | #define EXP_UNDER Const(-0x3fff) /* largest invalid small exponent */ | ||
33 | #define EXP_WAY_UNDER Const(-0x6000) /* Below the smallest denormal, but | ||
34 | still a 16 bit nr. */ | ||
35 | #define EXP_Infinity EXP_OVER | ||
36 | #define EXP_NaN EXP_OVER | ||
37 | |||
38 | #define EXTENDED_Ebias Const(0x3fff) | ||
39 | #define EXTENDED_Emin (-0x3ffe) /* smallest valid exponent */ | ||
40 | |||
41 | #define SIGN_POS Const(0) | ||
42 | #define SIGN_NEG Const(0x80) | ||
43 | |||
44 | #define SIGN_Positive Const(0) | ||
45 | #define SIGN_Negative Const(0x8000) | ||
46 | |||
47 | |||
48 | /* Keep the order TAG_Valid, TAG_Zero, TW_Denormal */ | ||
49 | /* The following fold to 2 (Special) in the Tag Word */ | ||
50 | #define TW_Denormal Const(4) /* De-normal */ | ||
51 | #define TW_Infinity Const(5) /* + or - infinity */ | ||
52 | #define TW_NaN Const(6) /* Not a Number */ | ||
53 | #define TW_Unsupported Const(7) /* Not supported by an 80486 */ | ||
54 | |||
55 | #define TAG_Valid Const(0) /* valid */ | ||
56 | #define TAG_Zero Const(1) /* zero */ | ||
57 | #define TAG_Special Const(2) /* De-normal, + or - infinity, | ||
58 | or Not a Number */ | ||
59 | #define TAG_Empty Const(3) /* empty */ | ||
60 | #define TAG_Error Const(0x80) /* probably need to abort */ | ||
61 | |||
62 | #define LOADED_DATA Const(10101) /* Special st() number to identify | ||
63 | loaded data (not on stack). */ | ||
64 | |||
65 | /* A few flags (must be >= 0x10). */ | ||
66 | #define REV 0x10 | ||
67 | #define DEST_RM 0x20 | ||
68 | #define LOADED 0x40 | ||
69 | |||
70 | #define FPU_Exception Const(0x80000000) /* Added to tag returns. */ | ||
71 | |||
72 | |||
73 | #ifndef __ASSEMBLY__ | ||
74 | |||
75 | #include "fpu_system.h" | ||
76 | |||
77 | #include <asm/sigcontext.h> /* for struct _fpstate */ | ||
78 | #include <asm/math_emu.h> | ||
79 | #include <linux/linkage.h> | ||
80 | |||
81 | /* | ||
82 | #define RE_ENTRANT_CHECKING | ||
83 | */ | ||
84 | |||
85 | #ifdef RE_ENTRANT_CHECKING | ||
86 | extern u_char emulating; | ||
87 | # define RE_ENTRANT_CHECK_OFF emulating = 0 | ||
88 | # define RE_ENTRANT_CHECK_ON emulating = 1 | ||
89 | #else | ||
90 | # define RE_ENTRANT_CHECK_OFF | ||
91 | # define RE_ENTRANT_CHECK_ON | ||
92 | #endif /* RE_ENTRANT_CHECKING */ | ||
93 | |||
94 | #define FWAIT_OPCODE 0x9b | ||
95 | #define OP_SIZE_PREFIX 0x66 | ||
96 | #define ADDR_SIZE_PREFIX 0x67 | ||
97 | #define PREFIX_CS 0x2e | ||
98 | #define PREFIX_DS 0x3e | ||
99 | #define PREFIX_ES 0x26 | ||
100 | #define PREFIX_SS 0x36 | ||
101 | #define PREFIX_FS 0x64 | ||
102 | #define PREFIX_GS 0x65 | ||
103 | #define PREFIX_REPE 0xf3 | ||
104 | #define PREFIX_REPNE 0xf2 | ||
105 | #define PREFIX_LOCK 0xf0 | ||
106 | #define PREFIX_CS_ 1 | ||
107 | #define PREFIX_DS_ 2 | ||
108 | #define PREFIX_ES_ 3 | ||
109 | #define PREFIX_FS_ 4 | ||
110 | #define PREFIX_GS_ 5 | ||
111 | #define PREFIX_SS_ 6 | ||
112 | #define PREFIX_DEFAULT 7 | ||
113 | |||
114 | struct address { /* presumably the type of operand_address and instruction_address, whose fields finit() clears -- confirm */ | ||
115 | unsigned int offset; /* offset part of the address */ | ||
116 | unsigned int selector:16; /* 16-bit selector */ | ||
117 | unsigned int opcode:11; /* 11-bit opcode field; finit() clears it only for instruction_address */ | ||
118 | unsigned int empty:5; /* unused bits: 16 + 11 + 5 fill one 32-bit word */ | ||
119 | }; | ||
120 | struct fpu__reg { /* one emulated FPU register; the st()/fpu_register() macros step through the register file in 10-byte strides */ | ||
121 | unsigned sigl; /* first 32-bit word of the significand; significand() reads sigl and sigh together as one 64-bit value */ | ||
122 | unsigned sigh; /* second 32-bit word of the significand */ | ||
123 | short exp; /* exponent word; signbyte() addresses byte 9 of the register and uses its 0x80 bit as the sign */ | ||
124 | }; | ||
125 | |||
126 | typedef void (*FUNC)(void); | ||
127 | typedef struct fpu__reg FPU_REG; | ||
128 | typedef void (*FUNC_ST0)(FPU_REG *st0_ptr, u_char st0_tag); | ||
129 | typedef struct { u_char address_size, operand_size, segment; } | ||
130 | overrides; | ||
131 | /* This structure is 32 bits: */ | ||
132 | typedef struct { overrides override; | ||
133 | u_char default_mode; } fpu_addr_modes; | ||
134 | /* PROTECTED has a restricted meaning in the emulator; it is used | ||
135 | to signal that the emulator needs to do special things to ensure | ||
136 | that protection is respected in a segmented model. */ | ||
137 | #define PROTECTED 4 | ||
138 | #define SIXTEEN 1 /* We rely upon this being 1 (true) */ | ||
139 | #define VM86 SIXTEEN | ||
140 | #define PM16 (SIXTEEN | PROTECTED) | ||
141 | #define SEG32 PROTECTED | ||
142 | extern u_char const data_sizes_16[32]; | ||
143 | |||
/* The emulated register file viewed as raw bytes. */
#define register_base ((u_char *) registers)

/*
 * Register number x of the register file, taken modulo 8.  Registers are
 * laid out at 10-byte intervals.  The argument is parenthesized so that an
 * expression such as (a | b) selects the register the caller meant.
 */
#define fpu_register(x) (*((FPU_REG *)(register_base + 10 * ((x) & 7))))

/*
 * Stack-relative register st(x): register number (top + x) modulo 8.
 * The argument is parenthesized for the same reason as above.
 */
#define st(x) (*((FPU_REG *)(register_base + 10 * ((top + (x)) & 7))))
147 | |||
148 | #define STACK_OVERFLOW (FPU_stackoverflow(&st_new_ptr)) | ||
149 | #define NOT_EMPTY(i) (!FPU_empty_i(i)) | ||
150 | |||
151 | #define NOT_EMPTY_ST0 (st0_tag ^ TAG_Empty) | ||
152 | |||
153 | #define poppop() { FPU_pop(); FPU_pop(); } | ||
154 | |||
155 | /* push() does not affect the tags */ | ||
156 | #define push() { top--; } | ||
157 | |||
158 | #define signbyte(a) (((u_char *)(a))[9]) /* byte 9 of the register at a; its 0x80 bit is the sign */ | ||
159 | #define getsign(a) (signbyte(a) & 0x80) /* 0x80 if the sign bit is set, else 0 */ | ||
160 | #define setsign(a,b) { if (b) signbyte(a) |= 0x80; else signbyte(a) &= 0x7f; } /* set the sign bit if b is non-zero, clear it otherwise */ | ||
161 | #define copysign(a,b) { if (getsign(a)) signbyte(b) |= 0x80; \ | ||
162 | else signbyte(b) &= 0x7f; } /* copysign(a,b): give b the sign of a */ | ||
163 | #define changesign(a) { signbyte(a) ^= 0x80; } /* flip the sign bit */ | ||
164 | #define setpositive(a) { signbyte(a) &= 0x7f; } /* clear the sign bit */ | ||
165 | #define setnegative(a) { signbyte(a) |= 0x80; } /* set the sign bit */ | ||
166 | #define signpositive(a) ( (signbyte(a) & 0x80) == 0 ) /* 1 if the sign bit is clear, else 0 */ | ||
167 | #define signnegative(a) (signbyte(a) & 0x80) /* non-zero (0x80) if the sign bit is set */ | ||
168 | |||
169 | static inline void reg_copy(FPU_REG const *x, FPU_REG *y) | ||
170 | { | ||
171 | *(short *)&(y->exp) = *(const short *)&(x->exp); | ||
172 | *(long long *)&(y->sigl) = *(const long long *)&(x->sigl); | ||
173 | } | ||
174 | |||
175 | #define exponent(x) (((*(short *)&((x)->exp)) & 0x7fff) - EXTENDED_Ebias) /* low 15 bits of the exponent word with EXTENDED_Ebias subtracted */ | ||
176 | #define setexponentpos(x,y) { (*(short *)&((x)->exp)) = \ | ||
177 | ((y) + EXTENDED_Ebias) & 0x7fff; } /* setexponentpos: store y + EXTENDED_Ebias masked to 15 bits, leaving bit 15 clear */ | ||
178 | #define exponent16(x) (*(short *)&((x)->exp)) /* the whole 16-bit exponent word, unmodified */ | ||
179 | #define setexponent16(x,y) { (*(short *)&((x)->exp)) = (y); } /* overwrite the whole 16-bit exponent word with y */ | ||
180 | #define addexponent(x,y) { (*(short *)&((x)->exp)) += (y); } /* add y to the 16-bit exponent word */ | ||
181 | #define stdexp(x) { (*(short *)&((x)->exp)) += EXTENDED_Ebias; } /* add EXTENDED_Ebias to the 16-bit exponent word */ | ||
182 | |||
183 | #define isdenormal(ptr) (exponent(ptr) == EXP_BIAS+EXP_UNDER) /* true when the unbiased exponent equals EXP_BIAS+EXP_UNDER */ | ||
184 | |||
185 | #define significand(x) ( ((unsigned long long *)&((x)->sigl))[0] ) /* the 64 bits starting at sigl (sigl and sigh) as one unsigned value */ | ||
186 | |||
187 | |||
188 | /*----- Prototypes for functions written in assembler -----*/ | ||
189 | /* extern void reg_move(FPU_REG *a, FPU_REG *b); */ | ||
190 | |||
191 | asmlinkage int FPU_normalize(FPU_REG *x); | ||
192 | asmlinkage int FPU_normalize_nuo(FPU_REG *x); | ||
193 | asmlinkage int FPU_u_sub(FPU_REG const *arg1, FPU_REG const *arg2, | ||
194 | FPU_REG *answ, unsigned int control_w, u_char sign, | ||
195 | int expa, int expb); | ||
196 | asmlinkage int FPU_u_mul(FPU_REG const *arg1, FPU_REG const *arg2, | ||
197 | FPU_REG *answ, unsigned int control_w, u_char sign, | ||
198 | int expon); | ||
199 | asmlinkage int FPU_u_div(FPU_REG const *arg1, FPU_REG const *arg2, | ||
200 | FPU_REG *answ, unsigned int control_w, u_char sign); | ||
201 | asmlinkage int FPU_u_add(FPU_REG const *arg1, FPU_REG const *arg2, | ||
202 | FPU_REG *answ, unsigned int control_w, u_char sign, | ||
203 | int expa, int expb); | ||
204 | asmlinkage int wm_sqrt(FPU_REG *n, int dummy1, int dummy2, | ||
205 | unsigned int control_w, u_char sign); | ||
206 | asmlinkage unsigned FPU_shrx(void *l, unsigned x); | ||
207 | asmlinkage unsigned FPU_shrxs(void *v, unsigned x); | ||
208 | asmlinkage unsigned long FPU_div_small(unsigned long long *x, unsigned long y); | ||
209 | asmlinkage int FPU_round(FPU_REG *arg, unsigned int extent, int dummy, | ||
210 | unsigned int control_w, u_char sign); | ||
211 | |||
212 | #ifndef MAKING_PROTO | ||
213 | #include "fpu_proto.h" | ||
214 | #endif | ||
215 | |||
216 | #endif /* __ASSEMBLY__ */ | ||
217 | |||
218 | #endif /* _FPU_EMU_H_ */ | ||
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c new file mode 100644 index 000000000000..1853524c8b57 --- /dev/null +++ b/arch/x86/math-emu/fpu_entry.c | |||
@@ -0,0 +1,761 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | fpu_entry.c | | ||
3 | | | | ||
4 | | The entry functions for wm-FPU-emu | | ||
5 | | | | ||
6 | | Copyright (C) 1992,1993,1994,1996,1997 | | ||
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | ||
8 | | E-mail billm@suburbia.net | | ||
9 | | | | ||
10 | | See the files "README" and "COPYING" for further copyright and warranty | | ||
11 | | information. | | ||
12 | | | | ||
13 | +---------------------------------------------------------------------------*/ | ||
14 | |||
15 | /*---------------------------------------------------------------------------+ | ||
16 | | Note: | | ||
17 | | The file contains code which accesses user memory. | | ||
18 | | Emulator static data may change when user memory is accessed, due to | | ||
19 | | other processes using the emulator while swapping is in progress. | | ||
20 | +---------------------------------------------------------------------------*/ | ||
21 | |||
22 | /*---------------------------------------------------------------------------+ | ||
23 | | math_emulate(), restore_i387_soft() and save_i387_soft() are the only | | ||
24 | | entry points for wm-FPU-emu. | | ||
25 | +---------------------------------------------------------------------------*/ | ||
26 | |||
27 | #include <linux/signal.h> | ||
28 | #include <linux/ptrace.h> | ||
29 | |||
30 | #include <asm/uaccess.h> | ||
31 | #include <asm/desc.h> | ||
32 | |||
33 | #include "fpu_system.h" | ||
34 | #include "fpu_emu.h" | ||
35 | #include "exception.h" | ||
36 | #include "control_w.h" | ||
37 | #include "status_w.h" | ||
38 | |||
39 | #define __BAD__ FPU_illegal /* Illegal on an 80486, causes SIGILL */ | ||
40 | |||
41 | #ifndef NO_UNDOC_CODE /* Un-documented FPU op-codes supported by default. */ | ||
42 | |||
43 | /* WARNING: These codes are not documented by Intel in their 80486 manual | ||
44 | and may not work on FPU clones or later Intel FPUs. */ | ||
45 | |||
46 | /* Changes to support the un-doc codes provided by Linus Torvalds. The number in parentheses after each alias below is its hexadecimal index into st_instr_table. */ | ||
47 | |||
48 | #define _d9_d8_ fstp_i /* unofficial code (19) */ | ||
49 | #define _dc_d0_ fcom_st /* unofficial code (14) */ | ||
50 | #define _dc_d8_ fcompst /* unofficial code (1c) */ | ||
51 | #define _dd_c8_ fxch_i /* unofficial code (0d) */ | ||
52 | #define _de_d0_ fcompst /* unofficial code (16) */ | ||
53 | #define _df_c0_ ffreep /* unofficial code (07) ffree + pop */ | ||
54 | #define _df_c8_ fxch_i /* unofficial code (0f) */ | ||
55 | #define _df_d0_ fstp_i /* unofficial code (17) */ | ||
56 | #define _df_d8_ fstp_i /* unofficial code (1f) */ | ||
57 | |||
58 | static FUNC const st_instr_table[64] = { /* handlers for the register forms (modrm >= 0300); math_emulate() indexes it with (FPU_modrm & 0x38) | (byte1 & 7), so each row is one modrm reg field and each column one opcode byte 0xd8..0xdf */ | ||
59 | fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, _df_c0_, | ||
60 | fmul__, fxch_i, __BAD__, __BAD__, fmul_i, _dd_c8_, fmulp_, _df_c8_, | ||
61 | fcom_st, fp_nop, __BAD__, __BAD__, _dc_d0_, fst_i_, _de_d0_, _df_d0_, | ||
62 | fcompst, _d9_d8_, __BAD__, __BAD__, _dc_d8_, fstp_i, fcompp, _df_d8_, | ||
63 | fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_, | ||
64 | fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__, | ||
65 | fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__, | ||
66 | fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__, | ||
67 | }; | ||
68 | |||
69 | #else /* Support only documented FPU op-codes */ | ||
70 | |||
71 | static FUNC const st_instr_table[64] = { /* same layout and indexing as the table above, with every undocumented entry replaced by __BAD__ */ | ||
72 | fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, __BAD__, | ||
73 | fmul__, fxch_i, __BAD__, __BAD__, fmul_i, __BAD__, fmulp_, __BAD__, | ||
74 | fcom_st, fp_nop, __BAD__, __BAD__, __BAD__, fst_i_, __BAD__, __BAD__, | ||
75 | fcompst, __BAD__, __BAD__, __BAD__, __BAD__, fstp_i, fcompp, __BAD__, | ||
76 | fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_, | ||
77 | fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__, | ||
78 | fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__, | ||
79 | fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__, | ||
80 | }; | ||
81 | |||
82 | #endif /* NO_UNDOC_CODE */ | ||
83 | |||
84 | |||
85 | #define _NONE_ 0 /* Take no special action */ | ||
86 | #define _REG0_ 1 /* Need to check for not empty st(0) */ | ||
87 | #define _REGI_ 2 /* Need to check for not empty st(0) and st(rm) */ | ||
88 | #define _REGi_ 0 /* Uses st(rm); same value as _NONE_, so no check is made before dispatch */ | ||
89 | #define _PUSH_ 3 /* Need to check for space to push onto stack; not used by either table below */ | ||
90 | #define _null_ 4 /* Function illegal or not implemented */ | ||
91 | #define _REGIi 5 /* Uses st(0) and st(rm), result to st(rm) */ | ||
92 | #define _REGIp 6 /* Uses st(0) and st(rm), result to st(rm) then pop */ | ||
93 | #define _REGIc 0 /* Compare st(0) and st(rm); same value as _NONE_ */ | ||
94 | #define _REGIn 0 /* Uses st(0) and st(rm), but handle checks later; same value as _NONE_ */ | ||
95 | |||
96 | #ifndef NO_UNDOC_CODE | ||
97 | |||
98 | /* Un-documented FPU op-codes supported by default. (see above) */ | ||
99 | |||
100 | static u_char const type_table[64] = { /* parallel to st_instr_table (same index); math_emulate() switches on the entry to decide which operand checks to make before calling the handler */ | ||
101 | _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_, | ||
102 | _REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_, | ||
103 | _REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_, | ||
104 | _REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_, | ||
105 | _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_, | ||
106 | _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_, | ||
107 | _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_, | ||
108 | _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_ | ||
109 | }; | ||
110 | |||
111 | #else /* Support only documented FPU op-codes */ | ||
112 | |||
113 | static u_char const type_table[64] = { /* same layout and indexing as the table above, with the undocumented entries set to _null_ */ | ||
114 | _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _null_, | ||
115 | _REGI_, _REGIn, _null_, _null_, _REGIi, _null_, _REGIp, _null_, | ||
116 | _REGIc, _NONE_, _null_, _null_, _null_, _REG0_, _null_, _null_, | ||
117 | _REGIc, _null_, _null_, _null_, _null_, _REG0_, _REGIc, _null_, | ||
118 | _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_, | ||
119 | _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_, | ||
120 | _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_, | ||
121 | _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_ | ||
122 | }; | ||
123 | |||
124 | #endif /* NO_UNDOC_CODE */ | ||
125 | |||
126 | |||
127 | #ifdef RE_ENTRANT_CHECKING | ||
128 | u_char emulating=0; /* tested on entry to math_emulate(); presumably set and cleared by RE_ENTRANT_CHECK_ON/OFF, which are defined outside this file -- confirm */ | ||
129 | #endif /* RE_ENTRANT_CHECKING */ | ||
130 | |||
131 | static int valid_prefix(u_char *Byte, u_char __user **fpu_eip, | ||
132 | overrides *override); /* forward declaration; the definition follows math_emulate() */ | ||
133 | |||
134 | asmlinkage void math_emulate(long arg) /* Emulator entry point: emulates the FPU instruction at FPU_EIP and, while lookahead is enabled, the FPU instructions that directly follow it. */ | ||
135 | { | ||
136 | u_char FPU_modrm, byte1; | ||
137 | unsigned short code; | ||
138 | fpu_addr_modes addr_modes; | ||
139 | int unmasked; | ||
140 | FPU_REG loaded_data; | ||
141 | FPU_REG *st0_ptr; | ||
142 | u_char loaded_tag, st0_tag; | ||
143 | void __user *data_address; | ||
144 | struct address data_sel_off; | ||
145 | struct address entry_sel_off; | ||
146 | unsigned long code_base = 0; | ||
147 | unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ | ||
148 | struct desc_struct code_descriptor; | ||
149 | |||
150 | #ifdef RE_ENTRANT_CHECKING | ||
151 | if ( emulating ) | ||
152 | { | ||
153 | printk("ERROR: wm-FPU-emu is not RE-ENTRANT!\n"); | ||
154 | } | ||
155 | RE_ENTRANT_CHECK_ON; | ||
156 | #endif /* RE_ENTRANT_CHECKING */ | ||
157 | |||
158 | if (!used_math()) /* run finit() once, then record that with set_used_math() */ | ||
159 | { | ||
160 | finit(); | ||
161 | set_used_math(); | ||
162 | } | ||
163 | |||
164 | SETUP_DATA_AREA(arg); | ||
165 | |||
166 | FPU_ORIG_EIP = FPU_EIP; /* start of the current instruction; FPU_EIP is reset to it on the SIGFPE path below */ | ||
167 | |||
168 | if ( (FPU_EFLAGS & 0x00020000) != 0 ) /* 0x00020000 is the VM flag of EFLAGS */ | ||
169 | { | ||
170 | /* Virtual 8086 mode */ | ||
171 | addr_modes.default_mode = VM86; | ||
172 | FPU_EIP += code_base = FPU_CS << 4; | ||
173 | code_limit = code_base + 0xffff; /* Assumes code_base <= 0xffff0000 */ | ||
174 | } | ||
175 | else if ( FPU_CS == __USER_CS && FPU_DS == __USER_DS ) /* default_mode 0: the code_base/code_limit checks below are skipped */ | ||
176 | { | ||
177 | addr_modes.default_mode = 0; | ||
178 | } | ||
179 | else if ( FPU_CS == __KERNEL_CS ) | ||
180 | { | ||
181 | printk("math_emulate: %04x:%08lx\n",FPU_CS,FPU_EIP); | ||
182 | panic("Math emulation needed in kernel"); | ||
183 | } | ||
184 | else | ||
185 | { | ||
186 | |||
187 | if ( (FPU_CS & 4) != 4 ) /* Must be in the LDT */ | ||
188 | { | ||
189 | /* Can only handle segmented addressing via the LDT | ||
190 | for now, and it must be 16 bit */ | ||
191 | printk("FPU emulator: Unsupported addressing mode\n"); | ||
192 | math_abort(FPU_info, SIGILL); | ||
193 | } | ||
194 | |||
195 | code_descriptor = LDT_DESCRIPTOR(FPU_CS); | ||
196 | if ( SEG_D_SIZE(code_descriptor) ) | ||
197 | { | ||
198 | /* The above test may be wrong, the book is not clear */ | ||
199 | /* Segmented 32 bit protected mode */ | ||
200 | addr_modes.default_mode = SEG32; | ||
201 | } | ||
202 | else | ||
203 | { | ||
204 | /* 16 bit protected mode */ | ||
205 | addr_modes.default_mode = PM16; | ||
206 | } | ||
207 | FPU_EIP += code_base = SEG_BASE_ADDR(code_descriptor); | ||
208 | code_limit = code_base | ||
209 | + (SEG_LIMIT(code_descriptor)+1) * SEG_GRANULARITY(code_descriptor) | ||
210 | - 1; | ||
211 | if ( code_limit < code_base ) code_limit = 0xffffffff; /* the sum wrapped around: clamp the limit */ | ||
212 | } | ||
213 | |||
214 | FPU_lookahead = 1; /* allow several consecutive FPU instructions per call (see FPU_fwait_done below) ... */ | ||
215 | if (current->ptrace & PT_PTRACED) /* ... but only one instruction per call for a ptraced task */ | ||
216 | FPU_lookahead = 0; | ||
217 | |||
218 | if ( !valid_prefix(&byte1, (u_char __user **)&FPU_EIP, | ||
219 | &addr_modes.override) ) | ||
220 | { | ||
221 | RE_ENTRANT_CHECK_OFF; | ||
222 | printk("FPU emulator: Unknown prefix byte 0x%02x, probably due to\n" | ||
223 | "FPU emulator: self-modifying code! (emulation impossible)\n", | ||
224 | byte1); | ||
225 | RE_ENTRANT_CHECK_ON; | ||
226 | EXCEPTION(EX_INTERNAL|0x126); | ||
227 | math_abort(FPU_info,SIGILL); | ||
228 | } | ||
229 | |||
230 | do_another_FPU_instruction: | ||
231 | |||
232 | no_ip_update = 0; | ||
233 | |||
234 | FPU_EIP++; /* We have fetched the prefix and first code bytes. */ | ||
235 | |||
236 | if ( addr_modes.default_mode ) | ||
237 | { | ||
238 | /* This checks for the minimum instruction bytes. | ||
239 | We also need to check any extra (address mode) code access. */ | ||
240 | if ( FPU_EIP > code_limit ) | ||
241 | math_abort(FPU_info,SIGSEGV); | ||
242 | } | ||
243 | |||
244 | if ( (byte1 & 0xf8) != 0xd8 ) /* not one of the opcode bytes 0xd8..0xdf */ | ||
245 | { | ||
246 | if ( byte1 == FWAIT_OPCODE ) | ||
247 | { | ||
248 | if (partial_status & SW_Summary) | ||
249 | goto do_the_FPU_interrupt; | ||
250 | else | ||
251 | goto FPU_fwait_done; | ||
252 | } | ||
253 | #ifdef PARANOID | ||
254 | EXCEPTION(EX_INTERNAL|0x128); | ||
255 | math_abort(FPU_info,SIGILL); | ||
256 | #endif /* PARANOID */ | ||
257 | } | ||
258 | |||
259 | RE_ENTRANT_CHECK_OFF; | ||
260 | FPU_code_access_ok(1); | ||
261 | FPU_get_user(FPU_modrm, (u_char __user *) FPU_EIP); | ||
262 | RE_ENTRANT_CHECK_ON; | ||
263 | FPU_EIP++; | ||
264 | |||
265 | if (partial_status & SW_Summary) | ||
266 | { | ||
267 | /* Ignore the error for now if the current instruction is a no-wait | ||
268 | control instruction */ | ||
269 | /* The 80486 manual contradicts itself on this topic, | ||
270 | but a real 80486 uses the following instructions: | ||
271 | fninit, fnstenv, fnsave, fnstsw, fnstcw, fnclex. | ||
272 | */ | ||
273 | code = (FPU_modrm << 8) | byte1; /* modrm in the high byte, opcode byte in the low byte */ | ||
274 | if ( ! ( (((code & 0xf803) == 0xe003) || /* fnclex, fninit, fnstsw */ | ||
275 | (((code & 0x3003) == 0x3001) && /* fnsave, fnstcw, fnstenv, | ||
276 | fnstsw */ | ||
277 | ((code & 0xc000) != 0xc000))) ) ) | ||
278 | { | ||
279 | /* | ||
280 | * We need to simulate the action of the kernel to FPU | ||
281 | * interrupts here. | ||
282 | */ | ||
283 | do_the_FPU_interrupt: | ||
284 | |||
285 | FPU_EIP = FPU_ORIG_EIP; /* Point to current FPU instruction. */ | ||
286 | |||
287 | RE_ENTRANT_CHECK_OFF; | ||
288 | current->thread.trap_no = 16; | ||
289 | current->thread.error_code = 0; | ||
290 | send_sig(SIGFPE, current, 1); | ||
291 | return; | ||
292 | } | ||
293 | } | ||
294 | |||
295 | entry_sel_off.offset = FPU_ORIG_EIP; | ||
296 | entry_sel_off.selector = FPU_CS; | ||
297 | entry_sel_off.opcode = (byte1 << 8) | FPU_modrm; /* note: opposite byte order to 'code' above */ | ||
298 | |||
299 | FPU_rm = FPU_modrm & 7; | ||
300 | |||
301 | if ( FPU_modrm < 0300 ) | ||
302 | { | ||
303 | /* All of these instructions use the mod/rm byte to get a data address */ | ||
304 | |||
305 | if ( (addr_modes.default_mode & SIXTEEN) | ||
306 | ^ (addr_modes.override.address_size == ADDR_SIZE_PREFIX) ) | ||
307 | data_address = FPU_get_address_16(FPU_modrm, &FPU_EIP, &data_sel_off, | ||
308 | addr_modes); | ||
309 | else | ||
310 | data_address = FPU_get_address(FPU_modrm, &FPU_EIP, &data_sel_off, | ||
311 | addr_modes); | ||
312 | |||
313 | if ( addr_modes.default_mode ) | ||
314 | { | ||
315 | if ( FPU_EIP-1 > code_limit ) | ||
316 | math_abort(FPU_info,SIGSEGV); | ||
317 | } | ||
318 | |||
319 | if ( !(byte1 & 1) ) /* even opcode byte: arithmetic or compare of st(0) with a memory operand */ | ||
320 | { | ||
321 | unsigned short status1 = partial_status; | ||
322 | |||
323 | st0_ptr = &st(0); | ||
324 | st0_tag = FPU_gettag0(); | ||
325 | |||
326 | /* Stack underflow has priority */ | ||
327 | if ( NOT_EMPTY_ST0 ) | ||
328 | { | ||
329 | if ( addr_modes.default_mode & PROTECTED ) | ||
330 | { | ||
331 | /* This table works for 16 and 32 bit protected mode */ | ||
332 | if ( access_limit < data_sizes_16[(byte1 >> 1) & 3] ) | ||
333 | math_abort(FPU_info,SIGSEGV); | ||
334 | } | ||
335 | |||
336 | unmasked = 0; /* Do this here to stop compiler warnings. */ | ||
337 | switch ( (byte1 >> 1) & 3 ) /* operand format: 0 single, 1 int32, 2 double, 3 int16 */ | ||
338 | { | ||
339 | case 0: | ||
340 | unmasked = FPU_load_single((float __user *)data_address, | ||
341 | &loaded_data); | ||
342 | loaded_tag = unmasked & 0xff; | ||
343 | unmasked &= ~0xff; | ||
344 | break; | ||
345 | case 1: | ||
346 | loaded_tag = FPU_load_int32((long __user *)data_address, &loaded_data); | ||
347 | break; | ||
348 | case 2: | ||
349 | unmasked = FPU_load_double((double __user *)data_address, | ||
350 | &loaded_data); | ||
351 | loaded_tag = unmasked & 0xff; | ||
352 | unmasked &= ~0xff; | ||
353 | break; | ||
354 | case 3: | ||
355 | default: /* Used here to suppress gcc warnings. */ | ||
356 | loaded_tag = FPU_load_int16((short __user *)data_address, &loaded_data); | ||
357 | break; | ||
358 | } | ||
359 | |||
360 | /* No more access to user memory, it is safe | ||
361 | to use static data now */ | ||
362 | |||
363 | /* NaN operands have the next priority. */ | ||
364 | /* We have to delay looking at st(0) until after | ||
365 | loading the data, because that data might contain an SNaN */ | ||
366 | if ( ((st0_tag == TAG_Special) && isNaN(st0_ptr)) || | ||
367 | ((loaded_tag == TAG_Special) && isNaN(&loaded_data)) ) | ||
368 | { | ||
369 | /* Restore the status word; we might have loaded a | ||
370 | denormal. */ | ||
371 | partial_status = status1; | ||
372 | if ( (FPU_modrm & 0x30) == 0x10 ) | ||
373 | { | ||
374 | /* fcom or fcomp */ | ||
375 | EXCEPTION(EX_Invalid); | ||
376 | setcc(SW_C3 | SW_C2 | SW_C0); | ||
377 | if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) ) | ||
378 | FPU_pop(); /* fcomp, masked, so we pop. */ | ||
379 | } | ||
380 | else | ||
381 | { | ||
382 | if ( loaded_tag == TAG_Special ) | ||
383 | loaded_tag = FPU_Special(&loaded_data); | ||
384 | #ifdef PECULIAR_486 | ||
385 | /* This is not really needed, but gives behaviour | ||
386 | identical to an 80486 */ | ||
387 | if ( (FPU_modrm & 0x28) == 0x20 ) | ||
388 | /* fdiv or fsub */ | ||
389 | real_2op_NaN(&loaded_data, loaded_tag, 0, &loaded_data); | ||
390 | else | ||
391 | #endif /* PECULIAR_486 */ | ||
392 | /* fadd, fdivr, fmul, or fsubr */ | ||
393 | real_2op_NaN(&loaded_data, loaded_tag, 0, st0_ptr); | ||
394 | } | ||
395 | goto reg_mem_instr_done; | ||
396 | } | ||
397 | |||
398 | if ( unmasked && !((FPU_modrm & 0x30) == 0x10) ) | ||
399 | { | ||
400 | /* Is not a comparison instruction. */ | ||
401 | if ( (FPU_modrm & 0x38) == 0x38 ) | ||
402 | { | ||
403 | /* fdivr */ | ||
404 | if ( (st0_tag == TAG_Zero) && | ||
405 | ((loaded_tag == TAG_Valid) | ||
406 | || (loaded_tag == TAG_Special | ||
407 | && isdenormal(&loaded_data))) ) | ||
408 | { | ||
409 | if ( FPU_divide_by_zero(0, getsign(&loaded_data)) | ||
410 | < 0 ) | ||
411 | { | ||
412 | /* We use the fact here that the unmasked | ||
413 | exception in the loaded data was for a | ||
414 | denormal operand */ | ||
415 | /* Restore the state of the denormal op bit */ | ||
416 | partial_status &= ~SW_Denorm_Op; | ||
417 | partial_status |= status1 & SW_Denorm_Op; | ||
418 | } | ||
419 | else | ||
420 | setsign(st0_ptr, getsign(&loaded_data)); | ||
421 | } | ||
422 | } | ||
423 | goto reg_mem_instr_done; | ||
424 | } | ||
425 | |||
426 | switch ( (FPU_modrm >> 3) & 7 ) /* the reg field of modrm selects the operation */ | ||
427 | { | ||
428 | case 0: /* fadd */ | ||
429 | clear_C1(); | ||
430 | FPU_add(&loaded_data, loaded_tag, 0, control_word); | ||
431 | break; | ||
432 | case 1: /* fmul */ | ||
433 | clear_C1(); | ||
434 | FPU_mul(&loaded_data, loaded_tag, 0, control_word); | ||
435 | break; | ||
436 | case 2: /* fcom */ | ||
437 | FPU_compare_st_data(&loaded_data, loaded_tag); | ||
438 | break; | ||
439 | case 3: /* fcomp */ | ||
440 | if ( !FPU_compare_st_data(&loaded_data, loaded_tag) | ||
441 | && !unmasked ) | ||
442 | FPU_pop(); | ||
443 | break; | ||
444 | case 4: /* fsub */ | ||
445 | clear_C1(); | ||
446 | FPU_sub(LOADED|loaded_tag, (int)&loaded_data, control_word); | ||
447 | break; | ||
448 | case 5: /* fsubr */ | ||
449 | clear_C1(); | ||
450 | FPU_sub(REV|LOADED|loaded_tag, (int)&loaded_data, control_word); | ||
451 | break; | ||
452 | case 6: /* fdiv */ | ||
453 | clear_C1(); | ||
454 | FPU_div(LOADED|loaded_tag, (int)&loaded_data, control_word); | ||
455 | break; | ||
456 | case 7: /* fdivr */ | ||
457 | clear_C1(); | ||
458 | if ( st0_tag == TAG_Zero ) | ||
459 | partial_status = status1; /* Undo any denorm tag, | ||
460 | zero-divide has priority. */ | ||
461 | FPU_div(REV|LOADED|loaded_tag, (int)&loaded_data, control_word); | ||
462 | break; | ||
463 | } | ||
464 | } | ||
465 | else | ||
466 | { | ||
467 | if ( (FPU_modrm & 0x30) == 0x10 ) | ||
468 | { | ||
469 | /* The instruction is fcom or fcomp */ | ||
470 | EXCEPTION(EX_StackUnder); | ||
471 | setcc(SW_C3 | SW_C2 | SW_C0); | ||
472 | if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) ) | ||
473 | FPU_pop(); /* fcomp */ | ||
474 | } | ||
475 | else | ||
476 | FPU_stack_underflow(); | ||
477 | } | ||
478 | reg_mem_instr_done: | ||
479 | operand_address = data_sel_off; | ||
480 | } | ||
481 | else | ||
482 | { | ||
483 | if ( !(no_ip_update = /* odd opcode byte: handled by FPU_load_store(); a non-zero result suppresses the operand and instruction pointer updates */ | ||
484 | FPU_load_store(((FPU_modrm & 0x38) | (byte1 & 6)) >> 1, | ||
485 | addr_modes, data_address)) ) | ||
486 | { | ||
487 | operand_address = data_sel_off; | ||
488 | } | ||
489 | } | ||
490 | |||
491 | } | ||
492 | else | ||
493 | { | ||
494 | /* None of these instructions access user memory */ | ||
495 | u_char instr_index = (FPU_modrm & 0x38) | (byte1 & 7); /* common index into type_table and st_instr_table */ | ||
496 | |||
497 | #ifdef PECULIAR_486 | ||
498 | /* This is supposed to be undefined, but a real 80486 seems | ||
499 | to do this: */ | ||
500 | operand_address.offset = 0; | ||
501 | operand_address.selector = FPU_DS; | ||
502 | #endif /* PECULIAR_486 */ | ||
503 | |||
504 | st0_ptr = &st(0); | ||
505 | st0_tag = FPU_gettag0(); | ||
506 | switch ( type_table[(int) instr_index] ) /* operand checks required before the handler is called */ | ||
507 | { | ||
508 | case _NONE_: /* also _REGIc: _REGIn */ | ||
509 | break; | ||
510 | case _REG0_: | ||
511 | if ( !NOT_EMPTY_ST0 ) | ||
512 | { | ||
513 | FPU_stack_underflow(); | ||
514 | goto FPU_instruction_done; | ||
515 | } | ||
516 | break; | ||
517 | case _REGIi: | ||
518 | if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) ) | ||
519 | { | ||
520 | FPU_stack_underflow_i(FPU_rm); | ||
521 | goto FPU_instruction_done; | ||
522 | } | ||
523 | break; | ||
524 | case _REGIp: | ||
525 | if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) ) | ||
526 | { | ||
527 | FPU_stack_underflow_pop(FPU_rm); | ||
528 | goto FPU_instruction_done; | ||
529 | } | ||
530 | break; | ||
531 | case _REGI_: | ||
532 | if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) ) | ||
533 | { | ||
534 | FPU_stack_underflow(); | ||
535 | goto FPU_instruction_done; | ||
536 | } | ||
537 | break; | ||
538 | case _PUSH_: /* Only used by the fld st(i) instruction */ | ||
539 | break; | ||
540 | case _null_: | ||
541 | FPU_illegal(); | ||
542 | goto FPU_instruction_done; | ||
543 | default: | ||
544 | EXCEPTION(EX_INTERNAL|0x111); | ||
545 | goto FPU_instruction_done; | ||
546 | } | ||
547 | (*st_instr_table[(int) instr_index])(); | ||
548 | |||
549 | FPU_instruction_done: | ||
550 | ; | ||
551 | } | ||
552 | |||
553 | if ( ! no_ip_update ) | ||
554 | instruction_address = entry_sel_off; | ||
555 | |||
556 | FPU_fwait_done: | ||
557 | |||
558 | #ifdef DEBUG | ||
559 | RE_ENTRANT_CHECK_OFF; | ||
560 | FPU_printall(); | ||
561 | RE_ENTRANT_CHECK_ON; | ||
562 | #endif /* DEBUG */ | ||
563 | |||
564 | if (FPU_lookahead && !need_resched()) /* try to continue with the next instruction without leaving the emulator */ | ||
565 | { | ||
566 | FPU_ORIG_EIP = FPU_EIP - code_base; | ||
567 | if ( valid_prefix(&byte1, (u_char __user **)&FPU_EIP, | ||
568 | &addr_modes.override) ) | ||
569 | goto do_another_FPU_instruction; | ||
570 | } | ||
571 | |||
572 | if ( addr_modes.default_mode ) /* undo the code_base that was added to FPU_EIP on entry */ | ||
573 | FPU_EIP -= code_base; | ||
574 | |||
575 | RE_ENTRANT_CHECK_OFF; | ||
576 | } | ||
577 | |||
578 | |||
579 | /* Support for prefix bytes is not yet complete. To properly handle | ||
580 | all prefix bytes, further changes are needed in the emulator code | ||
581 | which accesses user address space. Access to separate segments is | ||
582 | important for msdos emulation. */ | ||
583 | static int valid_prefix(u_char *Byte, u_char __user **fpu_eip, | ||
584 | overrides *override) | ||
585 | { | ||
586 | /* Scan prefix bytes starting at *fpu_eip. Returns 1 (with *Byte set) when the scan stops at | ||
587 | FWAIT_OPCODE or at an opcode byte 0xd8..0xdf, and 0 (with *Byte set to the offending byte) otherwise. */ | ||
588 | u_char __user *ip = *fpu_eip; /* scan position; written back only when an opcode byte 0xd8..0xdf is found */ | ||
589 | u_char byte; | ||
590 | |||
591 | *override = (overrides) { 0, 0, PREFIX_DEFAULT }; /* defaults */ | ||
592 | |||
593 | RE_ENTRANT_CHECK_OFF; | ||
594 | FPU_code_access_ok(1); | ||
595 | FPU_get_user(byte, ip); | ||
596 | RE_ENTRANT_CHECK_ON; | ||
597 | |||
598 | for (;;) | ||
599 | { | ||
600 | switch ( byte ) | ||
601 | { | ||
602 | case FWAIT_OPCODE: | ||
603 | /* Accepted as it stands; *fpu_eip is not updated on this path. */ | ||
604 | *Byte = byte; | ||
605 | return 1; | ||
606 | |||
607 | case ADDR_SIZE_PREFIX: | ||
608 | override->address_size = ADDR_SIZE_PREFIX; | ||
609 | break; | ||
610 | case OP_SIZE_PREFIX: | ||
611 | override->operand_size = OP_SIZE_PREFIX; | ||
612 | break; | ||
613 | |||
614 | /* Segment overrides: each one overwrites any earlier one. */ | ||
615 | case PREFIX_CS: | ||
616 | override->segment = PREFIX_CS_; | ||
617 | break; | ||
618 | case PREFIX_DS: | ||
619 | override->segment = PREFIX_DS_; | ||
620 | break; | ||
621 | case PREFIX_ES: | ||
622 | override->segment = PREFIX_ES_; | ||
623 | break; | ||
624 | case PREFIX_FS: | ||
625 | override->segment = PREFIX_FS_; | ||
626 | break; | ||
627 | case PREFIX_GS: | ||
628 | override->segment = PREFIX_GS_; | ||
629 | break; | ||
630 | case PREFIX_SS: | ||
631 | override->segment = PREFIX_SS_; | ||
632 | break; | ||
633 | |||
634 | /* rep.. prefixes have no meaning for FPU instructions */ | ||
635 | case PREFIX_REPE: | ||
636 | case PREFIX_REPNE: | ||
637 | break; | ||
638 | |||
639 | /* lock is not a valid prefix for FPU instructions, | ||
640 | let the cpu handle it to generate a SIGILL: there is no case for PREFIX_LOCK. */ | ||
641 | default: | ||
642 | *Byte = byte; /* Also needed for the caller's error message. */ | ||
643 | if ( (byte & 0xf8) != 0xd8 ) | ||
644 | { | ||
645 | /* Not a valid sequence of prefix bytes followed by | ||
646 | an FPU instruction. */ | ||
647 | return 0; | ||
648 | } | ||
649 | *fpu_eip = ip; | ||
650 | return 1; | ||
651 | } | ||
652 | |||
653 | /* Only the prefix cases break out of the switch: step over the | ||
654 | prefix and fetch the byte that follows it. */ | ||
655 | ip++; | ||
656 | RE_ENTRANT_CHECK_OFF; | ||
657 | FPU_code_access_ok(1); | ||
658 | FPU_get_user(byte, ip); | ||
659 | RE_ENTRANT_CHECK_ON; | ||
660 | } | ||
661 | } | ||
662 | |||
663 | |||
664 | void math_abort(struct info * info, unsigned int signal) /* Abandon the instruction being emulated and send 'signal' to the current task; not expected to return to its caller (see the asm below). */ | ||
665 | { | ||
666 | FPU_EIP = FPU_ORIG_EIP; /* point EIP back at the start of the instruction being emulated */ | ||
667 | current->thread.trap_no = 16; | ||
668 | current->thread.error_code = 0; | ||
669 | send_sig(signal,current,1); | ||
670 | RE_ENTRANT_CHECK_OFF; | ||
671 | __asm__("movl %0,%%esp ; ret": :"g" (((long) info)-4)); /* loads esp with (info - 4) and executes ret; presumably this unwinds directly to the caller of math_emulate() -- confirm against the trap entry code */ | ||
672 | #ifdef PARANOID | ||
673 | printk("ERROR: wm-FPU-emu math_abort failed!\n"); /* reached only if the ret above did not transfer control away */ | ||
674 | #endif /* PARANOID */ | ||
675 | } | ||
676 | |||
677 | |||
678 | |||
679 | #define S387 ((struct i387_soft_struct *)s387) /* typed view of the untyped 's387' argument of the two functions below */ | ||
680 | #define sstatus_word() \ | ||
681 | ((S387->swd & ~SW_Top & 0xffff) | ((S387->ftop << SW_Top_Shift) & SW_Top)) /* low 16 bits of swd with the SW_Top field replaced by ftop */ | ||
682 | |||
683 | int restore_i387_soft(void *s387, struct _fpstate __user *buf) | ||
684 | { | ||
685 | /* Reload the emulator state in s387 from the user image at buf: 7*4 bytes of environment followed by | ||
686 | 8*10 bytes of registers. Returns 0, or -1 as soon as a copy from user space fails. */ | ||
687 | enum { ENV_BYTES = 7*4, REG_BYTES = 8*10 }; | ||
688 | u_char __user *src = (u_char __user *)buf; | ||
689 | u_char *regs = (u_char *)&S387->st_space; | ||
690 | int head, tail, n, top, phys, shift, tagword, tag; | ||
691 | |||
692 | RE_ENTRANT_CHECK_OFF; | ||
693 | FPU_access_ok(VERIFY_READ, src, ENV_BYTES + REG_BYTES); | ||
694 | if (__copy_from_user(&S387->cwd, src, ENV_BYTES)) | ||
695 | return -1; | ||
696 | RE_ENTRANT_CHECK_ON; | ||
697 | src += ENV_BYTES; | ||
698 | |||
699 | /* The top-of-stack field of the status word just loaded fixes where the image's first register goes. */ | ||
700 | S387->ftop = (S387->swd >> SW_Top_Shift) & 7; | ||
701 | head = (S387->ftop & 7) * 10; | ||
702 | tail = REG_BYTES - head; | ||
703 | |||
704 | RE_ENTRANT_CHECK_OFF; | ||
705 | /* Copy all registers in stack order: first up to the end of st_space, then the wrapped-around part. */ | ||
706 | if (__copy_from_user(regs + head, src, tail)) | ||
707 | return -1; | ||
708 | if ( head && __copy_from_user(regs, src + tail, head) ) | ||
709 | return -1; | ||
710 | RE_ENTRANT_CHECK_ON; | ||
711 | |||
712 | /* The tags may need to be corrected now: every non-empty tag is recomputed from the loaded data. */ | ||
713 | tagword = S387->twd; | ||
714 | top = S387->ftop; | ||
715 | for ( n = 0; n < 8; n++ ) | ||
716 | { | ||
717 | phys = (n + top) & 7; | ||
718 | shift = phys * 2; | ||
719 | if ( ((tagword >> shift) & 3) == TAG_Empty ) | ||
720 | continue; | ||
721 | tag = FPU_tagof((FPU_REG *)((u_char *)S387->st_space + 10*phys)); | ||
722 | tagword = (tagword & ~(3 << shift)) | ((tag & 3) << shift); | ||
723 | } | ||
724 | S387->twd = tagword; | ||
725 | return 0; | ||
726 | } | ||
727 | |||
728 | |||
729 | int save_i387_soft(void *s387, struct _fpstate __user * buf) | ||
730 | { | ||
731 | /* Write the emulator state in s387 to the user image at buf (7*4 bytes of environment, then 8*10 bytes of registers). Returns 1, or -1 as soon as a copy to user space fails. */ | ||
732 | enum { ENV_BYTES = 7*4, REG_BYTES = 8*10 }; | ||
733 | u_char __user *dst = (u_char __user *)buf; | ||
734 | u_char *regs = (u_char *)&S387->st_space; | ||
735 | int head = (S387->ftop & 7) * 10; | ||
736 | int tail = REG_BYTES - head; | ||
737 | |||
738 | RE_ENTRANT_CHECK_OFF; | ||
739 | FPU_access_ok(VERIFY_WRITE, dst, ENV_BYTES + REG_BYTES); | ||
740 | #ifdef PECULIAR_486 | ||
741 | /* An 80486 sets nearly all of the reserved bits to 1. */ | ||
742 | S387->cwd = (S387->cwd & ~0xe080) | 0xffff0040; | ||
743 | S387->swd = sstatus_word() | 0xffff0000; | ||
744 | S387->twd |= 0xffff0000; | ||
745 | S387->fcs &= ~0xf8000000; | ||
746 | S387->fos |= 0xffff0000; | ||
747 | #endif /* PECULIAR_486 */ | ||
748 | if (__copy_to_user(dst, &S387->cwd, ENV_BYTES)) | ||
749 | return -1; | ||
750 | RE_ENTRANT_CHECK_ON; | ||
751 | dst += ENV_BYTES; | ||
752 | |||
753 | RE_ENTRANT_CHECK_OFF; | ||
754 | /* Copy all registers in stack order: from the ftop offset to the end of st_space, then the wrapped-around part. */ | ||
755 | if (__copy_to_user(dst, regs + head, tail)) | ||
756 | return -1; | ||
757 | if ( head && __copy_to_user(dst + tail, regs, head) ) | ||
758 | return -1; | ||
759 | RE_ENTRANT_CHECK_ON; | ||
760 | return 1; | ||
761 | } | ||
diff --git a/arch/x86/math-emu/fpu_etc.c b/arch/x86/math-emu/fpu_etc.c new file mode 100644 index 000000000000..e3b5d465587f --- /dev/null +++ b/arch/x86/math-emu/fpu_etc.c | |||
@@ -0,0 +1,143 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | fpu_etc.c | | ||
3 | | | | ||
4 | | Implement a few FPU instructions. | | ||
5 | | | | ||
6 | | Copyright (C) 1992,1993,1994,1997 | | ||
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
8 | | Australia. E-mail billm@suburbia.net | | ||
9 | | | | ||
10 | | | | ||
11 | +---------------------------------------------------------------------------*/ | ||
12 | |||
13 | #include "fpu_system.h" | ||
14 | #include "exception.h" | ||
15 | #include "fpu_emu.h" | ||
16 | #include "status_w.h" | ||
17 | #include "reg_constant.h" | ||
18 | |||
19 | |||
20 | static void fchs(FPU_REG *st0_ptr, u_char st0tag) /* fchs: flip the sign of st(0), or report stack underflow when st(0) is empty */ | ||
21 | { | ||
22 | if ( st0tag == TAG_Empty ) | ||
23 | { | ||
24 | FPU_stack_underflow(); /* nothing to negate */ | ||
25 | return; | ||
26 | } | ||
27 | signbyte(st0_ptr) ^= SIGN_NEG; | ||
28 | clear_C1(); | ||
29 | } | ||
30 | |||
31 | |||
32 | static void fabs(FPU_REG *st0_ptr, u_char st0tag) /* fabs: make st(0) positive, or report stack underflow when st(0) is empty */ | ||
33 | { | ||
34 | if ( st0tag == TAG_Empty ) | ||
35 | { | ||
36 | FPU_stack_underflow(); /* no value to operate on */ | ||
37 | return; | ||
38 | } | ||
39 | setpositive(st0_ptr); | ||
40 | clear_C1(); | ||
41 | } | ||
42 | |||
43 | |||
44 | static void ftst_(FPU_REG *st0_ptr, u_char st0tag) | ||
45 | { | ||
46 | /* ftst: set the condition codes from st(0). C3 for zero; none for a positive and C0 for a | ||
47 | negative valid, denormal or infinite value; C0|C2|C3 when st(0) is empty or a NaN. | ||
48 | Tags other than Zero, Valid, Special and Empty leave the condition codes untouched. */ | ||
49 | int sign_cc; | ||
50 | |||
51 | if ( st0tag == TAG_Zero ) | ||
52 | { | ||
53 | setcc(SW_C3); | ||
54 | } | ||
55 | else if ( st0tag == TAG_Valid ) | ||
56 | { | ||
57 | sign_cc = (getsign(st0_ptr) == SIGN_POS) ? 0 : SW_C0; | ||
58 | setcc(sign_cc); | ||
59 | } | ||
60 | else if ( st0tag == TAG_Empty ) | ||
61 | { | ||
62 | setcc(SW_C0|SW_C2|SW_C3); | ||
63 | EXCEPTION(EX_StackUnder); | ||
64 | } | ||
65 | else if ( st0tag == TAG_Special ) | ||
66 | { | ||
67 | switch ( FPU_Special(st0_ptr) ) | ||
68 | { | ||
69 | case TW_Infinity: | ||
70 | sign_cc = (getsign(st0_ptr) == SIGN_POS) ? 0 : SW_C0; | ||
71 | setcc(sign_cc); | ||
72 | break; | ||
73 | case TW_NaN: | ||
74 | setcc(SW_C0|SW_C2|SW_C3); /* Operand is not comparable */ | ||
75 | EXCEPTION(EX_Invalid); | ||
76 | break; | ||
77 | case TW_Denormal: | ||
78 | sign_cc = (getsign(st0_ptr) == SIGN_POS) ? 0 : SW_C0; | ||
79 | setcc(sign_cc); | ||
80 | if ( denormal_operand() < 0 ) | ||
81 | { | ||
82 | #ifdef PECULIAR_486 | ||
83 | /* This is weird! */ | ||
84 | if (getsign(st0_ptr) == SIGN_POS) | ||
85 | setcc(SW_C3); | ||
86 | #endif /* PECULIAR_486 */ | ||
87 | return; | ||
88 | } | ||
89 | break; | ||
90 | default: | ||
91 | setcc(SW_C0|SW_C2|SW_C3); /* Operand is not comparable */ | ||
92 | EXCEPTION(EX_INTERNAL|0x14); | ||
93 | break; | ||
94 | } | ||
95 | } | ||
96 | } | ||
97 | |||
98 | |||
99 | static void fxam(FPU_REG *st0_ptr, u_char st0tag) | ||
100 | { | ||
101 | /* fxam: classify st(0) into the condition codes. | ||
102 | Empty -> C3|C0, zero -> C3, valid -> C2, denormal -> C2|C3, infinity -> C2|C0, | ||
103 | NaN with the top significand bit set and exponent EXP_OVER -> C0, anything else -> 0. | ||
104 | C1 is added whenever the sign of st(0) is negative. */ | ||
105 | int codes = 0, kind; | ||
106 | |||
107 | if ( st0tag == TAG_Empty ) | ||
108 | codes = SW_C3|SW_C0; | ||
109 | else if ( st0tag == TAG_Zero ) | ||
110 | codes = SW_C3; | ||
111 | else if ( st0tag == TAG_Valid ) | ||
112 | codes = SW_C2; | ||
113 | else if ( st0tag == TAG_Special ) | ||
114 | { | ||
115 | kind = FPU_Special(st0_ptr); | ||
116 | if ( kind == TW_Denormal ) | ||
117 | codes = SW_C2|SW_C3; /* Denormal */ | ||
118 | else if ( kind == TW_Infinity ) | ||
119 | codes = SW_C2|SW_C0; | ||
120 | else if ( kind == TW_NaN ) | ||
121 | { | ||
122 | /* We also use NaN for unsupported types. */ | ||
123 | if ( (st0_ptr->sigh & 0x80000000) && (exponent(st0_ptr) == EXP_OVER) ) | ||
124 | codes = SW_C0; | ||
125 | } | ||
126 | } | ||
127 | |||
128 | /* The sign is examined whatever the tag, including an empty register. */ | ||
129 | if ( getsign(st0_ptr) == SIGN_NEG ) | ||
130 | codes |= SW_C1; | ||
131 | setcc(codes); | ||
132 | } | ||
133 | |||
134 | |||
135 | static FUNC_ST0 const fp_etc_table[] = { /* handlers called by FPU_etc(), indexed by FPU_rm; entries 2, 3, 6 and 7 are illegal */ | ||
136 | fchs, fabs, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal, | ||
137 | ftst_, fxam, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal | ||
138 | }; | ||
139 | |||
140 | /* Dispatch to the fp_etc_table handler selected by FPU_rm, passing it st(0) and the tag of st(0). */ | ||
141 | void FPU_etc(void) { | ||
142 | (*fp_etc_table[FPU_rm])(&st(0), FPU_gettag0()); | ||
143 | } | ||
diff --git a/arch/x86/math-emu/fpu_proto.h b/arch/x86/math-emu/fpu_proto.h new file mode 100644 index 000000000000..37a8a7fe7e2b --- /dev/null +++ b/arch/x86/math-emu/fpu_proto.h | |||
@@ -0,0 +1,140 @@ | |||
1 | #ifndef _FPU_PROTO_H | ||
2 | #define _FPU_PROTO_H | ||
3 | |||
4 | /* errors.c */ | ||
5 | extern void FPU_illegal(void); | ||
6 | extern void FPU_printall(void); | ||
7 | asmlinkage void FPU_exception(int n); | ||
8 | extern int real_1op_NaN(FPU_REG *a); | ||
9 | extern int real_2op_NaN(FPU_REG const *b, u_char tagb, int deststnr, | ||
10 | FPU_REG const *defaultNaN); | ||
11 | asmlinkage int arith_invalid(int deststnr); | ||
12 | asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign); | ||
13 | extern int set_precision_flag(int flags); | ||
14 | asmlinkage void set_precision_flag_up(void); | ||
15 | asmlinkage void set_precision_flag_down(void); | ||
16 | asmlinkage int denormal_operand(void); | ||
17 | asmlinkage int arith_overflow(FPU_REG *dest); | ||
18 | asmlinkage int arith_underflow(FPU_REG *dest); | ||
19 | extern void FPU_stack_overflow(void); | ||
20 | extern void FPU_stack_underflow(void); | ||
21 | extern void FPU_stack_underflow_i(int i); | ||
22 | extern void FPU_stack_underflow_pop(int i); | ||
23 | /* fpu_arith.c */ | ||
24 | extern void fadd__(void); | ||
25 | extern void fmul__(void); | ||
26 | extern void fsub__(void); | ||
27 | extern void fsubr_(void); | ||
28 | extern void fdiv__(void); | ||
29 | extern void fdivr_(void); | ||
30 | extern void fadd_i(void); | ||
31 | extern void fmul_i(void); | ||
32 | extern void fsubri(void); | ||
33 | extern void fsub_i(void); | ||
34 | extern void fdivri(void); | ||
35 | extern void fdiv_i(void); | ||
36 | extern void faddp_(void); | ||
37 | extern void fmulp_(void); | ||
38 | extern void fsubrp(void); | ||
39 | extern void fsubp_(void); | ||
40 | extern void fdivrp(void); | ||
41 | extern void fdivp_(void); | ||
42 | /* fpu_aux.c */ | ||
43 | extern void finit(void); | ||
44 | extern void finit_(void); | ||
45 | extern void fstsw_(void); | ||
46 | extern void fp_nop(void); | ||
47 | extern void fld_i_(void); | ||
48 | extern void fxch_i(void); | ||
49 | extern void ffree_(void); | ||
50 | extern void ffreep(void); | ||
51 | extern void fst_i_(void); | ||
52 | extern void fstp_i(void); | ||
53 | /* fpu_entry.c */ | ||
54 | asmlinkage extern void math_emulate(long arg); | ||
55 | extern void math_abort(struct info *info, unsigned int signal); | ||
56 | /* fpu_etc.c */ | ||
57 | extern void FPU_etc(void); | ||
58 | /* fpu_tags.c */ | ||
59 | extern int FPU_gettag0(void); | ||
60 | extern int FPU_gettagi(int stnr); | ||
61 | extern int FPU_gettag(int regnr); | ||
62 | extern void FPU_settag0(int tag); | ||
63 | extern void FPU_settagi(int stnr, int tag); | ||
64 | extern void FPU_settag(int regnr, int tag); | ||
65 | extern int FPU_Special(FPU_REG const *ptr); | ||
66 | extern int isNaN(FPU_REG const *ptr); | ||
67 | extern void FPU_pop(void); | ||
68 | extern int FPU_empty_i(int stnr); | ||
69 | extern int FPU_stackoverflow(FPU_REG **st_new_ptr); | ||
70 | extern void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr); | ||
71 | extern void FPU_copy_to_reg1(FPU_REG const *r, u_char tag); | ||
72 | extern void FPU_copy_to_reg0(FPU_REG const *r, u_char tag); | ||
73 | /* fpu_trig.c */ | ||
74 | extern void FPU_triga(void); | ||
75 | extern void FPU_trigb(void); | ||
76 | /* get_address.c */ | ||
77 | extern void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip, | ||
78 | struct address *addr, fpu_addr_modes addr_modes); | ||
79 | extern void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip, | ||
80 | struct address *addr, fpu_addr_modes addr_modes); | ||
81 | /* load_store.c */ | ||
82 | extern int FPU_load_store(u_char type, fpu_addr_modes addr_modes, | ||
83 | void __user *data_address); | ||
84 | /* poly_2xm1.c */ | ||
85 | extern int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result); | ||
86 | /* poly_atan.c */ | ||
87 | extern void poly_atan(FPU_REG *st0_ptr, u_char st0_tag, FPU_REG *st1_ptr, | ||
88 | u_char st1_tag); | ||
89 | /* poly_l2.c */ | ||
90 | extern void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign); | ||
91 | extern int poly_l2p1(u_char s0, u_char s1, FPU_REG *r0, FPU_REG *r1, | ||
92 | FPU_REG *d); | ||
93 | /* poly_sin.c */ | ||
94 | extern void poly_sine(FPU_REG *st0_ptr); | ||
95 | extern void poly_cos(FPU_REG *st0_ptr); | ||
96 | /* poly_tan.c */ | ||
97 | extern void poly_tan(FPU_REG *st0_ptr); | ||
98 | /* reg_add_sub.c */ | ||
99 | extern int FPU_add(FPU_REG const *b, u_char tagb, int destrnr, int control_w); | ||
100 | extern int FPU_sub(int flags, int rm, int control_w); | ||
101 | /* reg_compare.c */ | ||
102 | extern int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag); | ||
103 | extern void fcom_st(void); | ||
104 | extern void fcompst(void); | ||
105 | extern void fcompp(void); | ||
106 | extern void fucom_(void); | ||
107 | extern void fucomp(void); | ||
108 | extern void fucompp(void); | ||
109 | /* reg_constant.c */ | ||
110 | extern void fconst(void); | ||
111 | /* reg_ld_str.c */ | ||
112 | extern int FPU_load_extended(long double __user *s, int stnr); | ||
113 | extern int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data); | ||
114 | extern int FPU_load_single(float __user *single, FPU_REG *loaded_data); | ||
115 | extern int FPU_load_int64(long long __user *_s); | ||
116 | extern int FPU_load_int32(long __user *_s, FPU_REG *loaded_data); | ||
117 | extern int FPU_load_int16(short __user *_s, FPU_REG *loaded_data); | ||
118 | extern int FPU_load_bcd(u_char __user *s); | ||
119 | extern int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag, | ||
120 | long double __user *d); | ||
121 | extern int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat); | ||
122 | extern int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, float __user *single); | ||
123 | extern int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d); | ||
124 | extern int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d); | ||
125 | extern int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d); | ||
126 | extern int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d); | ||
127 | extern int FPU_round_to_int(FPU_REG *r, u_char tag); | ||
128 | extern u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s); | ||
129 | extern void frstor(fpu_addr_modes addr_modes, u_char __user *data_address); | ||
130 | extern u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d); | ||
131 | extern void fsave(fpu_addr_modes addr_modes, u_char __user *data_address); | ||
132 | extern int FPU_tagof(FPU_REG *ptr); | ||
133 | /* reg_mul.c */ | ||
134 | extern int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w); | ||
135 | |||
136 | extern int FPU_div(int flags, int regrm, int control_w); | ||
137 | /* reg_convert.c */ | ||
138 | extern int FPU_to_exp16(FPU_REG const *a, FPU_REG *x); | ||
139 | #endif /* _FPU_PROTO_H */ | ||
140 | |||
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h new file mode 100644 index 000000000000..a3ae28c49ddd --- /dev/null +++ b/arch/x86/math-emu/fpu_system.h | |||
@@ -0,0 +1,90 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | fpu_system.h | | ||
3 | | | | ||
4 | | Copyright (C) 1992,1994,1997 | | ||
5 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
6 | | Australia. E-mail billm@suburbia.net | | ||
7 | | | | ||
8 | +---------------------------------------------------------------------------*/ | ||
9 | |||
10 | #ifndef _FPU_SYSTEM_H | ||
11 | #define _FPU_SYSTEM_H | ||
12 | |||
13 | /* system dependent definitions */ | ||
14 | |||
15 | #include <linux/sched.h> | ||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/mm.h> | ||
18 | |||
19 | /* This sets the pointer FPU_info to point to the argument part | ||
20 | of the stack frame of math_emulate() */ | ||
21 | #define SETUP_DATA_AREA(arg) FPU_info = (struct info *) &arg | ||
22 | |||
23 | /* s is always from a cpu register, and the cpu does bounds checking | ||
24 | * during register load --> no further bounds checks needed */ | ||
25 | #define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3]) | ||
26 | #define SEG_D_SIZE(x) ((x).b & (3 << 21)) | ||
27 | #define SEG_G_BIT(x) ((x).b & (1 << 23)) | ||
28 | #define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1) | ||
29 | #define SEG_286_MODE(x) ((x).b & ( 0xff000000 | 0xf0000 | (1 << 23))) | ||
30 | #define SEG_BASE_ADDR(s) (((s).b & 0xff000000) \ | ||
31 | | (((s).b & 0xff) << 16) | ((s).a >> 16)) | ||
32 | #define SEG_LIMIT(s) (((s).b & 0xff0000) | ((s).a & 0xffff)) | ||
33 | #define SEG_EXECUTE_ONLY(s) (((s).b & ((1 << 11) | (1 << 9))) == (1 << 11)) | ||
34 | #define SEG_WRITE_PERM(s) (((s).b & ((1 << 11) | (1 << 9))) == (1 << 9)) | ||
35 | #define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ | ||
36 | == (1 << 10)) | ||
37 | |||
38 | #define I387 (current->thread.i387) | ||
39 | #define FPU_info (I387.soft.info) | ||
40 | |||
41 | #define FPU_CS (*(unsigned short *) &(FPU_info->___cs)) | ||
42 | #define FPU_SS (*(unsigned short *) &(FPU_info->___ss)) | ||
43 | #define FPU_DS (*(unsigned short *) &(FPU_info->___ds)) | ||
44 | #define FPU_EAX (FPU_info->___eax) | ||
45 | #define FPU_EFLAGS (FPU_info->___eflags) | ||
46 | #define FPU_EIP (FPU_info->___eip) | ||
47 | #define FPU_ORIG_EIP (FPU_info->___orig_eip) | ||
48 | |||
49 | #define FPU_lookahead (I387.soft.lookahead) | ||
50 | |||
51 | /* nz if ip_offset and cs_selector are not to be set for the current | ||
52 | instruction. */ | ||
53 | #define no_ip_update (*(u_char *)&(I387.soft.no_update)) | ||
54 | #define FPU_rm (*(u_char *)&(I387.soft.rm)) | ||
55 | |||
56 | /* Number of bytes of data which can be legally accessed by the current | ||
57 | instruction. This only needs to hold a number <= 108, so a byte will do. */ | ||
58 | #define access_limit (*(u_char *)&(I387.soft.alimit)) | ||
59 | |||
60 | #define partial_status (I387.soft.swd) | ||
61 | #define control_word (I387.soft.cwd) | ||
62 | #define fpu_tag_word (I387.soft.twd) | ||
63 | #define registers (I387.soft.st_space) | ||
64 | #define top (I387.soft.ftop) | ||
65 | |||
66 | #define instruction_address (*(struct address *)&I387.soft.fip) | ||
67 | #define operand_address (*(struct address *)&I387.soft.foo) | ||
68 | |||
69 | #define FPU_access_ok(x,y,z) if ( !access_ok(x,y,z) ) \ | ||
70 | math_abort(FPU_info,SIGSEGV) | ||
71 | #define FPU_abort math_abort(FPU_info, SIGSEGV) | ||
72 | |||
73 | #undef FPU_IGNORE_CODE_SEGV | ||
74 | #ifdef FPU_IGNORE_CODE_SEGV | ||
75 | /* access_ok() is very expensive, and causes the emulator to run | ||
76 | about 20% slower if applied to the code. Anyway, errors due to bad | ||
77 | code addresses should be much rarer than errors due to bad data | ||
78 | addresses. */ | ||
79 | #define FPU_code_access_ok(z) | ||
80 | #else | ||
81 | /* A simpler test than access_ok() can probably be done for | ||
82 | FPU_code_access_ok() because the only possible error is to step | ||
83 | past the upper boundary of a legal code area. */ | ||
84 | #define FPU_code_access_ok(z) FPU_access_ok(VERIFY_READ,(void __user *)FPU_EIP,z) | ||
85 | #endif | ||
86 | |||
87 | #define FPU_get_user(x,y) get_user((x),(y)) | ||
88 | #define FPU_put_user(x,y) put_user((x),(y)) | ||
89 | |||
90 | #endif | ||
diff --git a/arch/x86/math-emu/fpu_tags.c b/arch/x86/math-emu/fpu_tags.c new file mode 100644 index 000000000000..cb436fe20e4c --- /dev/null +++ b/arch/x86/math-emu/fpu_tags.c | |||
@@ -0,0 +1,127 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | fpu_tags.c | | ||
3 | | | | ||
4 | | Set FPU register tags. | | ||
5 | | | | ||
6 | | Copyright (C) 1997 | | ||
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | ||
8 | | E-mail billm@jacobi.maths.monash.edu.au | | ||
9 | | | | ||
10 | | | | ||
11 | +---------------------------------------------------------------------------*/ | ||
12 | |||
13 | #include "fpu_emu.h" | ||
14 | #include "fpu_system.h" | ||
15 | #include "exception.h" | ||
16 | |||
17 | /* Pop the register stack: set both tag bits of the current st(0), then advance top by one. */ | ||
18 | void FPU_pop(void) | ||
19 | { | ||
20 | fpu_tag_word |= 3 << (2*(top & 7)); | ||
21 | top += 1; | ||
22 | } | ||
23 | |||
24 | /* Return the 2-bit tag of st(0), i.e. of register number (top & 7). */ | ||
25 | int FPU_gettag0(void) | ||
26 | { | ||
27 | return 3 & (fpu_tag_word >> (2*(top & 7))); | ||
28 | } | ||
29 | |||
30 | /* Return the 2-bit tag of st(stnr); the register number wraps modulo 8. */ | ||
31 | int FPU_gettagi(int stnr) | ||
32 | { | ||
33 | return 3 & (fpu_tag_word >> (2*((stnr+top) & 7))); | ||
34 | } | ||
35 | |||
36 | /* Return the 2-bit tag of register number regnr (taken modulo 8). */ | ||
37 | int FPU_gettag(int regnr) | ||
38 | { | ||
39 | return 3 & (fpu_tag_word >> (2*(regnr & 7))); | ||
40 | } | ||
41 | |||
42 | /* Store the low two bits of tag as the tag of st(0). */ | ||
43 | void FPU_settag0(int tag) | ||
44 | { | ||
45 | int shift = 2*(top & 7); | ||
46 | |||
47 | fpu_tag_word &= ~(3 << shift); /* clear the old tag first */ | ||
48 | fpu_tag_word |= (tag & 3) << shift; | ||
49 | } | ||
50 | |||
51 | /* Store the low two bits of tag as the tag of st(stnr). */ | ||
52 | void FPU_settagi(int stnr, int tag) | ||
53 | { | ||
54 | int shift = 2*((stnr+top) & 7); | ||
55 | |||
56 | fpu_tag_word &= ~(3 << shift); /* clear the old tag first */ | ||
57 | fpu_tag_word |= (tag & 3) << shift; | ||
58 | } | ||
59 | |||
60 | /* Store the low two bits of tag as the tag of register number regnr (taken modulo 8). */ | ||
61 | void FPU_settag(int regnr, int tag) | ||
62 | { | ||
63 | int shift = 2*(regnr & 7); | ||
64 | fpu_tag_word &= ~(3 << shift); | ||
65 | fpu_tag_word |= (tag & 3) << shift; | ||
66 | } | ||
67 | |||
68 | /* Sub-classify a register as TW_Denormal, TW_Infinity or TW_NaN from its exponent and significand. */ | ||
69 | int FPU_Special(FPU_REG const *ptr) | ||
70 | { | ||
71 | int e = exponent(ptr); | ||
72 | |||
73 | if ( e == EXP_BIAS+EXP_UNDER ) | ||
74 | return TW_Denormal; | ||
75 | |||
76 | /* Infinity: over-range exponent with sigh == 0x80000000 and sigl == 0. */ | ||
77 | if ( (e == EXP_BIAS+EXP_OVER) && (ptr->sigl == 0) && (ptr->sigh == 0x80000000) ) | ||
78 | return TW_Infinity; | ||
79 | return TW_NaN; /* every other exponent/significand combination */ | ||
80 | } | ||
81 | |||
82 | /* Return 1 when the exponent is the over-range value and the significand is not that of an infinity, else 0. */ | ||
83 | int isNaN(FPU_REG const *ptr) | ||
84 | { | ||
85 | int inf_sig = (ptr->sigh == 0x80000000) && (ptr->sigl == 0); | ||
86 | return (exponent(ptr) == EXP_BIAS+EXP_OVER) && !inf_sig; | ||
87 | } | ||
88 | |||
89 | /* Return non-zero when st(stnr) is tagged TAG_Empty. */ | ||
90 | int FPU_empty_i(int stnr) | ||
91 | { | ||
92 | int tagbits = 3 & (fpu_tag_word >> (2*((stnr+top) & 7))); | ||
93 | |||
94 | return tagbits == TAG_Empty; | ||
95 | } | ||
96 | |||
97 | /* Point *st_new_ptr at st(-1) and return non-zero when that register's tag is not TAG_Empty. */ | ||
98 | int FPU_stackoverflow(FPU_REG **st_new_ptr) | ||
99 | { | ||
100 | int newtag = 3 & (fpu_tag_word >> (2*((top - 1) & 7))); | ||
101 | *st_new_ptr = &st(-1); | ||
102 | return newtag != TAG_Empty; | ||
103 | } | ||
104 | |||
105 | /* Copy *r into st(stnr) with reg_copy() and record tag for that register with FPU_settagi(). */ | ||
106 | void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr) | ||
107 | { | ||
108 | reg_copy(r, &st(stnr)); | ||
109 | FPU_settagi(stnr, tag); | ||
110 | } | ||
111 | /* Copy *r into st(1) with reg_copy() and record tag for st(1) with FPU_settagi(). */ | ||
112 | void FPU_copy_to_reg1(FPU_REG const *r, u_char tag) | ||
113 | { | ||
114 | reg_copy(r, &st(1)); | ||
115 | FPU_settagi(1, tag); | ||
116 | } | ||
117 | /* Copy *r into st(0) and store the low two bits of tag as the tag of st(0). */ | ||
118 | void FPU_copy_to_reg0(FPU_REG const *r, u_char tag) | ||
119 | { | ||
120 | int shift = 2*(top & 7); | ||
121 | |||
122 | reg_copy(r, &st(0)); | ||
123 | |||
124 | /* Tag update written out inline: clear the two bits, then set them. */ | ||
125 | fpu_tag_word &= ~(3 << shift); | ||
126 | fpu_tag_word |= (tag & 3) << shift; | ||
127 | } | ||
diff --git a/arch/x86/math-emu/fpu_trig.c b/arch/x86/math-emu/fpu_trig.c new file mode 100644 index 000000000000..403cbde1d425 --- /dev/null +++ b/arch/x86/math-emu/fpu_trig.c | |||
@@ -0,0 +1,1845 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | fpu_trig.c | | ||
3 | | | | ||
4 | | Implementation of the FPU "transcendental" functions. | | ||
5 | | | | ||
6 | | Copyright (C) 1992,1993,1994,1997,1999 | | ||
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
8 | | Australia. E-mail billm@melbpc.org.au | | ||
9 | | | | ||
10 | | | | ||
11 | +---------------------------------------------------------------------------*/ | ||
12 | |||
13 | #include "fpu_system.h" | ||
14 | #include "exception.h" | ||
15 | #include "fpu_emu.h" | ||
16 | #include "status_w.h" | ||
17 | #include "control_w.h" | ||
18 | #include "reg_constant.h" | ||
19 | |||
20 | static void rem_kernel(unsigned long long st0, unsigned long long *y, | ||
21 | unsigned long long st1, | ||
22 | unsigned long long q, int n); | ||
23 | |||
24 | #define BETTER_THAN_486 | ||
25 | |||
26 | #define FCOS 4 | ||
27 | |||
28 | /* Used only by fptan, fsin, fcos, and fsincos. */ | ||
29 | /* This routine produces very accurate results, similar to | ||
30 | using a value of pi with more than 128 bits precision. */ | ||
31 | /* Limited measurements show no results worse than 64 bit precision | ||
32 | except for the results for arguments close to 2^63, where the | ||
33 | precision of the result sometimes degrades to about 63.9 bits */ | ||
34 | static int trig_arg(FPU_REG *st0_ptr, int even) | ||
35 | { | ||
36 | FPU_REG tmp; | ||
37 | u_char tmptag; | ||
38 | unsigned long long q; | ||
39 | int old_cw = control_word, saved_status = partial_status; | ||
40 | int tag, st0_tag = TAG_Valid; | ||
41 | /* Exponent of 63 or more: flag C2 and return -1 before st(0) is modified. */ | ||
42 | if ( exponent(st0_ptr) >= 63 ) | ||
43 | { | ||
44 | partial_status |= SW_C2; /* Reduction incomplete. */ | ||
45 | return -1; | ||
46 | } | ||
47 | /* Select chop rounding for the reduction; old_cw is written back before the final return. */ | ||
48 | control_word &= ~CW_RC; | ||
49 | control_word |= RC_CHOP; | ||
50 | /* Work on |st(0)|: q is read from the quotient by CONST_PI2 after FPU_round_to_int(). */ | ||
51 | setpositive(st0_ptr); | ||
52 | tag = FPU_u_div(st0_ptr, &CONST_PI2, &tmp, PR_64_BITS | RC_CHOP | 0x3f, | ||
53 | SIGN_POS); | ||
54 | |||
55 | FPU_round_to_int(&tmp, tag); /* Fortunately, this can't overflow | ||
56 | to 2^64 */ | ||
57 | q = significand(&tmp); | ||
58 | if ( q ) | ||
59 | { | ||
60 | rem_kernel(significand(st0_ptr), | ||
61 | &significand(&tmp), | ||
62 | significand(&CONST_PI2), | ||
63 | q, exponent(st0_ptr) - exponent(&CONST_PI2)); | ||
64 | setexponent16(&tmp, exponent(&CONST_PI2)); | ||
65 | st0_tag = FPU_normalize(&tmp); | ||
66 | FPU_copy_to_reg0(&tmp, st0_tag); | ||
67 | } | ||
68 | /* NOTE(review): this branch appears to replace st(0) by CONST_PI2 - st(0) (REV flag) - confirm against FPU_sub(). */ | ||
69 | if ( (even && !(q & 1)) || (!even && (q & 1)) ) | ||
70 | { | ||
71 | st0_tag = FPU_sub(REV|LOADED|TAG_Valid, (int)&CONST_PI2, FULL_PRECISION); | ||
72 | |||
73 | #ifdef BETTER_THAN_486 | ||
74 | /* So far, the results are exact but based upon a 64 bit | ||
75 | precision approximation to pi/2. The technique used | ||
76 | now is equivalent to using an approximation to pi/2 which | ||
77 | is accurate to about 128 bits. */ | ||
78 | if ( (exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64) || (q > 1) ) | ||
79 | { | ||
80 | /* This code gives the effect of having pi/2 to better than | ||
81 | 128 bits precision. */ | ||
82 | |||
83 | significand(&tmp) = q + 1; | ||
84 | setexponent16(&tmp, 63); | ||
85 | FPU_normalize(&tmp); | ||
86 | tmptag = | ||
87 | FPU_u_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION, SIGN_POS, | ||
88 | exponent(&CONST_PI2extra) + exponent(&tmp)); | ||
89 | setsign(&tmp, getsign(&CONST_PI2extra)); | ||
90 | st0_tag = FPU_add(&tmp, tmptag, 0, FULL_PRECISION); | ||
91 | if ( signnegative(st0_ptr) ) | ||
92 | { | ||
93 | /* CONST_PI2extra is negative, so the result of the addition | ||
94 | can be negative. This means that the argument is actually | ||
95 | in a different quadrant. The correction is always < pi/2, | ||
96 | so it can't overflow into yet another quadrant. */ | ||
97 | setpositive(st0_ptr); | ||
98 | q++; | ||
99 | } | ||
100 | } | ||
101 | #endif /* BETTER_THAN_486 */ | ||
102 | } | ||
103 | #ifdef BETTER_THAN_486 | ||
104 | else | ||
105 | { | ||
106 | /* So far, the results are exact but based upon a 64 bit | ||
107 | precision approximation to pi/2. The technique used | ||
108 | now is equivalent to using an approximation to pi/2 which | ||
109 | is accurate to about 128 bits. */ | ||
110 | if ( ((q > 0) && (exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64)) | ||
111 | || (q > 1) ) | ||
112 | { | ||
113 | /* This code gives the effect of having pi/2 to better than | ||
114 | 128 bits precision. */ | ||
115 | |||
116 | significand(&tmp) = q; | ||
117 | setexponent16(&tmp, 63); | ||
118 | FPU_normalize(&tmp); /* This must return TAG_Valid */ | ||
119 | tmptag = FPU_u_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION, | ||
120 | SIGN_POS, | ||
121 | exponent(&CONST_PI2extra) + exponent(&tmp)); | ||
122 | setsign(&tmp, getsign(&CONST_PI2extra)); | ||
123 | st0_tag = FPU_sub(LOADED|(tmptag & 0x0f), (int)&tmp, | ||
124 | FULL_PRECISION); | ||
125 | if ( (exponent(st0_ptr) == exponent(&CONST_PI2)) && | ||
126 | ((st0_ptr->sigh > CONST_PI2.sigh) | ||
127 | || ((st0_ptr->sigh == CONST_PI2.sigh) | ||
128 | && (st0_ptr->sigl > CONST_PI2.sigl))) ) | ||
129 | { | ||
130 | /* CONST_PI2extra is negative, so the result of the | ||
131 | subtraction can be larger than pi/2. This means | ||
132 | that the argument is actually in a different quadrant. | ||
133 | The correction is always < pi/2, so it can't overflow | ||
134 | into yet another quadrant. */ | ||
135 | st0_tag = FPU_sub(REV|LOADED|TAG_Valid, (int)&CONST_PI2, | ||
136 | FULL_PRECISION); | ||
137 | q++; | ||
138 | } | ||
139 | } | ||
140 | } | ||
141 | #endif /* BETTER_THAN_486 */ | ||
142 | /* Store the final tag, restore the caller's control word, and put back the saved status with C2 cleared. */ | ||
143 | FPU_settag0(st0_tag); | ||
144 | control_word = old_cw; | ||
145 | partial_status = saved_status & ~SW_C2; /* Reduction complete. */ | ||
146 | /* Result: the low two bits of q, OR-ed with even. */ | ||
147 | return (q & 3) | even; | ||
148 | } | ||
149 | |||
150 | |||
151 | /* Convert the long *arg into st(deststnr): value, sign and tag are all written; zero is stored as CONST_Z. */ | ||
152 | static void convert_l2reg(long const *arg, int deststnr) | ||
153 | { | ||
154 | int tag; | ||
155 | long num = *arg; | ||
156 | unsigned long mag; /* magnitude of num */ | ||
157 | u_char sign; | ||
158 | FPU_REG *dest = &st(deststnr); | ||
159 | |||
160 | if (num == 0) | ||
161 | { | ||
162 | FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); | ||
163 | return; | ||
164 | } | ||
165 | |||
166 | if (num > 0) | ||
167 | { mag = num; sign = SIGN_POS; } | ||
168 | else /* negate in unsigned arithmetic: -num would overflow for LONG_MIN */ | ||
169 | { mag = -(unsigned long)num; sign = SIGN_NEG; } | ||
170 | |||
171 | dest->sigh = mag; /* magnitude goes in the high significand word, */ | ||
172 | dest->sigl = 0; /* paired with exponent 31, before normalizing */ | ||
173 | setexponent16(dest, 31); | ||
174 | tag = FPU_normalize(dest); | ||
175 | FPU_settagi(deststnr, tag); | ||
176 | setsign(dest, sign); | ||
177 | } | ||
178 | |||
179 | |||
180 | static void single_arg_error(FPU_REG *st0_ptr, u_char st0_tag) | ||
181 | { | ||
182 | if ( st0_tag == TAG_Empty ) | ||
183 | FPU_stack_underflow(); /* Puts a QNaN in st(0) */ | ||
184 | else if ( st0_tag == TW_NaN ) | ||
185 | real_1op_NaN(st0_ptr); /* return with a NaN in st(0) */ | ||
186 | #ifdef PARANOID | ||
187 | else | ||
188 | EXCEPTION(EX_INTERNAL|0x0112); | ||
189 | #endif /* PARANOID */ | ||
190 | } | ||
191 | |||
192 | |||
193 | static void single_arg_2_error(FPU_REG *st0_ptr, u_char st0_tag) | ||
194 | { | ||
195 | int isNaN; | ||
196 | |||
197 | switch ( st0_tag ) | ||
198 | { | ||
199 | case TW_NaN: | ||
200 | isNaN = (exponent(st0_ptr) == EXP_OVER) && (st0_ptr->sigh & 0x80000000); | ||
201 | if ( isNaN && !(st0_ptr->sigh & 0x40000000) ) /* Signaling ? */ | ||
202 | { | ||
203 | EXCEPTION(EX_Invalid); | ||
204 | if ( control_word & CW_Invalid ) | ||
205 | { | ||
206 | /* The masked response */ | ||
207 | /* Convert to a QNaN */ | ||
208 | st0_ptr->sigh |= 0x40000000; | ||
209 | push(); | ||
210 | FPU_copy_to_reg0(st0_ptr, TAG_Special); | ||
211 | } | ||
212 | } | ||
213 | else if ( isNaN ) | ||
214 | { | ||
215 | /* A QNaN */ | ||
216 | push(); | ||
217 | FPU_copy_to_reg0(st0_ptr, TAG_Special); | ||
218 | } | ||
219 | else | ||
220 | { | ||
221 | /* pseudoNaN or other unsupported */ | ||
222 | EXCEPTION(EX_Invalid); | ||
223 | if ( control_word & CW_Invalid ) | ||
224 | { | ||
225 | /* The masked response */ | ||
226 | FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); | ||
227 | push(); | ||
228 | FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); | ||
229 | } | ||
230 | } | ||
231 | break; /* return with a NaN in st(0) */ | ||
232 | #ifdef PARANOID | ||
233 | default: | ||
234 | EXCEPTION(EX_INTERNAL|0x0112); | ||
235 | #endif /* PARANOID */ | ||
236 | } | ||
237 | } | ||
238 | |||
239 | |||
240 | /*---------------------------------------------------------------------------*/ | ||
241 | /* F2XM1 handler: a Valid st(0) with exponent < 0, or a denormal, is replaced through poly_2xm1(); zero and +infinity are left alone, -infinity becomes -1. */ | ||
242 | static void f2xm1(FPU_REG *st0_ptr, u_char tag) | ||
243 | { | ||
244 | FPU_REG a; | ||
245 | |||
246 | clear_C1(); | ||
247 | |||
248 | if ( tag == TAG_Valid ) | ||
249 | { | ||
250 | /* For an 80486 FPU, the result is undefined if the arg is >= 1.0 */ | ||
251 | if ( exponent(st0_ptr) < 0 ) | ||
252 | { | ||
253 | denormal_arg: | ||
254 | /* Also entered by goto from the TW_Denormal case below. */ | ||
255 | FPU_to_exp16(st0_ptr, &a); | ||
256 | |||
257 | /* poly_2xm1(x) requires 0 < st(0) < 1. */ | ||
258 | poly_2xm1(getsign(st0_ptr), &a, st0_ptr); | ||
259 | } | ||
260 | set_precision_flag_up(); /* 80486 appears to always do this */ | ||
261 | return; | ||
262 | } | ||
263 | /* Zero is returned unchanged. */ | ||
264 | if ( tag == TAG_Zero ) | ||
265 | return; | ||
266 | |||
267 | if ( tag == TAG_Special ) | ||
268 | tag = FPU_Special(st0_ptr); | ||
269 | /* For special operands tag now holds the TW_* value from FPU_Special(). */ | ||
270 | switch ( tag ) | ||
271 | { | ||
272 | case TW_Denormal: | ||
273 | if ( denormal_operand() < 0 ) | ||
274 | return; | ||
275 | goto denormal_arg; | ||
276 | case TW_Infinity: | ||
277 | if ( signnegative(st0_ptr) ) | ||
278 | { | ||
279 | /* -infinity gives -1 (p16-10) */ | ||
280 | FPU_copy_to_reg0(&CONST_1, TAG_Valid); | ||
281 | setnegative(st0_ptr); | ||
282 | } | ||
283 | return; | ||
284 | default: | ||
285 | single_arg_error(st0_ptr, tag); | ||
286 | } | ||
287 | } | ||
288 | |||
289 | /* FPTAN handler: for a usable operand st(0) gets the poly_tan() result (tiny arguments are only rounded) and CONST_1 is then pushed on top. */ | ||
290 | static void fptan(FPU_REG *st0_ptr, u_char st0_tag) | ||
291 | { | ||
292 | FPU_REG *st_new_ptr; | ||
293 | int q; | ||
294 | u_char arg_sign = getsign(st0_ptr); | ||
295 | |||
296 | /* Stack underflow has higher priority */ | ||
297 | if ( st0_tag == TAG_Empty ) | ||
298 | { | ||
299 | FPU_stack_underflow(); /* Puts a QNaN in st(0) */ | ||
300 | if ( control_word & CW_Invalid ) | ||
301 | { | ||
302 | st_new_ptr = &st(-1); | ||
303 | push(); | ||
304 | FPU_stack_underflow(); /* Puts a QNaN in the new st(0) */ | ||
305 | } | ||
306 | return; | ||
307 | } | ||
308 | |||
309 | if ( STACK_OVERFLOW ) | ||
310 | { FPU_stack_overflow(); return; } | ||
311 | |||
312 | if ( st0_tag == TAG_Valid ) | ||
313 | { | ||
314 | if ( exponent(st0_ptr) > -40 ) | ||
315 | { | ||
316 | if ( (q = trig_arg(st0_ptr, 0)) == -1 ) | ||
317 | { | ||
318 | /* Operand is out of range */ | ||
319 | return; | ||
320 | } | ||
321 | /* The result sign combines the low bit of q with the sign of the original argument. */ | ||
322 | poly_tan(st0_ptr); | ||
323 | setsign(st0_ptr, (q & 1) ^ (arg_sign != 0)); | ||
324 | set_precision_flag_up(); /* We do not really know if up or down */ | ||
325 | } | ||
326 | else | ||
327 | { | ||
328 | /* For a small arg, the result == the argument */ | ||
329 | /* Underflow may happen */ | ||
330 | |||
331 | denormal_arg: | ||
332 | /* Also entered by goto from the TW_Denormal check below. */ | ||
333 | FPU_to_exp16(st0_ptr, st0_ptr); | ||
334 | |||
335 | st0_tag = FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign); | ||
336 | FPU_settag0(st0_tag); | ||
337 | } | ||
338 | push(); | ||
339 | FPU_copy_to_reg0(&CONST_1, TAG_Valid); | ||
340 | return; | ||
341 | } | ||
342 | |||
343 | if ( st0_tag == TAG_Zero ) | ||
344 | { | ||
345 | push(); | ||
346 | FPU_copy_to_reg0(&CONST_1, TAG_Valid); | ||
347 | setcc(0); | ||
348 | return; | ||
349 | } | ||
350 | |||
351 | if ( st0_tag == TAG_Special ) | ||
352 | st0_tag = FPU_Special(st0_ptr); | ||
353 | |||
354 | if ( st0_tag == TW_Denormal ) | ||
355 | { | ||
356 | if ( denormal_operand() < 0 ) | ||
357 | return; | ||
358 | |||
359 | goto denormal_arg; | ||
360 | } | ||
361 | |||
362 | if ( st0_tag == TW_Infinity ) | ||
363 | { | ||
364 | /* The 80486 treats infinity as an invalid operand */ | ||
365 | if ( arith_invalid(0) >= 0 ) | ||
366 | { | ||
367 | st_new_ptr = &st(-1); | ||
368 | push(); | ||
369 | arith_invalid(0); | ||
370 | } | ||
371 | return; | ||
372 | } | ||
373 | /* Every remaining tag is handed to single_arg_2_error(). */ | ||
374 | single_arg_2_error(st0_ptr, st0_tag); | ||
375 | } | ||
376 | |||
377 | /* FXTRACT handler: pushes a copy of st(0) with its exponent set to 0 and stores the old exponent, converted by convert_l2reg(), in st(1). */ | ||
378 | static void fxtract(FPU_REG *st0_ptr, u_char st0_tag) | ||
379 | { | ||
380 | FPU_REG *st_new_ptr; | ||
381 | u_char sign; | ||
382 | register FPU_REG *st1_ptr = st0_ptr; /* anticipate */ | ||
383 | |||
384 | if ( STACK_OVERFLOW ) | ||
385 | { FPU_stack_overflow(); return; } | ||
386 | |||
387 | clear_C1(); | ||
388 | |||
389 | if ( st0_tag == TAG_Valid ) | ||
390 | { | ||
391 | long e; | ||
392 | |||
393 | push(); | ||
394 | sign = getsign(st1_ptr); | ||
395 | reg_copy(st1_ptr, st_new_ptr); | ||
396 | setexponent16(st_new_ptr, exponent(st_new_ptr)); | ||
397 | /* Shared tail: the TW_Denormal path jumps in here after filling *st_new_ptr itself. */ | ||
398 | denormal_arg: | ||
399 | |||
400 | e = exponent16(st_new_ptr); | ||
401 | convert_l2reg(&e, 1); | ||
402 | setexponentpos(st_new_ptr, 0); | ||
403 | setsign(st_new_ptr, sign); | ||
404 | FPU_settag0(TAG_Valid); /* Needed if arg was a denormal */ | ||
405 | return; | ||
406 | } | ||
407 | else if ( st0_tag == TAG_Zero ) | ||
408 | { | ||
409 | sign = getsign(st0_ptr); | ||
410 | |||
411 | if ( FPU_divide_by_zero(0, SIGN_NEG) < 0 ) | ||
412 | return; | ||
413 | |||
414 | push(); | ||
415 | FPU_copy_to_reg0(&CONST_Z, TAG_Zero); | ||
416 | setsign(st_new_ptr, sign); | ||
417 | return; | ||
418 | } | ||
419 | |||
420 | if ( st0_tag == TAG_Special ) | ||
421 | st0_tag = FPU_Special(st0_ptr); | ||
422 | |||
423 | if ( st0_tag == TW_Denormal ) | ||
424 | { | ||
425 | if (denormal_operand() < 0 ) | ||
426 | return; | ||
427 | |||
428 | push(); | ||
429 | sign = getsign(st1_ptr); | ||
430 | FPU_to_exp16(st1_ptr, st_new_ptr); | ||
431 | goto denormal_arg; | ||
432 | } | ||
433 | else if ( st0_tag == TW_Infinity ) | ||
434 | { | ||
435 | sign = getsign(st0_ptr); | ||
436 | setpositive(st0_ptr); | ||
437 | push(); | ||
438 | FPU_copy_to_reg0(&CONST_INF, TAG_Special); | ||
439 | setsign(st_new_ptr, sign); | ||
440 | return; | ||
441 | } | ||
442 | else if ( st0_tag == TW_NaN ) | ||
443 | { | ||
444 | if ( real_1op_NaN(st0_ptr) < 0 ) | ||
445 | return; | ||
446 | |||
447 | push(); | ||
448 | FPU_copy_to_reg0(st0_ptr, TAG_Special); | ||
449 | return; | ||
450 | } | ||
451 | else if ( st0_tag == TAG_Empty ) | ||
452 | { | ||
453 | /* Is this the correct behaviour? */ | ||
454 | if ( control_word & CW_Invalid ) /* control-word mask bit, as tested in fptan() */ | ||
455 | { | ||
456 | FPU_stack_underflow(); | ||
457 | push(); | ||
458 | FPU_stack_underflow(); | ||
459 | } | ||
460 | else | ||
461 | EXCEPTION(EX_StackUnder); | ||
462 | } | ||
463 | #ifdef PARANOID | ||
464 | else | ||
465 | EXCEPTION(EX_INTERNAL | 0x119); | ||
466 | #endif /* PARANOID */ | ||
467 | } | ||
468 | |||
469 | /* FDECSTP handler: clear C1 and step top down by one. */ | ||
470 | static void fdecstp(void) | ||
471 | { | ||
472 | clear_C1(); | ||
473 | top -= 1; | ||
474 | } | ||
475 | /* FINCSTP handler: clear C1 and step top up by one. */ | ||
476 | static void fincstp(void) | ||
477 | { | ||
478 | clear_C1(); | ||
479 | top += 1; | ||
480 | } | ||
481 | |||
482 | /* FSQRT handler: replaces st(0) via wm_sqrt(); zero and +infinity are left alone, negative Valid/denormal operands and -infinity go to arith_invalid(0). */ | ||
483 | static void fsqrt_(FPU_REG *st0_ptr, u_char st0_tag) | ||
484 | { | ||
485 | int expon; | ||
486 | |||
487 | clear_C1(); | ||
488 | |||
489 | if ( st0_tag == TAG_Valid ) | ||
490 | { | ||
491 | u_char tag; | ||
492 | |||
493 | if (signnegative(st0_ptr)) | ||
494 | { | ||
495 | arith_invalid(0); /* sqrt(negative) is invalid */ | ||
496 | return; | ||
497 | } | ||
498 | |||
499 | /* make st(0) in [1.0 .. 4.0) */ | ||
500 | expon = exponent(st0_ptr); | ||
501 | |||
502 | denormal_arg: | ||
503 | /* Also entered by goto from the denormal path; the low bit of expon stays in the operand, expon >> 1 is added back after the root. */ | ||
504 | setexponent16(st0_ptr, (expon & 1)); | ||
505 | |||
506 | /* Do the computation, the sign of the result will be positive. */ | ||
507 | tag = wm_sqrt(st0_ptr, 0, 0, control_word, SIGN_POS); | ||
508 | addexponent(st0_ptr, expon >> 1); | ||
509 | FPU_settag0(tag); | ||
510 | return; | ||
511 | } | ||
512 | |||
513 | if ( st0_tag == TAG_Zero ) | ||
514 | return; | ||
515 | |||
516 | if ( st0_tag == TAG_Special ) | ||
517 | st0_tag = FPU_Special(st0_ptr); | ||
518 | |||
519 | if ( st0_tag == TW_Infinity ) | ||
520 | { | ||
521 | if ( signnegative(st0_ptr) ) | ||
522 | arith_invalid(0); /* sqrt(-Infinity) is invalid */ | ||
523 | return; | ||
524 | } | ||
525 | else if ( st0_tag == TW_Denormal ) | ||
526 | { | ||
527 | if (signnegative(st0_ptr)) | ||
528 | { | ||
529 | arith_invalid(0); /* sqrt(negative) is invalid */ | ||
530 | return; | ||
531 | } | ||
532 | |||
533 | if ( denormal_operand() < 0 ) | ||
534 | return; | ||
535 | |||
536 | FPU_to_exp16(st0_ptr, st0_ptr); | ||
537 | |||
538 | expon = exponent16(st0_ptr); | ||
539 | |||
540 | goto denormal_arg; | ||
541 | } | ||
542 | |||
543 | single_arg_error(st0_ptr, st0_tag); | ||
544 | |||
545 | } | ||
546 | |||
547 | /* FRNDINT handler: converts st(0) with FPU_round_to_int() and renormalizes it; zero, infinity and exponents above 63 are returned unchanged. */ | ||
548 | static void frndint_(FPU_REG *st0_ptr, u_char st0_tag) | ||
549 | { | ||
550 | int flags, tag; | ||
551 | |||
552 | if ( st0_tag == TAG_Valid ) | ||
553 | { | ||
554 | u_char sign; | ||
555 | |||
556 | denormal_arg: | ||
557 | /* Also entered by goto when st0_tag has become TW_Denormal. */ | ||
558 | sign = getsign(st0_ptr); | ||
559 | |||
560 | if (exponent(st0_ptr) > 63) | ||
561 | return; | ||
562 | |||
563 | if ( st0_tag == TW_Denormal ) | ||
564 | { | ||
565 | if (denormal_operand() < 0 ) | ||
566 | return; | ||
567 | } | ||
568 | |||
569 | /* Fortunately, this can't overflow to 2^64 */ | ||
570 | if ( (flags = FPU_round_to_int(st0_ptr, st0_tag)) ) | ||
571 | set_precision_flag(flags); | ||
572 | /* Set exponent 63, normalize, then put back the sign saved above and store the new tag. */ | ||
573 | setexponent16(st0_ptr, 63); | ||
574 | tag = FPU_normalize(st0_ptr); | ||
575 | setsign(st0_ptr, sign); | ||
576 | FPU_settag0(tag); | ||
577 | return; | ||
578 | } | ||
579 | |||
580 | if ( st0_tag == TAG_Zero ) | ||
581 | return; | ||
582 | |||
583 | if ( st0_tag == TAG_Special ) | ||
584 | st0_tag = FPU_Special(st0_ptr); | ||
585 | |||
586 | if ( st0_tag == TW_Denormal ) | ||
587 | goto denormal_arg; | ||
588 | else if ( st0_tag == TW_Infinity ) | ||
589 | return; | ||
590 | else | ||
591 | single_arg_error(st0_ptr, st0_tag); | ||
592 | } | ||
593 | |||
594 | |||
595 | static int fsin(FPU_REG *st0_ptr, u_char tag) | ||
596 | { | ||
597 | u_char arg_sign = getsign(st0_ptr); | ||
598 | |||
599 | if ( tag == TAG_Valid ) | ||
600 | { | ||
601 | int q; | ||
602 | |||
603 | if ( exponent(st0_ptr) > -40 ) | ||
604 | { | ||
605 | if ( (q = trig_arg(st0_ptr, 0)) == -1 ) | ||
606 | { | ||
607 | /* Operand is out of range */ | ||
608 | return 1; | ||
609 | } | ||
610 | |||
611 | poly_sine(st0_ptr); | ||
612 | |||
613 | if (q & 2) | ||
614 | changesign(st0_ptr); | ||
615 | |||
616 | setsign(st0_ptr, getsign(st0_ptr) ^ arg_sign); | ||
617 | |||
618 | /* We do not really know if up or down */ | ||
619 | set_precision_flag_up(); | ||
620 | return 0; | ||
621 | } | ||
622 | else | ||
623 | { | ||
624 | /* For a small arg, the result == the argument */ | ||
625 | set_precision_flag_up(); /* Must be up. */ | ||
626 | return 0; | ||
627 | } | ||
628 | } | ||
629 | |||
630 | if ( tag == TAG_Zero ) | ||
631 | { | ||
632 | setcc(0); | ||
633 | return 0; | ||
634 | } | ||
635 | |||
636 | if ( tag == TAG_Special ) | ||
637 | tag = FPU_Special(st0_ptr); | ||
638 | |||
639 | if ( tag == TW_Denormal ) | ||
640 | { | ||
641 | if ( denormal_operand() < 0 ) | ||
642 | return 1; | ||
643 | |||
644 | /* For a small arg, the result == the argument */ | ||
645 | /* Underflow may happen */ | ||
646 | FPU_to_exp16(st0_ptr, st0_ptr); | ||
647 | |||
648 | tag = FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign); | ||
649 | |||
650 | FPU_settag0(tag); | ||
651 | |||
652 | return 0; | ||
653 | } | ||
654 | else if ( tag == TW_Infinity ) | ||
655 | { | ||
656 | /* The 80486 treats infinity as an invalid operand */ | ||
657 | arith_invalid(0); | ||
658 | return 1; | ||
659 | } | ||
660 | else | ||
661 | { | ||
662 | single_arg_error(st0_ptr, tag); | ||
663 | return 1; | ||
664 | } | ||
665 | } | ||
666 | |||
667 | |||
668 | static int f_cos(FPU_REG *st0_ptr, u_char tag) | ||
669 | { | ||
670 | u_char st0_sign; | ||
671 | |||
672 | st0_sign = getsign(st0_ptr); | ||
673 | |||
674 | if ( tag == TAG_Valid ) | ||
675 | { | ||
676 | int q; | ||
677 | |||
678 | if ( exponent(st0_ptr) > -40 ) | ||
679 | { | ||
680 | if ( (exponent(st0_ptr) < 0) | ||
681 | || ((exponent(st0_ptr) == 0) | ||
682 | && (significand(st0_ptr) <= 0xc90fdaa22168c234LL)) ) | ||
683 | { | ||
684 | poly_cos(st0_ptr); | ||
685 | |||
686 | /* We do not really know if up or down */ | ||
687 | set_precision_flag_down(); | ||
688 | |||
689 | return 0; | ||
690 | } | ||
691 | else if ( (q = trig_arg(st0_ptr, FCOS)) != -1 ) | ||
692 | { | ||
693 | poly_sine(st0_ptr); | ||
694 | |||
695 | if ((q+1) & 2) | ||
696 | changesign(st0_ptr); | ||
697 | |||
698 | /* We do not really know if up or down */ | ||
699 | set_precision_flag_down(); | ||
700 | |||
701 | return 0; | ||
702 | } | ||
703 | else | ||
704 | { | ||
705 | /* Operand is out of range */ | ||
706 | return 1; | ||
707 | } | ||
708 | } | ||
709 | else | ||
710 | { | ||
711 | denormal_arg: | ||
712 | |||
713 | setcc(0); | ||
714 | FPU_copy_to_reg0(&CONST_1, TAG_Valid); | ||
715 | #ifdef PECULIAR_486 | ||
716 | set_precision_flag_down(); /* 80486 appears to do this. */ | ||
717 | #else | ||
718 | set_precision_flag_up(); /* Must be up. */ | ||
719 | #endif /* PECULIAR_486 */ | ||
720 | return 0; | ||
721 | } | ||
722 | } | ||
723 | else if ( tag == TAG_Zero ) | ||
724 | { | ||
725 | FPU_copy_to_reg0(&CONST_1, TAG_Valid); | ||
726 | setcc(0); | ||
727 | return 0; | ||
728 | } | ||
729 | |||
730 | if ( tag == TAG_Special ) | ||
731 | tag = FPU_Special(st0_ptr); | ||
732 | |||
733 | if ( tag == TW_Denormal ) | ||
734 | { | ||
735 | if ( denormal_operand() < 0 ) | ||
736 | return 1; | ||
737 | |||
738 | goto denormal_arg; | ||
739 | } | ||
740 | else if ( tag == TW_Infinity ) | ||
741 | { | ||
742 | /* The 80486 treats infinity as an invalid operand */ | ||
743 | arith_invalid(0); | ||
744 | return 1; | ||
745 | } | ||
746 | else | ||
747 | { | ||
748 | single_arg_error(st0_ptr, tag); /* requires st0_ptr == &st(0) */ | ||
749 | return 1; | ||
750 | } | ||
751 | } | ||
752 | |||
753 | |||
/* void-returning wrapper around f_cos(); the int status returned by
   f_cos() is discarded. */
static void fcos(FPU_REG *st0_ptr, u_char st0_tag)
{
  f_cos(st0_ptr, st0_tag);
}
758 | |||
759 | |||
760 | static void fsincos(FPU_REG *st0_ptr, u_char st0_tag) | ||
761 | { | ||
762 | FPU_REG *st_new_ptr; | ||
763 | FPU_REG arg; | ||
764 | u_char tag; | ||
765 | |||
766 | /* Stack underflow has higher priority */ | ||
767 | if ( st0_tag == TAG_Empty ) | ||
768 | { | ||
769 | FPU_stack_underflow(); /* Puts a QNaN in st(0) */ | ||
770 | if ( control_word & CW_Invalid ) | ||
771 | { | ||
772 | st_new_ptr = &st(-1); | ||
773 | push(); | ||
774 | FPU_stack_underflow(); /* Puts a QNaN in the new st(0) */ | ||
775 | } | ||
776 | return; | ||
777 | } | ||
778 | |||
779 | if ( STACK_OVERFLOW ) | ||
780 | { FPU_stack_overflow(); return; } | ||
781 | |||
782 | if ( st0_tag == TAG_Special ) | ||
783 | tag = FPU_Special(st0_ptr); | ||
784 | else | ||
785 | tag = st0_tag; | ||
786 | |||
787 | if ( tag == TW_NaN ) | ||
788 | { | ||
789 | single_arg_2_error(st0_ptr, TW_NaN); | ||
790 | return; | ||
791 | } | ||
792 | else if ( tag == TW_Infinity ) | ||
793 | { | ||
794 | /* The 80486 treats infinity as an invalid operand */ | ||
795 | if ( arith_invalid(0) >= 0 ) | ||
796 | { | ||
797 | /* Masked response */ | ||
798 | push(); | ||
799 | arith_invalid(0); | ||
800 | } | ||
801 | return; | ||
802 | } | ||
803 | |||
804 | reg_copy(st0_ptr, &arg); | ||
805 | if ( !fsin(st0_ptr, st0_tag) ) | ||
806 | { | ||
807 | push(); | ||
808 | FPU_copy_to_reg0(&arg, st0_tag); | ||
809 | f_cos(&st(0), st0_tag); | ||
810 | } | ||
811 | else | ||
812 | { | ||
813 | /* An error, so restore st(0) */ | ||
814 | FPU_copy_to_reg0(&arg, st0_tag); | ||
815 | } | ||
816 | } | ||
817 | |||
818 | |||
819 | /*---------------------------------------------------------------------------*/ | ||
820 | /* The following all require two arguments: st(0) and st(1) */ | ||
821 | |||
/* A lean, mean kernel for the fprem instructions. This relies upon
   the division and rounding to an integer in do_fprem giving an
   exact result. Because of this, rem_kernel() needs to deal only with
   the least significant 64 bits, the more significant bits of the
   result must be zero.

   Computes   *y = (st0 << n) - st1 * q   modulo 2^64.

     st0  significand of the dividend
     y    receives the 64-bit result
     st1  significand of the divisor
     q    integer quotient
     n    left shift applied to st0 before the subtraction; the caller
          must keep it in the range 0..63

   This used to be three inline-asm mull/subl sequences operating on the
   32-bit halves of a local.  Those declared the halves as write-only
   ("=m") outputs even though subl/sbbl read them, so the compiler was
   free to drop the initial store of (st0 << n).  Unsigned 64-bit
   arithmetic wraps modulo 2^64, which is exactly what the three partial
   products computed, so plain C gives the same value without relying on
   asm constraints or on aliasing through (unsigned *).
 */
static void rem_kernel(unsigned long long st0, unsigned long long *y,
                       unsigned long long st1,
                       unsigned long long q, int n)
{
    unsigned long long x = st0 << n;

    /* Only the low 64 bits of the product matter (see above). */
    x -= st1 * q;

    *y = x;
}
858 | |||
859 | |||
860 | /* Remainder of st(0) / st(1) */ | ||
861 | /* This routine produces exact results, i.e. there is never any | ||
862 | rounding or truncation, etc of the result. */ | ||
863 | static void do_fprem(FPU_REG *st0_ptr, u_char st0_tag, int round) | ||
864 | { | ||
865 | FPU_REG *st1_ptr = &st(1); | ||
866 | u_char st1_tag = FPU_gettagi(1); | ||
867 | |||
868 | if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) ) | ||
869 | { | ||
870 | FPU_REG tmp, st0, st1; | ||
871 | u_char st0_sign, st1_sign; | ||
872 | u_char tmptag; | ||
873 | int tag; | ||
874 | int old_cw; | ||
875 | int expdif; | ||
876 | long long q; | ||
877 | unsigned short saved_status; | ||
878 | int cc; | ||
879 | |||
880 | fprem_valid: | ||
881 | /* Convert registers for internal use. */ | ||
882 | st0_sign = FPU_to_exp16(st0_ptr, &st0); | ||
883 | st1_sign = FPU_to_exp16(st1_ptr, &st1); | ||
884 | expdif = exponent16(&st0) - exponent16(&st1); | ||
885 | |||
886 | old_cw = control_word; | ||
887 | cc = 0; | ||
888 | |||
889 | /* We want the status following the denorm tests, but don't want | ||
890 | the status changed by the arithmetic operations. */ | ||
891 | saved_status = partial_status; | ||
892 | control_word &= ~CW_RC; | ||
893 | control_word |= RC_CHOP; | ||
894 | |||
895 | if ( expdif < 64 ) | ||
896 | { | ||
897 | /* This should be the most common case */ | ||
898 | |||
899 | if ( expdif > -2 ) | ||
900 | { | ||
901 | u_char sign = st0_sign ^ st1_sign; | ||
902 | tag = FPU_u_div(&st0, &st1, &tmp, | ||
903 | PR_64_BITS | RC_CHOP | 0x3f, | ||
904 | sign); | ||
905 | setsign(&tmp, sign); | ||
906 | |||
907 | if ( exponent(&tmp) >= 0 ) | ||
908 | { | ||
909 | FPU_round_to_int(&tmp, tag); /* Fortunately, this can't | ||
910 | overflow to 2^64 */ | ||
911 | q = significand(&tmp); | ||
912 | |||
913 | rem_kernel(significand(&st0), | ||
914 | &significand(&tmp), | ||
915 | significand(&st1), | ||
916 | q, expdif); | ||
917 | |||
918 | setexponent16(&tmp, exponent16(&st1)); | ||
919 | } | ||
920 | else | ||
921 | { | ||
922 | reg_copy(&st0, &tmp); | ||
923 | q = 0; | ||
924 | } | ||
925 | |||
926 | if ( (round == RC_RND) && (tmp.sigh & 0xc0000000) ) | ||
927 | { | ||
928 | /* We may need to subtract st(1) once more, | ||
929 | to get a result <= 1/2 of st(1). */ | ||
930 | unsigned long long x; | ||
931 | expdif = exponent16(&st1) - exponent16(&tmp); | ||
932 | if ( expdif <= 1 ) | ||
933 | { | ||
934 | if ( expdif == 0 ) | ||
935 | x = significand(&st1) - significand(&tmp); | ||
936 | else /* expdif is 1 */ | ||
937 | x = (significand(&st1) << 1) - significand(&tmp); | ||
938 | if ( (x < significand(&tmp)) || | ||
939 | /* or equi-distant (from 0 & st(1)) and q is odd */ | ||
940 | ((x == significand(&tmp)) && (q & 1) ) ) | ||
941 | { | ||
942 | st0_sign = ! st0_sign; | ||
943 | significand(&tmp) = x; | ||
944 | q++; | ||
945 | } | ||
946 | } | ||
947 | } | ||
948 | |||
949 | if (q & 4) cc |= SW_C0; | ||
950 | if (q & 2) cc |= SW_C3; | ||
951 | if (q & 1) cc |= SW_C1; | ||
952 | } | ||
953 | else | ||
954 | { | ||
955 | control_word = old_cw; | ||
956 | setcc(0); | ||
957 | return; | ||
958 | } | ||
959 | } | ||
960 | else | ||
961 | { | ||
962 | /* There is a large exponent difference ( >= 64 ) */ | ||
963 | /* To make much sense, the code in this section should | ||
964 | be done at high precision. */ | ||
965 | int exp_1, N; | ||
966 | u_char sign; | ||
967 | |||
968 | /* prevent overflow here */ | ||
969 | /* N is 'a number between 32 and 63' (p26-113) */ | ||
970 | reg_copy(&st0, &tmp); | ||
971 | tmptag = st0_tag; | ||
972 | N = (expdif & 0x0000001f) + 32; /* This choice gives results | ||
973 | identical to an AMD 486 */ | ||
974 | setexponent16(&tmp, N); | ||
975 | exp_1 = exponent16(&st1); | ||
976 | setexponent16(&st1, 0); | ||
977 | expdif -= N; | ||
978 | |||
979 | sign = getsign(&tmp) ^ st1_sign; | ||
980 | tag = FPU_u_div(&tmp, &st1, &tmp, PR_64_BITS | RC_CHOP | 0x3f, | ||
981 | sign); | ||
982 | setsign(&tmp, sign); | ||
983 | |||
984 | FPU_round_to_int(&tmp, tag); /* Fortunately, this can't | ||
985 | overflow to 2^64 */ | ||
986 | |||
987 | rem_kernel(significand(&st0), | ||
988 | &significand(&tmp), | ||
989 | significand(&st1), | ||
990 | significand(&tmp), | ||
991 | exponent(&tmp) | ||
992 | ); | ||
993 | setexponent16(&tmp, exp_1 + expdif); | ||
994 | |||
995 | /* It is possible for the operation to be complete here. | ||
996 | What does the IEEE standard say? The Intel 80486 manual | ||
997 | implies that the operation will never be completed at this | ||
998 | point, and the behaviour of a real 80486 confirms this. | ||
999 | */ | ||
1000 | if ( !(tmp.sigh | tmp.sigl) ) | ||
1001 | { | ||
1002 | /* The result is zero */ | ||
1003 | control_word = old_cw; | ||
1004 | partial_status = saved_status; | ||
1005 | FPU_copy_to_reg0(&CONST_Z, TAG_Zero); | ||
1006 | setsign(&st0, st0_sign); | ||
1007 | #ifdef PECULIAR_486 | ||
1008 | setcc(SW_C2); | ||
1009 | #else | ||
1010 | setcc(0); | ||
1011 | #endif /* PECULIAR_486 */ | ||
1012 | return; | ||
1013 | } | ||
1014 | cc = SW_C2; | ||
1015 | } | ||
1016 | |||
1017 | control_word = old_cw; | ||
1018 | partial_status = saved_status; | ||
1019 | tag = FPU_normalize_nuo(&tmp); | ||
1020 | reg_copy(&tmp, st0_ptr); | ||
1021 | |||
1022 | /* The only condition to be looked for is underflow, | ||
1023 | and it can occur here only if underflow is unmasked. */ | ||
1024 | if ( (exponent16(&tmp) <= EXP_UNDER) && (tag != TAG_Zero) | ||
1025 | && !(control_word & CW_Underflow) ) | ||
1026 | { | ||
1027 | setcc(cc); | ||
1028 | tag = arith_underflow(st0_ptr); | ||
1029 | setsign(st0_ptr, st0_sign); | ||
1030 | FPU_settag0(tag); | ||
1031 | return; | ||
1032 | } | ||
1033 | else if ( (exponent16(&tmp) > EXP_UNDER) || (tag == TAG_Zero) ) | ||
1034 | { | ||
1035 | stdexp(st0_ptr); | ||
1036 | setsign(st0_ptr, st0_sign); | ||
1037 | } | ||
1038 | else | ||
1039 | { | ||
1040 | tag = FPU_round(st0_ptr, 0, 0, FULL_PRECISION, st0_sign); | ||
1041 | } | ||
1042 | FPU_settag0(tag); | ||
1043 | setcc(cc); | ||
1044 | |||
1045 | return; | ||
1046 | } | ||
1047 | |||
1048 | if ( st0_tag == TAG_Special ) | ||
1049 | st0_tag = FPU_Special(st0_ptr); | ||
1050 | if ( st1_tag == TAG_Special ) | ||
1051 | st1_tag = FPU_Special(st1_ptr); | ||
1052 | |||
1053 | if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal)) | ||
1054 | || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid)) | ||
1055 | || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) ) | ||
1056 | { | ||
1057 | if ( denormal_operand() < 0 ) | ||
1058 | return; | ||
1059 | goto fprem_valid; | ||
1060 | } | ||
1061 | else if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) ) | ||
1062 | { | ||
1063 | FPU_stack_underflow(); | ||
1064 | return; | ||
1065 | } | ||
1066 | else if ( st0_tag == TAG_Zero ) | ||
1067 | { | ||
1068 | if ( st1_tag == TAG_Valid ) | ||
1069 | { | ||
1070 | setcc(0); return; | ||
1071 | } | ||
1072 | else if ( st1_tag == TW_Denormal ) | ||
1073 | { | ||
1074 | if ( denormal_operand() < 0 ) | ||
1075 | return; | ||
1076 | setcc(0); return; | ||
1077 | } | ||
1078 | else if ( st1_tag == TAG_Zero ) | ||
1079 | { arith_invalid(0); return; } /* fprem(?,0) always invalid */ | ||
1080 | else if ( st1_tag == TW_Infinity ) | ||
1081 | { setcc(0); return; } | ||
1082 | } | ||
1083 | else if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) ) | ||
1084 | { | ||
1085 | if ( st1_tag == TAG_Zero ) | ||
1086 | { | ||
1087 | arith_invalid(0); /* fprem(Valid,Zero) is invalid */ | ||
1088 | return; | ||
1089 | } | ||
1090 | else if ( st1_tag != TW_NaN ) | ||
1091 | { | ||
1092 | if ( ((st0_tag == TW_Denormal) || (st1_tag == TW_Denormal)) | ||
1093 | && (denormal_operand() < 0) ) | ||
1094 | return; | ||
1095 | |||
1096 | if ( st1_tag == TW_Infinity ) | ||
1097 | { | ||
1098 | /* fprem(Valid,Infinity) is o.k. */ | ||
1099 | setcc(0); return; | ||
1100 | } | ||
1101 | } | ||
1102 | } | ||
1103 | else if ( st0_tag == TW_Infinity ) | ||
1104 | { | ||
1105 | if ( st1_tag != TW_NaN ) | ||
1106 | { | ||
1107 | arith_invalid(0); /* fprem(Infinity,?) is invalid */ | ||
1108 | return; | ||
1109 | } | ||
1110 | } | ||
1111 | |||
1112 | /* One of the registers must contain a NaN if we got here. */ | ||
1113 | |||
1114 | #ifdef PARANOID | ||
1115 | if ( (st0_tag != TW_NaN) && (st1_tag != TW_NaN) ) | ||
1116 | EXCEPTION(EX_INTERNAL | 0x118); | ||
1117 | #endif /* PARANOID */ | ||
1118 | |||
1119 | real_2op_NaN(st1_ptr, st1_tag, 0, st1_ptr); | ||
1120 | |||
1121 | } | ||
1122 | |||
1123 | |||
/* ST(1) <- ST(1) * log ST; pop ST */
/*
   st0_ptr  the ST operand
   st0_tag  tag of ST on entry

   Results are written to st(1) and the stack is popped with FPU_pop() on
   every path that does not return early.  The early returns are taken
   when arith_invalid(), FPU_divide_by_zero(), denormal_operand() or
   real_2op_NaN() return a negative value, and after
   FPU_stack_underflow_pop(1).
 */
static void fyl2x(FPU_REG *st0_ptr, u_char st0_tag)
{
  /* NOTE: `exponent' here is a local FPU_REG; the function-like macro
     exponent(x) is still usable because it is only expanded when followed
     by '('. */
  FPU_REG *st1_ptr = &st(1), exponent;
  u_char st1_tag = FPU_gettagi(1);
  u_char sign;
  int e, tag;

  clear_C1();

  if ( (st0_tag == TAG_Valid) && (st1_tag == TAG_Valid) )
    {
    both_valid:
      /* Both regs are Valid or Denormal */
      if ( signpositive(st0_ptr) )
        {
          if ( st0_tag == TW_Denormal )
            FPU_to_exp16(st0_ptr, st0_ptr);
          else
            /* Convert st(0) for internal use. */
            setexponent16(st0_ptr, exponent(st0_ptr));

          /* Significand is exactly 0x80000000:00000000, so only the
             exponent e is used to build the multiplier below. */
          if ( (st0_ptr->sigh == 0x80000000) && (st0_ptr->sigl == 0) )
            {
              /* Special case. The result can be precise. */
              u_char esign;
              e = exponent16(st0_ptr);
              /* Store |e| in the high significand word and keep its sign
                 separately in esign. */
              if ( e >= 0 )
                {
                  exponent.sigh = e;
                  esign = SIGN_POS;
                }
              else
                {
                  exponent.sigh = -e;
                  esign = SIGN_NEG;
                }
              exponent.sigl = 0;
              setexponent16(&exponent, 31);
              tag = FPU_normalize_nuo(&exponent);
              stdexp(&exponent);
              setsign(&exponent, esign);
              /* presumably multiplies st(1) by `exponent' -- confirm
                 against FPU_mul() */
              tag = FPU_mul(&exponent, tag, 1, FULL_PRECISION);
              if ( tag >= 0 )
                FPU_settagi(1, tag);
            }
          else
            {
              /* The usual case */
              sign = getsign(st1_ptr);
              if ( st1_tag == TW_Denormal )
                FPU_to_exp16(st1_ptr, st1_ptr);
              else
                /* Convert st(1) for internal use. */
                setexponent16(st1_ptr, exponent(st1_ptr));
              poly_l2(st0_ptr, st1_ptr, sign);
            }
        }
      else
        {
          /* negative */
          if ( arith_invalid(1) < 0 )
            return;
        }

      FPU_pop();

      return;
    }

  /* Not both TAG_Valid: classify any TAG_Special operand. */
  if ( st0_tag == TAG_Special )
    st0_tag = FPU_Special(st0_ptr);
  if ( st1_tag == TAG_Special )
    st1_tag = FPU_Special(st1_ptr);

  if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) )
    {
      FPU_stack_underflow_pop(1);
      return;
    }
  else if ( (st0_tag <= TW_Denormal) && (st1_tag <= TW_Denormal) )
    {
      if ( st0_tag == TAG_Zero )
        {
          if ( st1_tag == TAG_Zero )
            {
              /* Both args zero is invalid */
              if ( arith_invalid(1) < 0 )
                return;
            }
          else
            {
              /* This inner `sign' shadows the function-level one. */
              u_char sign;
              sign = getsign(st1_ptr)^SIGN_NEG;
              if ( FPU_divide_by_zero(1, sign) < 0 )
                return;

              setsign(st1_ptr, sign);
            }
        }
      else if ( st1_tag == TAG_Zero )
        {
          /* st(1) contains zero, st(0) valid <> 0 */
          /* Zero is the valid answer */
          sign = getsign(st1_ptr);

          if ( signnegative(st0_ptr) )
            {
              /* log(negative) */
              if ( arith_invalid(1) < 0 )
                return;
            }
          else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
            return;
          else
            {
              /* The sign of the zero result is flipped when st(0) has a
                 negative exponent. */
              if ( exponent(st0_ptr) < 0 )
                sign ^= SIGN_NEG;

              FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
              setsign(st1_ptr, sign);
            }
        }
      else
        {
          /* One or both operands are denormals. */
          if ( denormal_operand() < 0 )
            return;
          goto both_valid;
        }
    }
  else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) )
    {
      if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
        return;
    }
  /* One or both arg must be an infinity */
  else if ( st0_tag == TW_Infinity )
    {
      if ( (signnegative(st0_ptr)) || (st1_tag == TAG_Zero) )
        {
          /* log(-infinity) or 0*log(infinity) */
          if ( arith_invalid(1) < 0 )
            return;
        }
      else
        {
          u_char sign = getsign(st1_ptr);

          if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
            return;

          /* Result is an infinity carrying st(1)'s original sign. */
          FPU_copy_to_reg1(&CONST_INF, TAG_Special);
          setsign(st1_ptr, sign);
        }
    }
  /* st(1) must be infinity here */
  else if ( ((st0_tag == TAG_Valid) || (st0_tag == TW_Denormal))
            && ( signpositive(st0_ptr) ) )
    {
      if ( exponent(st0_ptr) >= 0 )
        {
          if ( (exponent(st0_ptr) == 0) &&
               (st0_ptr->sigh == 0x80000000) &&
               (st0_ptr->sigl == 0) )
            {
              /* st(0) holds 1.0 */
              /* infinity*log(1) */
              if ( arith_invalid(1) < 0 )
                return;
            }
          /* else st(0) is positive and > 1.0 */
        }
      else
        {
          /* st(0) is positive and < 1.0 */

          if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
            return;

          changesign(st1_ptr);
        }
    }
  else
    {
      /* st(0) must be zero or negative */
      if ( st0_tag == TAG_Zero )
        {
          /* This should be invalid, but a real 80486 is happy with it. */

#ifndef PECULIAR_486
          sign = getsign(st1_ptr);
          if ( FPU_divide_by_zero(1, sign) < 0 )
            return;
#endif /* PECULIAR_486 */

          changesign(st1_ptr);
        }
      else if ( arith_invalid(1) < 0 ) /* log(negative) */
        return;
    }

  FPU_pop();
}
1328 | |||
1329 | |||
/*
   Handler for fpatan (two operands: st(0) and st(1)).

   st0_ptr  the st(0) operand
   st0_tag  tag of st(0) on entry

   Valid/Denormal operand pairs are passed to poly_atan().  The remaining
   combinations (infinities and zeros) store one of the constants
   CONST_Z, CONST_PI4, CONST_PI2, CONST_PI, or the sum
   CONST_PI4 + CONST_PI2, in st(1) with the sign st(1) had on entry.
   The stack is popped with FPU_pop() on every path that does not return
   early.
 */
static void fpatan(FPU_REG *st0_ptr, u_char st0_tag)
{
  FPU_REG *st1_ptr = &st(1);
  u_char st1_tag = FPU_gettagi(1);
  int tag;

  clear_C1();
  /* True only when both tags are TAG_Valid. */
  if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) )
    {
    valid_atan:

      poly_atan(st0_ptr, st0_tag, st1_ptr, st1_tag);

      FPU_pop();

      return;
    }

  /* Classify any TAG_Special operand. */
  if ( st0_tag == TAG_Special )
    st0_tag = FPU_Special(st0_ptr);
  if ( st1_tag == TAG_Special )
    st1_tag = FPU_Special(st1_ptr);

  if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal))
       || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid))
       || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) )
    {
      if ( denormal_operand() < 0 )
        return;

      goto valid_atan;
    }
  else if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) )
    {
      FPU_stack_underflow_pop(1);
      return;
    }
  else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) )
    {
      if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) >= 0 )
        FPU_pop();
      return;
    }
  else if ( (st0_tag == TW_Infinity) || (st1_tag == TW_Infinity) )
    {
      /* Saved now because st(1) is overwritten below; re-applied at the
         end of this branch. */
      u_char sign = getsign(st1_ptr);
      if ( st0_tag == TW_Infinity )
        {
          if ( st1_tag == TW_Infinity )
            {
              if ( signpositive(st0_ptr) )
                {
                  FPU_copy_to_reg1(&CONST_PI4, TAG_Valid);
                }
              else
                {
                  /* st(0) is -infinity: store CONST_PI4 + CONST_PI2. */
                  setpositive(st1_ptr);
                  tag = FPU_u_add(&CONST_PI4, &CONST_PI2, st1_ptr,
                                  FULL_PRECISION, SIGN_POS,
                                  exponent(&CONST_PI4), exponent(&CONST_PI2));
                  if ( tag >= 0 )
                    FPU_settagi(1, tag);
                }
            }
          else
            {
              if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
                return;

              if ( signpositive(st0_ptr) )
                {
                  FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
                  setsign(st1_ptr, sign); /* An 80486 preserves the sign */
                  /* This path returns without calling
                     set_precision_flag_up(). */
                  FPU_pop();
                  return;
                }
              else
                {
                  FPU_copy_to_reg1(&CONST_PI, TAG_Valid);
                }
            }
        }
      else
        {
          /* st(1) is infinity, st(0) not infinity */
          if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
            return;

          FPU_copy_to_reg1(&CONST_PI2, TAG_Valid);
        }
      setsign(st1_ptr, sign);
    }
  else if ( st1_tag == TAG_Zero )
    {
      /* st(0) must be valid or zero */
      u_char sign = getsign(st1_ptr);

      if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
        return;

      if ( signpositive(st0_ptr) )
        {
          /* An 80486 preserves the sign */
          FPU_pop();
          return;
        }

      FPU_copy_to_reg1(&CONST_PI, TAG_Valid);
      setsign(st1_ptr, sign);
    }
  else if ( st0_tag == TAG_Zero )
    {
      /* st(1) must be TAG_Valid here */
      u_char sign = getsign(st1_ptr);

      if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
        return;

      FPU_copy_to_reg1(&CONST_PI2, TAG_Valid);
      setsign(st1_ptr, sign);
    }
#ifdef PARANOID
  else
    EXCEPTION(EX_INTERNAL | 0x125);
#endif /* PARANOID */

  FPU_pop();
  set_precision_flag_up(); /* We do not really know if up or down */
}
1459 | |||
1460 | |||
/* fprem: do_fprem() with the round argument set to RC_CHOP. */
static void fprem(FPU_REG *st0_ptr, u_char st0_tag)
{
  do_fprem(st0_ptr, st0_tag, RC_CHOP);
}
1465 | |||
1466 | |||
/* fprem1: do_fprem() with the round argument set to RC_RND, which
   enables do_fprem()'s extra "subtract st(1) once more" step. */
static void fprem1(FPU_REG *st0_ptr, u_char st0_tag)
{
  do_fprem(st0_ptr, st0_tag, RC_RND);
}
1471 | |||
1472 | |||
/*
   Handler for fyl2xp1 (two operands: st(0) and st(1)).

   st0_ptr  the st(0) operand
   st0_tag  tag of st(0) on entry

   Valid/Denormal operand pairs are converted with FPU_to_exp16() and
   passed to poly_l2p1(), which writes to st(1).  Every path that does
   not return early ends by popping the stack with FPU_pop().  The early
   returns are taken when poly_l2p1() returns non-zero, when one of the
   exception helpers returns a negative value, after
   FPU_stack_underflow_pop(1), and on the PARANOID internal-error paths.
 */
static void fyl2xp1(FPU_REG *st0_ptr, u_char st0_tag)
{
  u_char sign, sign1;
  FPU_REG *st1_ptr = &st(1), a, b;
  u_char st1_tag = FPU_gettagi(1);

  clear_C1();
  /* True only when both tags are TAG_Valid. */
  if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) )
    {
    valid_yl2xp1:

      sign = getsign(st0_ptr);
      sign1 = getsign(st1_ptr);

      FPU_to_exp16(st0_ptr, &a);
      FPU_to_exp16(st1_ptr, &b);

      /* A non-zero return from poly_l2p1() skips the pop. */
      if ( poly_l2p1(sign, sign1, &a, &b, st1_ptr) )
        return;

      FPU_pop();
      return;
    }

  /* Classify any TAG_Special operand. */
  if ( st0_tag == TAG_Special )
    st0_tag = FPU_Special(st0_ptr);
  if ( st1_tag == TAG_Special )
    st1_tag = FPU_Special(st1_ptr);

  if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal))
       || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid))
       || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) )
    {
      if ( denormal_operand() < 0 )
        return;

      goto valid_yl2xp1;
    }
  /* Bitwise | of two 0/1 comparison results; same truth value as ||. */
  else if ( (st0_tag == TAG_Empty) | (st1_tag == TAG_Empty) )
    {
      FPU_stack_underflow_pop(1);
      return;
    }
  else if ( st0_tag == TAG_Zero )
    {
      switch ( st1_tag )
        {
        case TW_Denormal:
          if ( denormal_operand() < 0 )
            return;
          /* fall through: a denormal st(1) is then handled like
             Zero/Valid */

        case TAG_Zero:
        case TAG_Valid:
          /* The zero in st(0) gets the XOR of both signs and is copied
             into st(1). */
          setsign(st0_ptr, getsign(st0_ptr) ^ getsign(st1_ptr));
          FPU_copy_to_reg1(st0_ptr, st0_tag);
          break;

        case TW_Infinity:
          /* Infinity*log(1) */
          if ( arith_invalid(1) < 0 )
            return;
          break;

        case TW_NaN:
          if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
            return;
          break;

        default:
#ifdef PARANOID
          EXCEPTION(EX_INTERNAL | 0x116);
          return;
#endif /* PARANOID */
          break;
        }
    }
  else if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) )
    {
      /* Only Zero, Infinity and NaN in st(1) are listed; any other st(1)
         tag matches no case and falls through to the FPU_pop() at the
         end. */
      switch ( st1_tag )
        {
        case TAG_Zero:
          if ( signnegative(st0_ptr) )
            {
              if ( exponent(st0_ptr) >= 0 )
                {
                  /* st(0) holds <= -1.0 */
#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */
                  changesign(st1_ptr);
#else
                  if ( arith_invalid(1) < 0 )
                    return;
#endif /* PECULIAR_486 */
                }
              else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
                return;
              else
                changesign(st1_ptr);
            }
          else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
            return;
          break;

        case TW_Infinity:
          if ( signnegative(st0_ptr) )
            {
              if ( (exponent(st0_ptr) >= 0) &&
                   !((st0_ptr->sigh == 0x80000000) &&
                     (st0_ptr->sigl == 0)) )
                {
                  /* st(0) holds < -1.0 */
#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */
                  changesign(st1_ptr);
#else
                  if ( arith_invalid(1) < 0 ) return;
#endif /* PECULIAR_486 */
                }
              else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
                return;
              else
                changesign(st1_ptr);
            }
          else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
            return;
          break;

        case TW_NaN:
          if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
            return;
        }

    }
  else if ( st0_tag == TW_NaN )
    {
      if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
        return;
    }
  else if ( st0_tag == TW_Infinity )
    {
      if ( st1_tag == TW_NaN )
        {
          if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
            return;
        }
      else if ( signnegative(st0_ptr) )
        {
          /* The two PECULIAR_486 blocks below only change whether
             arith_invalid(1) is called before or after the denormal
             check. */
#ifndef PECULIAR_486
          /* This should have higher priority than denormals, but... */
          if ( arith_invalid(1) < 0 ) /* log(-infinity) */
            return;
#endif /* PECULIAR_486 */
          if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
            return;
#ifdef PECULIAR_486
          /* Denormal operands actually get higher priority */
          if ( arith_invalid(1) < 0 ) /* log(-infinity) */
            return;
#endif /* PECULIAR_486 */
        }
      else if ( st1_tag == TAG_Zero )
        {
          /* log(infinity) */
          if ( arith_invalid(1) < 0 )
            return;
        }

      /* st(1) must be valid here. */

      else if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
        return;

      /* The Manual says that log(Infinity) is invalid, but a real
         80486 sensibly says that it is o.k. */
      else
        {
          /* Result is an infinity carrying st(1)'s original sign. */
          u_char sign = getsign(st1_ptr);
          FPU_copy_to_reg1(&CONST_INF, TAG_Special);
          setsign(st1_ptr, sign);
        }
    }
#ifdef PARANOID
  else
    {
      EXCEPTION(EX_INTERNAL | 0x117);
      return;
    }
#endif /* PARANOID */

  FPU_pop();
  return;

}
1664 | |||
1665 | |||
/*
 * fscale: scale st(0) by a power of two taken from st(1).
 *
 * st0_ptr / st0_tag describe st(0); st(1) and its tag are fetched here.
 * When both registers are tagged TAG_Valid, st(1) is converted to an
 * integer with RC_CHOP rounding, that integer is added to the exponent
 * of st(0), and FPU_round() is used to detect overflow/underflow.
 * The remaining code deals with zero, denormal, infinity, NaN and empty
 * operands.  Results are written to st(0); nothing is popped here.
 */
static void fscale(FPU_REG *st0_ptr, u_char st0_tag)
{
  FPU_REG *st1_ptr = &st(1);
  u_char st1_tag = FPU_gettagi(1);
  int old_cw = control_word;       /* Restored after the chopped conversion. */
  u_char sign = getsign(st0_ptr);  /* Sign of st(0), re-applied to results. */

  clear_C1();
  /* The expression below is non-zero unless both tags equal TAG_Valid. */
  if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) )
    {
      long scale;
      FPU_REG tmp;

      /* Convert register for internal use. */
      setexponent16(st0_ptr, exponent(st0_ptr));

      /* Also entered by goto from the special-tag handling below, once
         st(0) has been converted with FPU_to_exp16(). */
    valid_scale:

      if ( exponent(st1_ptr) > 30 )
        {
          /* 2^31 is far too large, would require 2^(2^30) or 2^(-2^30) */

          if ( signpositive(st1_ptr) )
            {
              EXCEPTION(EX_Overflow);
              FPU_copy_to_reg0(&CONST_INF, TAG_Special);
            }
          else
            {
              EXCEPTION(EX_Underflow);
              FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
            }
          setsign(st0_ptr, sign);
          return;
        }

      /* Select RC_CHOP rounding only while st(1) is converted to an
         integer; a copy is converted so that st(1) itself is untouched. */
      control_word &= ~CW_RC;
      control_word |= RC_CHOP;
      reg_copy(st1_ptr, &tmp);
      FPU_round_to_int(&tmp, st1_tag);      /* This can never overflow here */
      control_word = old_cw;
      scale = signnegative(st1_ptr) ? -tmp.sigl : tmp.sigl;
      scale += exponent16(st0_ptr);

      setexponent16(st0_ptr, scale);

      /* Use FPU_round() to properly detect under/overflow etc */
      FPU_round(st0_ptr, 0, 0, control_word, sign);

      return;
    }

  /* Refine TAG_Special into the specific class returned by FPU_Special()
     (compared against TW_Denormal, TW_Infinity and TW_NaN below). */
  if ( st0_tag == TAG_Special )
    st0_tag = FPU_Special(st0_ptr);
  if ( st1_tag == TAG_Special )
    st1_tag = FPU_Special(st1_ptr);

  if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) )
    {
      switch ( st1_tag )
        {
        case TAG_Valid:
          /* st(0) must be a denormal */
          if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
            return;

          FPU_to_exp16(st0_ptr, st0_ptr);  /* Will not be left on stack */
          goto valid_scale;

        case TAG_Zero:
          /* Scaling by zero leaves st(0) as it is. */
          if ( st0_tag == TW_Denormal )
            denormal_operand();
          return;

        case TW_Denormal:
          /* st(0) is left unmodified by this function. */
          denormal_operand();
          return;

        case TW_Infinity:
          if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
            return;

          /* +infinity scale gives infinity, -infinity scale gives zero;
             either way the result carries the sign of st(0). */
          if ( signpositive(st1_ptr) )
            FPU_copy_to_reg0(&CONST_INF, TAG_Special);
          else
            FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
          setsign(st0_ptr, sign);
          return;

        case TW_NaN:
          real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
          return;
        }
    }
  else if ( st0_tag == TAG_Zero )
    {
      switch ( st1_tag )
        {
        case TAG_Valid:
        case TAG_Zero:
          return;

        case TW_Denormal:
          denormal_operand();
          return;

        case TW_Infinity:
          if ( signpositive(st1_ptr) )
            arith_invalid(0); /* Zero scaled by +Infinity */
          return;

        case TW_NaN:
          real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
          return;
        }
    }
  else if ( st0_tag == TW_Infinity )
    {
      switch ( st1_tag )
        {
        case TAG_Valid:
        case TAG_Zero:
          return;

        case TW_Denormal:
          denormal_operand();
          return;

        case TW_Infinity:
          if ( signnegative(st1_ptr) )
            arith_invalid(0); /* Infinity scaled by -Infinity */
          return;

        case TW_NaN:
          real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
          return;
        }
    }
  else if ( st0_tag == TW_NaN )
    {
      if ( st1_tag != TAG_Empty )
        { real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); return; }
    }

  /* Every combination of non-empty operands has returned by now, so
     reaching this point with two non-empty tags is an internal error. */
#ifdef PARANOID
  if ( !((st0_tag == TAG_Empty) || (st1_tag == TAG_Empty)) )
    {
      EXCEPTION(EX_INTERNAL | 0x115);
      return;
    }
#endif

  /* At least one of st(0), st(1) must be empty */
  FPU_stack_underflow();

}
1822 | |||
1823 | |||
1824 | /*---------------------------------------------------------------------------*/ | ||
1825 | |||
1826 | static FUNC_ST0 const trig_table_a[] = { | ||
1827 | f2xm1, fyl2x, fptan, fpatan, | ||
1828 | fxtract, fprem1, (FUNC_ST0)fdecstp, (FUNC_ST0)fincstp | ||
1829 | }; | ||
1830 | |||
1831 | void FPU_triga(void) | ||
1832 | { | ||
1833 | (trig_table_a[FPU_rm])(&st(0), FPU_gettag0()); | ||
1834 | } | ||
1835 | |||
1836 | |||
1837 | static FUNC_ST0 const trig_table_b[] = | ||
1838 | { | ||
1839 | fprem, fyl2xp1, fsqrt_, fsincos, frndint_, fscale, (FUNC_ST0)fsin, fcos | ||
1840 | }; | ||
1841 | |||
1842 | void FPU_trigb(void) | ||
1843 | { | ||
1844 | (trig_table_b[FPU_rm])(&st(0), FPU_gettag0()); | ||
1845 | } | ||
diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c new file mode 100644 index 000000000000..2e2c51a8bd3a --- /dev/null +++ b/arch/x86/math-emu/get_address.c | |||
@@ -0,0 +1,438 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | get_address.c | | ||
3 | | | | ||
4 | | Get the effective address from an FPU instruction. | | ||
5 | | | | ||
6 | | Copyright (C) 1992,1993,1994,1997 | | ||
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
8 | | Australia. E-mail billm@suburbia.net | | ||
9 | | | | ||
10 | | | | ||
11 | +---------------------------------------------------------------------------*/ | ||
12 | |||
13 | /*---------------------------------------------------------------------------+ | ||
14 | | Note: | | ||
15 | | The file contains code which accesses user memory. | | ||
16 | | Emulator static data may change when user memory is accessed, due to | | ||
17 | | other processes using the emulator while swapping is in progress. | | ||
18 | +---------------------------------------------------------------------------*/ | ||
19 | |||
20 | |||
21 | #include <linux/stddef.h> | ||
22 | |||
23 | #include <asm/uaccess.h> | ||
24 | #include <asm/desc.h> | ||
25 | |||
26 | #include "fpu_system.h" | ||
27 | #include "exception.h" | ||
28 | #include "fpu_emu.h" | ||
29 | |||
30 | |||
31 | #define FPU_WRITE_BIT 0x10 | ||
32 | |||
33 | static int reg_offset[] = { | ||
34 | offsetof(struct info,___eax), | ||
35 | offsetof(struct info,___ecx), | ||
36 | offsetof(struct info,___edx), | ||
37 | offsetof(struct info,___ebx), | ||
38 | offsetof(struct info,___esp), | ||
39 | offsetof(struct info,___ebp), | ||
40 | offsetof(struct info,___esi), | ||
41 | offsetof(struct info,___edi) | ||
42 | }; | ||
43 | |||
44 | #define REG_(x) (*(long *)(reg_offset[(x)]+(u_char *) FPU_info)) | ||
45 | |||
46 | static int reg_offset_vm86[] = { | ||
47 | offsetof(struct info,___cs), | ||
48 | offsetof(struct info,___vm86_ds), | ||
49 | offsetof(struct info,___vm86_es), | ||
50 | offsetof(struct info,___vm86_fs), | ||
51 | offsetof(struct info,___vm86_gs), | ||
52 | offsetof(struct info,___ss), | ||
53 | offsetof(struct info,___vm86_ds) | ||
54 | }; | ||
55 | |||
56 | #define VM86_REG_(x) (*(unsigned short *) \ | ||
57 | (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info)) | ||
58 | |||
59 | /* This dummy, gs is not saved on the stack. */ | ||
60 | #define ___GS ___ds | ||
61 | |||
62 | static int reg_offset_pm[] = { | ||
63 | offsetof(struct info,___cs), | ||
64 | offsetof(struct info,___ds), | ||
65 | offsetof(struct info,___es), | ||
66 | offsetof(struct info,___fs), | ||
67 | offsetof(struct info,___GS), | ||
68 | offsetof(struct info,___ss), | ||
69 | offsetof(struct info,___ds) | ||
70 | }; | ||
71 | |||
72 | #define PM_REG_(x) (*(unsigned short *) \ | ||
73 | (reg_offset_pm[((unsigned)x)]+(u_char *) FPU_info)) | ||
74 | |||
75 | |||
/*
 * Decode the SIB byte and any displacement which follows it.
 *
 * mod      the MOD field of the ModR/M byte.  This function assumes
 *          mod != 3 (the caller only gets here when mod != 3); mod == 0
 *          is handled explicitly below.
 * fpu_eip  points to the user-space address of the SIB byte; it is
 *          advanced past the SIB byte and past any displacement read.
 *
 * Returns base + (index << ss) + displacement.  The sum is accumulated
 * in a long and returned as an int.
 */
static int sib(int mod, unsigned long *fpu_eip)
{
  u_char ss,index,base;
  long offset;

  RE_ENTRANT_CHECK_OFF;
  FPU_code_access_ok(1);
  FPU_get_user(base, (u_char __user *) (*fpu_eip));   /* The SIB byte */
  RE_ENTRANT_CHECK_ON;
  (*fpu_eip)++;
  /* Split the byte: bits 7-6 scale, bits 5-3 index, bits 2-0 base. */
  ss = base >> 6;
  index = (base >> 3) & 7;
  base &= 7;

  if ((mod == 0) && (base == 5))
    offset = 0;              /* No base register */
  else
    offset = REG_(base);

  if (index == 4)
    {
      /* No index register */
      /* A non-zero ss is illegal */
      if ( ss )
        EXCEPTION(EX_Invalid);
    }
  else
    {
      offset += (REG_(index)) << ss;
    }

  if (mod == 1)
    {
      /* 8 bit signed displacement */
      long displacement;
      RE_ENTRANT_CHECK_OFF;
      FPU_code_access_ok(1);
      FPU_get_user(displacement, (signed char __user *) (*fpu_eip));
      offset += displacement;
      RE_ENTRANT_CHECK_ON;
      (*fpu_eip)++;
    }
  else if (mod == 2 || base == 5) /* The second condition also has mod==0 */
    {
      /* 32 bit displacement */
      long displacement;
      RE_ENTRANT_CHECK_OFF;
      FPU_code_access_ok(4);
      FPU_get_user(displacement, (long __user *) (*fpu_eip));
      offset += displacement;
      RE_ENTRANT_CHECK_ON;
      (*fpu_eip) += 4;
    }

  return offset;
}
133 | |||
134 | |||
135 | static unsigned long vm86_segment(u_char segment, | ||
136 | struct address *addr) | ||
137 | { | ||
138 | segment--; | ||
139 | #ifdef PARANOID | ||
140 | if ( segment > PREFIX_SS_ ) | ||
141 | { | ||
142 | EXCEPTION(EX_INTERNAL|0x130); | ||
143 | math_abort(FPU_info,SIGSEGV); | ||
144 | } | ||
145 | #endif /* PARANOID */ | ||
146 | addr->selector = VM86_REG_(segment); | ||
147 | return (unsigned long)VM86_REG_(segment) << 4; | ||
148 | } | ||
149 | |||
150 | |||
151 | /* This should work for 16 and 32 bit protected mode. */ | ||
152 | static long pm_address(u_char FPU_modrm, u_char segment, | ||
153 | struct address *addr, long offset) | ||
154 | { | ||
155 | struct desc_struct descriptor; | ||
156 | unsigned long base_address, limit, address, seg_top; | ||
157 | |||
158 | segment--; | ||
159 | |||
160 | #ifdef PARANOID | ||
161 | /* segment is unsigned, so this also detects if segment was 0: */ | ||
162 | if ( segment > PREFIX_SS_ ) | ||
163 | { | ||
164 | EXCEPTION(EX_INTERNAL|0x132); | ||
165 | math_abort(FPU_info,SIGSEGV); | ||
166 | } | ||
167 | #endif /* PARANOID */ | ||
168 | |||
169 | switch ( segment ) | ||
170 | { | ||
171 | /* gs isn't used by the kernel, so it still has its | ||
172 | user-space value. */ | ||
173 | case PREFIX_GS_-1: | ||
174 | /* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */ | ||
175 | savesegment(gs, addr->selector); | ||
176 | break; | ||
177 | default: | ||
178 | addr->selector = PM_REG_(segment); | ||
179 | } | ||
180 | |||
181 | descriptor = LDT_DESCRIPTOR(PM_REG_(segment)); | ||
182 | base_address = SEG_BASE_ADDR(descriptor); | ||
183 | address = base_address + offset; | ||
184 | limit = base_address | ||
185 | + (SEG_LIMIT(descriptor)+1) * SEG_GRANULARITY(descriptor) - 1; | ||
186 | if ( limit < base_address ) limit = 0xffffffff; | ||
187 | |||
188 | if ( SEG_EXPAND_DOWN(descriptor) ) | ||
189 | { | ||
190 | if ( SEG_G_BIT(descriptor) ) | ||
191 | seg_top = 0xffffffff; | ||
192 | else | ||
193 | { | ||
194 | seg_top = base_address + (1 << 20); | ||
195 | if ( seg_top < base_address ) seg_top = 0xffffffff; | ||
196 | } | ||
197 | access_limit = | ||
198 | (address <= limit) || (address >= seg_top) ? 0 : | ||
199 | ((seg_top-address) >= 255 ? 255 : seg_top-address); | ||
200 | } | ||
201 | else | ||
202 | { | ||
203 | access_limit = | ||
204 | (address > limit) || (address < base_address) ? 0 : | ||
205 | ((limit-address) >= 254 ? 255 : limit-address+1); | ||
206 | } | ||
207 | if ( SEG_EXECUTE_ONLY(descriptor) || | ||
208 | (!SEG_WRITE_PERM(descriptor) && (FPU_modrm & FPU_WRITE_BIT)) ) | ||
209 | { | ||
210 | access_limit = 0; | ||
211 | } | ||
212 | return address; | ||
213 | } | ||
214 | |||
215 | |||
216 | /* | ||
217 | MOD R/M byte: MOD == 3 has a special use for the FPU | ||
218 | SIB byte used iff R/M = 100b | ||
219 | |||
220 | 7 6 5 4 3 2 1 0 | ||
221 | ..... ......... ......... | ||
222 | MOD OPCODE(2) R/M | ||
223 | |||
224 | |||
225 | SIB byte | ||
226 | |||
227 | 7 6 5 4 3 2 1 0 | ||
228 | ..... ......... ......... | ||
229 | SS INDEX BASE | ||
230 | |||
231 | */ | ||
232 | |||
/*
 * Decode the memory operand of an FPU instruction which uses 32 bit
 * addressing.
 *
 * FPU_modrm   the ModR/M byte of the instruction.
 * fpu_eip     user-space address of the byte following ModR/M; advanced
 *             past any SIB byte and displacement which is read.
 * addr        receives the operand offset (addr->offset) and the
 *             selector used (addr->selector).
 * addr_modes  default addressing mode and segment override.
 *
 * Returns the user address of the operand.
 */
void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip,
                             struct address *addr,
                             fpu_addr_modes addr_modes)
{
  u_char mod;
  unsigned rm = FPU_modrm & 7;
  long *cpu_reg_ptr;
  int address = 0;     /* Initialized just to stop compiler warnings. */

  /* Memory accessed via the cs selector is write protected
     in `non-segmented' 32 bit protected mode. */
  if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
      && (addr_modes.override.segment == PREFIX_CS_) )
    {
      math_abort(FPU_info,SIGSEGV);
    }

  addr->selector = FPU_DS;   /* Default, for 32 bit non-segmented mode. */

  mod = (FPU_modrm >> 6) & 3;

  if (rm == 4 && mod != 3)
    {
      /* R/M == 100b: a SIB byte follows; sib() also reads the displacement. */
      address = sib(mod, fpu_eip);
    }
  else
    {
      cpu_reg_ptr = & REG_(rm);
      switch (mod)
        {
        case 0:
          /* NOTE(review): both mod == 0 paths return directly and so
             bypass the segment handling at the end of this function --
             confirm that this is intended for the segmented modes. */
          if (rm == 5)
            {
              /* Special case: disp32 */
              RE_ENTRANT_CHECK_OFF;
              FPU_code_access_ok(4);
              FPU_get_user(address, (unsigned long __user *) (*fpu_eip));
              (*fpu_eip) += 4;
              RE_ENTRANT_CHECK_ON;
              addr->offset = address;
              return (void __user *) address;
            }
          else
            {
              address = *cpu_reg_ptr;  /* Just return the contents
                                          of the cpu register */
              addr->offset = address;
              return (void __user *) address;
            }
        case 1:
          /* 8 bit signed displacement */
          RE_ENTRANT_CHECK_OFF;
          FPU_code_access_ok(1);
          FPU_get_user(address, (signed char __user *) (*fpu_eip));
          RE_ENTRANT_CHECK_ON;
          (*fpu_eip)++;
          break;
        case 2:
          /* 32 bit displacement */
          RE_ENTRANT_CHECK_OFF;
          FPU_code_access_ok(4);
          FPU_get_user(address, (long __user *) (*fpu_eip));
          (*fpu_eip) += 4;
          RE_ENTRANT_CHECK_ON;
          break;
        case 3:
          /* Not legal for the FPU */
          EXCEPTION(EX_Invalid);
        }
      /* Displacement (if any) plus the register selected by R/M. */
      address += *cpu_reg_ptr;
    }

  addr->offset = address;

  /* Apply the segment, if the addressing mode has one. */
  switch ( addr_modes.default_mode )
    {
    case 0:
      break;
    case VM86:
      address += vm86_segment(addr_modes.override.segment, addr);
      break;
    case PM16:
    case SEG32:
      address = pm_address(FPU_modrm, addr_modes.override.segment,
                           addr, address);
      break;
    default:
      EXCEPTION(EX_INTERNAL|0x133);
    }

  return (void __user *)address;
}
325 | |||
326 | |||
/*
 * Decode the memory operand of an FPU instruction which uses 16 bit
 * addressing.
 *
 * FPU_modrm   the ModR/M byte of the instruction.
 * fpu_eip     user-space address of the byte following ModR/M; advanced
 *             past any displacement which is read.
 * addr        receives the 16 bit operand offset (addr->offset) and the
 *             selector used (addr->selector).
 * addr_modes  default addressing mode and segment override; passed by
 *             value, so the override adjustments made below are local.
 *
 * Returns the user address of the operand.
 */
void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip,
                                struct address *addr,
                                fpu_addr_modes addr_modes)
{
  u_char mod;
  unsigned rm = FPU_modrm & 7;
  int address = 0;     /* Default used for mod == 0 */

  /* Memory accessed via the cs selector is write protected
     in `non-segmented' 32 bit protected mode. */
  if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
      && (addr_modes.override.segment == PREFIX_CS_) )
    {
      math_abort(FPU_info,SIGSEGV);
    }

  addr->selector = FPU_DS;   /* Default, for 32 bit non-segmented mode. */

  mod = (FPU_modrm >> 6) & 3;

  /* First fetch the displacement, if any, into address. */
  switch (mod)
    {
    case 0:
      if (rm == 6)
        {
          /* Special case: disp16 */
          RE_ENTRANT_CHECK_OFF;
          FPU_code_access_ok(2);
          FPU_get_user(address, (unsigned short __user *) (*fpu_eip));
          (*fpu_eip) += 2;
          RE_ENTRANT_CHECK_ON;
          /* No register is added for this form. */
          goto add_segment;
        }
      break;
    case 1:
      /* 8 bit signed displacement */
      RE_ENTRANT_CHECK_OFF;
      FPU_code_access_ok(1);
      FPU_get_user(address, (signed char __user *) (*fpu_eip));
      RE_ENTRANT_CHECK_ON;
      (*fpu_eip)++;
      break;
    case 2:
      /* 16 bit displacement */
      RE_ENTRANT_CHECK_OFF;
      FPU_code_access_ok(2);
      FPU_get_user(address, (unsigned short __user *) (*fpu_eip));
      (*fpu_eip) += 2;
      RE_ENTRANT_CHECK_ON;
      break;
    case 3:
      /* Not legal for the FPU */
      EXCEPTION(EX_Invalid);
      break;
    }
  /* Then add the register(s) selected by R/M.  The forms which use ebp
     switch to the ss segment unless a segment override was given. */
  switch ( rm )
    {
    case 0:
      address += FPU_info->___ebx + FPU_info->___esi;
      break;
    case 1:
      address += FPU_info->___ebx + FPU_info->___edi;
      break;
    case 2:
      address += FPU_info->___ebp + FPU_info->___esi;
      if ( addr_modes.override.segment == PREFIX_DEFAULT )
        addr_modes.override.segment = PREFIX_SS_;
      break;
    case 3:
      address += FPU_info->___ebp + FPU_info->___edi;
      if ( addr_modes.override.segment == PREFIX_DEFAULT )
        addr_modes.override.segment = PREFIX_SS_;
      break;
    case 4:
      address += FPU_info->___esi;
      break;
    case 5:
      address += FPU_info->___edi;
      break;
    case 6:
      address += FPU_info->___ebp;
      if ( addr_modes.override.segment == PREFIX_DEFAULT )
        addr_modes.override.segment = PREFIX_SS_;
      break;
    case 7:
      address += FPU_info->___ebx;
      break;
    }

 add_segment:
  /* Offsets wrap at 64k with 16 bit addressing. */
  address &= 0xffff;

  addr->offset = address;

  /* Apply the segment, if the addressing mode has one. */
  switch ( addr_modes.default_mode )
    {
    case 0:
      break;
    case VM86:
      address += vm86_segment(addr_modes.override.segment, addr);
      break;
    case PM16:
    case SEG32:
      address = pm_address(FPU_modrm, addr_modes.override.segment,
                           addr, address);
      break;
    default:
      EXCEPTION(EX_INTERNAL|0x131);
    }

  return (void __user *)address ;
}
diff --git a/arch/x86/math-emu/load_store.c b/arch/x86/math-emu/load_store.c new file mode 100644 index 000000000000..eebd6fb1c8a8 --- /dev/null +++ b/arch/x86/math-emu/load_store.c | |||
@@ -0,0 +1,272 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | load_store.c | | ||
3 | | | | ||
4 | | This file contains most of the code to interpret the FPU instructions | | ||
5 | | which load and store from user memory. | | ||
6 | | | | ||
7 | | Copyright (C) 1992,1993,1994,1997 | | ||
8 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
9 | | Australia. E-mail billm@suburbia.net | | ||
10 | | | | ||
11 | | | | ||
12 | +---------------------------------------------------------------------------*/ | ||
13 | |||
14 | /*---------------------------------------------------------------------------+ | ||
15 | | Note: | | ||
16 | | The file contains code which accesses user memory. | | ||
17 | | Emulator static data may change when user memory is accessed, due to | | ||
18 | | other processes using the emulator while swapping is in progress. | | ||
19 | +---------------------------------------------------------------------------*/ | ||
20 | |||
21 | #include <asm/uaccess.h> | ||
22 | |||
23 | #include "fpu_system.h" | ||
24 | #include "exception.h" | ||
25 | #include "fpu_emu.h" | ||
26 | #include "status_w.h" | ||
27 | #include "control_w.h" | ||
28 | |||
29 | |||
/* Classification of each load/store instruction, as stored in
   type_table[] and switched on at the start of FPU_load_store(). */
#define _NONE_ 0   /* st0_ptr etc not needed */
#define _REG0_ 1   /* Will be storing st(0) */
#define _PUSH_ 3   /* Need to check for space to push onto stack */
#define _null_ 4   /* Function illegal or not implemented */

/* Mark st(0) as empty and advance the stack top past it. */
#define pop_0()	{ FPU_settag0(TAG_Empty); top++; }


/* Instruction class for each of the 32 values of `type' handled by
   FPU_load_store(); the rows correspond to octal 000-003, 004-007, ...
   up to 034-037. */
static u_char const type_table[32] = {
  _PUSH_, _PUSH_, _PUSH_, _PUSH_,
  _null_, _null_, _null_, _null_,
  _REG0_, _REG0_, _REG0_, _REG0_,
  _REG0_, _REG0_, _REG0_, _REG0_,
  _NONE_, _null_, _NONE_, _PUSH_,
  _NONE_, _PUSH_, _null_, _PUSH_,
  _NONE_, _null_, _NONE_, _REG0_,
  _NONE_, _REG0_, _NONE_, _REG0_
  };

/* Size in bytes of the memory operand for each `type', compared with
   access_limit in PM16 mode (e.g. 14 bytes for fldenv, 94 for frstor).
   Not static, unlike data_sizes_32 -- presumably referenced from another
   file; confirm before changing the linkage. */
u_char const data_sizes_16[32] = {
  4,  4,  8,  2,  0,  0,  0,  0,
  4,  4,  8,  2,  4,  4,  8,  2,
  14, 0, 94, 10,  2, 10,  0,  8,
  14, 0, 94, 10,  2, 10,  2,  8
};

/* Size in bytes of the memory operand for each `type', compared with
   access_limit in SEG32 mode (e.g. 28 bytes for fldenv, 108 for frstor). */
static u_char const data_sizes_32[32] = {
  4,  4,  8,  2,  0,  0,  0,  0,
  4,  4,  8,  2,  4,  4,  8,  2,
  28, 0,108, 10,  2, 10,  0,  8,
  28, 0,108, 10,  2, 10,  2,  8
};
62 | |||
/*
 * Emulate one FPU instruction which loads from or stores to user memory.
 *
 * type          selects the instruction (0..31); it indexes type_table[]
 *               and the data_sizes_*[] tables and is matched against the
 *               octal case labels below.
 * addr_modes    addressing mode of the instruction; in the protected
 *               segmented modes access_limit is checked against the
 *               operand size first.
 * data_address  user address of the memory operand.
 *
 * Returns 1 for fldenv, frstor, fldcw, fstenv, fsave, fstcw and fstsw,
 * and 0 in all other cases.  NOTE(review): how the caller uses this
 * value is not visible here; the comments on fldenv/frstor suggest that
 * 1 suppresses later fix-up operations -- confirm against the caller.
 */
int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
                     void __user *data_address)
{
  FPU_REG loaded_data;
  FPU_REG *st0_ptr;
  u_char st0_tag = TAG_Empty;  /* This is just to stop a gcc warning. */
  u_char loaded_tag;

  st0_ptr = NULL;    /* Initialized just to stop compiler warnings. */

  /* In segmented protected mode the operand must fit within the
     access_limit value (a variable declared outside this function). */
  if ( addr_modes.default_mode & PROTECTED )
    {
      if ( addr_modes.default_mode == SEG32 )
        {
          if ( access_limit < data_sizes_32[type] )
            math_abort(FPU_info,SIGSEGV);
        }
      else if ( addr_modes.default_mode == PM16 )
        {
          if ( access_limit < data_sizes_16[type] )
            math_abort(FPU_info,SIGSEGV);
        }
#ifdef PARANOID
      else
        EXCEPTION(EX_INTERNAL|0x140);
#endif /* PARANOID */
    }

  /* Prepare the register stack according to the instruction class. */
  switch ( type_table[type] )
    {
    case _NONE_:
      break;
    case _REG0_:
      st0_ptr = &st(0);       /* Some of these instructions pop after
                                 storing */
      st0_tag = FPU_gettag0();
      break;
    case _PUSH_:
      {
        /* The register which will become the new st(0) must be empty. */
        if ( FPU_gettagi(-1) != TAG_Empty )
          { FPU_stack_overflow(); return 0; }
        top--;
        st0_ptr = &st(0);
      }
      break;
    case _null_:
      FPU_illegal();
      return 0;
#ifdef PARANOID
    default:
      EXCEPTION(EX_INTERNAL|0x141);
      return 0;
#endif /* PARANOID */
    }

  /* The case labels are octal constants. */
  switch ( type )
    {
    case 000:       /* fld m32real */
      clear_C1();
      loaded_tag = FPU_load_single((float __user *)data_address, &loaded_data);
      if ( (loaded_tag == TAG_Special)
           && isNaN(&loaded_data)
           && (real_1op_NaN(&loaded_data) < 0) )
        {
          /* Undo the push done above. */
          top++;
          break;
        }
      FPU_copy_to_reg0(&loaded_data, loaded_tag);
      break;
    case 001:      /* fild m32int */
      clear_C1();
      loaded_tag = FPU_load_int32((long __user *)data_address, &loaded_data);
      FPU_copy_to_reg0(&loaded_data, loaded_tag);
      break;
    case 002:      /* fld m64real */
      clear_C1();
      loaded_tag = FPU_load_double((double __user *)data_address, &loaded_data);
      if ( (loaded_tag == TAG_Special)
           && isNaN(&loaded_data)
           && (real_1op_NaN(&loaded_data) < 0) )
        {
          /* Undo the push done above. */
          top++;
          break;
        }
      FPU_copy_to_reg0(&loaded_data, loaded_tag);
      break;
    case 003:      /* fild m16int */
      clear_C1();
      loaded_tag = FPU_load_int16((short __user *)data_address, &loaded_data);
      FPU_copy_to_reg0(&loaded_data, loaded_tag);
      break;
    case 010:      /* fst m32real */
      clear_C1();
      FPU_store_single(st0_ptr, st0_tag, (float __user *)data_address);
      break;
    case 011:      /* fist m32int */
      clear_C1();
      FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address);
      break;
    case 012:     /* fst m64real */
      clear_C1();
      FPU_store_double(st0_ptr, st0_tag, (double __user *)data_address);
      break;
    case 013:     /* fist m16int */
      clear_C1();
      FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address);
      break;
    case 014:     /* fstp m32real */
      clear_C1();
      if ( FPU_store_single(st0_ptr, st0_tag, (float __user *)data_address) )
        pop_0();  /* pop only if the number was actually stored
                     (see the 80486 manual p16-28) */
      break;
    case 015:     /* fistp m32int */
      clear_C1();
      if ( FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address) )
        pop_0();  /* pop only if the number was actually stored
                     (see the 80486 manual p16-28) */
      break;
    case 016:     /* fstp m64real */
      clear_C1();
      if ( FPU_store_double(st0_ptr, st0_tag, (double __user *)data_address) )
        pop_0();  /* pop only if the number was actually stored
                     (see the 80486 manual p16-28) */
      break;
    case 017:     /* fistp m16int */
      clear_C1();
      if ( FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address) )
        pop_0();  /* pop only if the number was actually stored
                     (see the 80486 manual p16-28) */
      break;
    case 020:     /* fldenv  m14/28byte */
      fldenv(addr_modes, (u_char __user *)data_address);
      /* Ensure that the values just loaded are not changed by
         fix-up operations. */
      return 1;
    case 022:     /* frstor m94/108byte */
      frstor(addr_modes, (u_char __user *)data_address);
      /* Ensure that the values just loaded are not changed by
         fix-up operations. */
      return 1;
    case 023:     /* fbld m80dec */
      clear_C1();
      loaded_tag = FPU_load_bcd((u_char __user *)data_address);
      FPU_settag0(loaded_tag);
      break;
    case 024:     /* fldcw */
      RE_ENTRANT_CHECK_OFF;
      FPU_access_ok(VERIFY_READ, data_address, 2);
      FPU_get_user(control_word, (unsigned short __user *) data_address);
      RE_ENTRANT_CHECK_ON;
      /* Recompute the summary bits: set them if an exception flagged in
         partial_status has its bit clear in the new control word. */
      if ( partial_status & ~control_word & CW_Exceptions )
        partial_status |= (SW_Summary | SW_Backward);
      else
        partial_status &= ~(SW_Summary | SW_Backward);
#ifdef PECULIAR_486
      control_word |= 0x40;  /* An 80486 appears to always set this bit */
#endif /* PECULIAR_486 */
      return 1;
    case 025:      /* fld m80real */
      clear_C1();
      loaded_tag = FPU_load_extended((long double __user *)data_address, 0);
      FPU_settag0(loaded_tag);
      break;
    case 027:      /* fild m64int */
      clear_C1();
      loaded_tag = FPU_load_int64((long long __user *)data_address);
      /* NOTE(review): unlike the NaN paths of fld above, this early
         return does not undo the top-- done for _PUSH_ instructions --
         confirm how FPU_load_int64() leaves the stack on TAG_Error. */
      if (loaded_tag == TAG_Error)
        return 0;
      FPU_settag0(loaded_tag);
      break;
    case 030:     /* fstenv  m14/28byte */
      fstenv(addr_modes, (u_char __user *)data_address);
      return 1;
    case 032:      /* fsave */
      fsave(addr_modes, (u_char __user *)data_address);
      return 1;
    case 033:      /* fbstp m80dec */
      clear_C1();
      if ( FPU_store_bcd(st0_ptr, st0_tag, (u_char __user *)data_address) )
        pop_0();  /* pop only if the number was actually stored
                     (see the 80486 manual p16-28) */
      break;
    case 034:      /* fstcw m16int */
      RE_ENTRANT_CHECK_OFF;
      FPU_access_ok(VERIFY_WRITE,data_address,2);
      FPU_put_user(control_word, (unsigned short __user *) data_address);
      RE_ENTRANT_CHECK_ON;
      return 1;
    case 035:      /* fstp m80real */
      clear_C1();
      if ( FPU_store_extended(st0_ptr, st0_tag, (long double __user *)data_address) )
        pop_0();  /* pop only if the number was actually stored
                     (see the 80486 manual p16-28) */
      break;
    case 036:      /* fstsw m2byte */
      RE_ENTRANT_CHECK_OFF;
      FPU_access_ok(VERIFY_WRITE,data_address,2);
      FPU_put_user(status_word(),(unsigned short __user *) data_address);
      RE_ENTRANT_CHECK_ON;
      return 1;
    case 037:      /* fistp m64int */
      clear_C1();
      if ( FPU_store_int64(st0_ptr, st0_tag, (long long __user *)data_address) )
        pop_0();  /* pop only if the number was actually stored
                     (see the 80486 manual p16-28) */
      break;
    }
  return 0;
}
diff --git a/arch/x86/math-emu/mul_Xsig.S b/arch/x86/math-emu/mul_Xsig.S new file mode 100644 index 000000000000..717785a53eb4 --- /dev/null +++ b/arch/x86/math-emu/mul_Xsig.S | |||
@@ -0,0 +1,176 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | mul_Xsig.S | | ||
3 | | | | ||
4 | | Multiply a 12 byte fixed point number by another fixed point number. | | ||
5 | | | | ||
6 | | Copyright (C) 1992,1994,1995 | | ||
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
8 | | Australia. E-mail billm@jacobi.maths.monash.edu.au | | ||
9 | | | | ||
10 | | Call from C as: | | ||
11 | | void mul32_Xsig(Xsig *x, unsigned b) | | ||
12 | | | | ||
13 | | void mul64_Xsig(Xsig *x, unsigned long long *b) | | ||
14 | | | | ||
15 | | void mul_Xsig_Xsig(Xsig *x, const Xsig *b) | | ||
16 | | | | ||
17 | | The result is neither rounded nor normalized, and the ls bit or so may | | ||
18 | | be wrong. | | ||
19 | | | | ||
20 | +---------------------------------------------------------------------------*/ | ||
21 | .file "mul_Xsig.S" | ||
22 | |||
23 | |||
24 | #include "fpu_emu.h" | ||
25 | |||
26 | .text | ||
27 | ENTRY(mul32_Xsig) /* void mul32_Xsig(Xsig *x, unsigned b): *x = upper 96 bits of the 128-bit product (*x) * b */ | ||
28 | pushl %ebp | ||
29 | movl %esp,%ebp | ||
30 | subl $16,%esp /* scratch for the 96-bit result at -12/-8/-4(%ebp) */ | ||
31 | pushl %esi /* %esi is used as the Xsig pointer; restored before return */ | ||
32 | |||
33 | movl PARAM1,%esi /* x; PARAM1/PARAM2 are defined in fpu_emu.h (not shown here) */ | ||
34 | movl PARAM2,%ecx /* b, passed by value */ | ||
35 | |||
36 | xor %eax,%eax | ||
37 | movl %eax,-4(%ebp) /* clear the two upper result words; */ | ||
38 | movl %eax,-8(%ebp) /* the low word is initialised by the first product */ | ||
39 | |||
40 | movl (%esi),%eax /* lsl of Xsig */ | ||
41 | mull %ecx /* times b */ | ||
42 | movl %edx,-12(%ebp) /* keep only the high half; the low 32 bits are dropped */ | ||
43 | |||
44 | movl 4(%esi),%eax /* midl of Xsig */ | ||
45 | mull %ecx /* times b */ | ||
46 | addl %eax,-12(%ebp) | ||
47 | adcl %edx,-8(%ebp) | ||
48 | adcl $0,-4(%ebp) /* propagate the carry into the top word */ | ||
49 | |||
50 | movl 8(%esi),%eax /* msl of Xsig */ | ||
51 | mull %ecx /* times b */ | ||
52 | addl %eax,-8(%ebp) | ||
53 | adcl %edx,-4(%ebp) | ||
54 | |||
55 | movl -12(%ebp),%eax /* copy the result back over *x, low word first */ | ||
56 | movl %eax,(%esi) | ||
57 | movl -8(%ebp),%eax | ||
58 | movl %eax,4(%esi) | ||
59 | movl -4(%ebp),%eax | ||
60 | movl %eax,8(%esi) | ||
61 | |||
62 | popl %esi | ||
63 | leave | ||
64 | ret | ||
65 | |||
66 | |||
67 | ENTRY(mul64_Xsig) /* void mul64_Xsig(Xsig *x, unsigned long long *b): *x = upper 96 bits of the 160-bit product (*x) * (*b), unrounded */ | ||
68 | pushl %ebp | ||
69 | movl %esp,%ebp | ||
70 | subl $16,%esp /* scratch for the 96-bit result at -12/-8/-4(%ebp) */ | ||
71 | pushl %esi /* %esi is used as the Xsig pointer; restored before return */ | ||
72 | |||
73 | movl PARAM1,%esi /* x; PARAM1/PARAM2 are defined in fpu_emu.h (not shown here) */ | ||
74 | movl PARAM2,%ecx /* pointer to the 64-bit multiplier b */ | ||
75 | |||
76 | xor %eax,%eax | ||
77 | movl %eax,-4(%ebp) /* clear the two upper result words; */ | ||
78 | movl %eax,-8(%ebp) /* the low word is initialised by the first product */ | ||
79 | |||
80 | movl (%esi),%eax /* lsl of Xsig */ | ||
81 | mull 4(%ecx) /* msl of b */ | ||
82 | movl %edx,-12(%ebp) /* only the high half contributes; lsl(x)*lsl(b) is never computed */ | ||
83 | |||
84 | movl 4(%esi),%eax /* midl of Xsig */ | ||
85 | mull (%ecx) /* lsl of b */ | ||
86 | addl %edx,-12(%ebp) /* again only the high half is added */ | ||
87 | adcl $0,-8(%ebp) | ||
88 | adcl $0,-4(%ebp) | ||
89 | |||
90 | movl 4(%esi),%eax /* midl of Xsig */ | ||
91 | mull 4(%ecx) /* msl of b */ | ||
92 | addl %eax,-12(%ebp) /* full 64-bit partial product from here on */ | ||
93 | adcl %edx,-8(%ebp) | ||
94 | adcl $0,-4(%ebp) | ||
95 | |||
96 | movl 8(%esi),%eax /* msl of Xsig */ | ||
97 | mull (%ecx) /* lsl of b */ | ||
98 | addl %eax,-12(%ebp) | ||
99 | adcl %edx,-8(%ebp) | ||
100 | adcl $0,-4(%ebp) | ||
101 | |||
102 | movl 8(%esi),%eax /* msl of Xsig */ | ||
103 | mull 4(%ecx) /* msl of b */ | ||
104 | addl %eax,-8(%ebp) | ||
105 | adcl %edx,-4(%ebp) | ||
106 | |||
107 | movl -12(%ebp),%eax /* copy the result back over *x, low word first */ | ||
108 | movl %eax,(%esi) | ||
109 | movl -8(%ebp),%eax | ||
110 | movl %eax,4(%esi) | ||
111 | movl -4(%ebp),%eax | ||
112 | movl %eax,8(%esi) | ||
113 | |||
114 | popl %esi | ||
115 | leave | ||
116 | ret | ||
117 | |||
118 | |||
119 | |||
120 | ENTRY(mul_Xsig_Xsig) /* void mul_Xsig_Xsig(Xsig *x, const Xsig *b): *x = upper 96 bits of the 192-bit product (*x) * (*b), unrounded */ | ||
121 | pushl %ebp | ||
122 | movl %esp,%ebp | ||
123 | subl $16,%esp /* scratch for the 96-bit result at -12/-8/-4(%ebp) */ | ||
124 | pushl %esi /* %esi is used as the Xsig pointer; restored before return */ | ||
125 | |||
126 | movl PARAM1,%esi /* x; PARAM1/PARAM2 are defined in fpu_emu.h (not shown here) */ | ||
127 | movl PARAM2,%ecx /* b; *x is only written after all reads, so b may alias x */ | ||
128 | |||
129 | xor %eax,%eax | ||
130 | movl %eax,-4(%ebp) /* clear the two upper result words; */ | ||
131 | movl %eax,-8(%ebp) /* the low word is initialised by the first product */ | ||
132 | |||
133 | movl (%esi),%eax /* lsl of Xsig */ | ||
134 | mull 8(%ecx) /* msl of b */ | ||
135 | movl %edx,-12(%ebp) /* high half only; lower-order partial products are never computed */ | ||
136 | |||
137 | movl 4(%esi),%eax /* midl of Xsig */ | ||
138 | mull 4(%ecx) /* midl of b */ | ||
139 | addl %edx,-12(%ebp) /* high half only */ | ||
140 | adcl $0,-8(%ebp) | ||
141 | adcl $0,-4(%ebp) | ||
142 | |||
143 | movl 8(%esi),%eax /* msl of Xsig */ | ||
144 | mull (%ecx) /* lsl of b */ | ||
145 | addl %edx,-12(%ebp) /* high half only */ | ||
146 | adcl $0,-8(%ebp) | ||
147 | adcl $0,-4(%ebp) | ||
148 | |||
149 | movl 4(%esi),%eax /* midl of Xsig */ | ||
150 | mull 8(%ecx) /* msl of b */ | ||
151 | addl %eax,-12(%ebp) /* full 64-bit partial product from here on */ | ||
152 | adcl %edx,-8(%ebp) | ||
153 | adcl $0,-4(%ebp) | ||
154 | |||
155 | movl 8(%esi),%eax /* msl of Xsig */ | ||
156 | mull 4(%ecx) /* midl of b */ | ||
157 | addl %eax,-12(%ebp) | ||
158 | adcl %edx,-8(%ebp) | ||
159 | adcl $0,-4(%ebp) | ||
160 | |||
161 | movl 8(%esi),%eax /* msl of Xsig */ | ||
162 | mull 8(%ecx) /* msl of b */ | ||
163 | addl %eax,-8(%ebp) | ||
164 | adcl %edx,-4(%ebp) | ||
165 | |||
166 | movl -12(%ebp),%edx /* copy the result back over *x, low word first */ | ||
167 | movl %edx,(%esi) | ||
168 | movl -8(%ebp),%edx | ||
169 | movl %edx,4(%esi) | ||
170 | movl -4(%ebp),%edx | ||
171 | movl %edx,8(%esi) | ||
172 | |||
173 | popl %esi | ||
174 | leave | ||
175 | ret | ||
176 | |||
diff --git a/arch/x86/math-emu/poly.h b/arch/x86/math-emu/poly.h new file mode 100644 index 000000000000..4db798114923 --- /dev/null +++ b/arch/x86/math-emu/poly.h | |||
@@ -0,0 +1,121 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | poly.h | | ||
3 | | | | ||
4 | | Header file for the FPU-emu poly*.c source files. | | ||
5 | | | | ||
6 | | Copyright (C) 1994,1999 | | ||
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
8 | | Australia. E-mail billm@melbpc.org.au | | ||
9 | | | | ||
10 | | Declarations and definitions for functions operating on Xsig (12-byte | | ||
11 | | extended-significand) quantities. | | ||
12 | | | | ||
13 | +---------------------------------------------------------------------------*/ | ||
14 | |||
15 | #ifndef _POLY_H | ||
16 | #define _POLY_H | ||
17 | |||
18 | /* This 12-byte structure is used to improve the accuracy of computation | ||
19 | of transcendental functions. | ||
20 | Intended to be used to get results better than 8-byte computation | ||
21 | allows. 9-byte would probably be sufficient. | ||
22 | */ | ||
23 | typedef struct { /* words are stored least significant first */ | ||
24 | unsigned long lsw; /* least significant word (offset 0 in the asm helpers) */ | ||
25 | unsigned long midw; /* middle word (offset 4) */ | ||
26 | unsigned long msw; /* most significant word (offset 8) */ | ||
27 | } Xsig; | ||
28 | |||
29 | asmlinkage void mul64(unsigned long long const *a, unsigned long long const *b, | ||
30 | unsigned long long *result); /* implemented outside this file */ | ||
31 | asmlinkage void polynomial_Xsig(Xsig *, const unsigned long long *x, | ||
32 | const unsigned long long terms[], const int n); /* callers pass (number of terms - 1) as n */ | ||
33 | |||
34 | asmlinkage void mul32_Xsig(Xsig *, const unsigned long mult); /* in place: keeps the upper 96 bits of the product */ | ||
35 | asmlinkage void mul64_Xsig(Xsig *, const unsigned long long *mult); /* in place: keeps the upper 96 bits of the product */ | ||
36 | asmlinkage void mul_Xsig_Xsig(Xsig *dest, const Xsig *mult); /* in place: keeps the upper 96 bits; mult may alias dest */ | ||
37 | |||
38 | asmlinkage void shr_Xsig(Xsig *, const int n); | ||
39 | asmlinkage int round_Xsig(Xsig *); /* callers add the return value to their exponent */ | ||
40 | asmlinkage int norm_Xsig(Xsig *); /* callers add the return value to their exponent */ | ||
41 | asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest); /* NOTE(review): callers read the quotient from *dest although it is declared const - confirm */ | ||
42 | |||
43 | /* Macro to extract the most significant 32 bits from a long long (x must be an lvalue; takes word [1] of its storage) */ | ||
44 | #define LL_MSW(x) (((unsigned long *)&x)[1]) | ||
45 | |||
46 | /* Macro to initialize an Xsig struct; arguments are given most significant word first (a = msw, b = midw, c = lsw) */ | ||
47 | #define MK_XSIG(a,b,c) { c, b, a } | ||
48 | |||
49 | /* Macro to access the 8 ms bytes of an Xsig (midw and msw) as a long long; x must be an Xsig lvalue, not a pointer */ | ||
50 | #define XSIG_LL(x) (*(unsigned long long *)&x.midw) | ||
51 | |||
52 | |||
53 | /* | ||
54 | Need to run gcc with optimizations on to get these to | ||
55 | actually be in-line. | ||
56 | */ | ||
57 | |||
58 | /* Multiply two fixed-point 32 bit numbers, producing a 32 bit result. | ||
59 | The answer is the ms word of the product. */ | ||
60 | /* Some versions of gcc make it difficult to stop eax from being clobbered. | ||
61 | Merely specifying that it is used doesn't work... | ||
62 | */ | ||
63 | static inline unsigned long mul_32_32(const unsigned long arg1, | ||
64 | const unsigned long arg2) | ||
65 | { /* returns the upper 32 bits of the 64-bit product arg1 * arg2 */ | ||
66 | int retval; /* bound to %eax by the "=a" constraint below */ | ||
67 | asm volatile ("mull %2; movl %%edx,%%eax" \ | ||
68 | :"=a" (retval) \ | ||
69 | :"0" (arg1), "g" (arg2) \ | ||
70 | :"dx"); /* mull leaves the high half in %edx, which is then copied to %eax */ | ||
71 | return retval; | ||
72 | } | ||
73 | |||
74 | |||
75 | /* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */ | ||
76 | static inline void add_Xsig_Xsig(Xsig *dest, const Xsig *x2) | ||
77 | { /* *dest += *x2 as a 96-bit add; a carry out of the top word is lost */ | ||
78 | asm volatile ("movl %1,%%edi; movl %2,%%esi;\n" /* edi = dest, esi = x2 */ | ||
79 | "movl (%%esi),%%eax; addl %%eax,(%%edi);\n" /* low words */ | ||
80 | "movl 4(%%esi),%%eax; adcl %%eax,4(%%edi);\n" /* middle words plus carry */ | ||
81 | "movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);\n" /* high words plus carry */ | ||
82 | :"=g" (*dest):"g" (dest), "g" (x2) | ||
83 | :"ax","si","di"); | ||
84 | } | ||
85 | |||
86 | |||
87 | /* Add the 12 byte Xsig x2 to Xsig dest, adjust exp if overflow occurs. */ | ||
88 | /* Note: the constraints in the asm statement didn't always work properly | ||
89 | with gcc 2.5.8. Changing from using edi to using ecx got around the | ||
90 | problem, but keep fingers crossed! */ | ||
91 | static inline void add_two_Xsig(Xsig *dest, const Xsig *x2, long int *exp) | ||
92 | { /* *dest += *x2; on carry out, shift *dest right one bit (carry becomes the top bit) and increment *exp */ | ||
93 | asm volatile ("movl %2,%%ecx; movl %3,%%esi;\n" /* ecx = dest, esi = x2 */ | ||
94 | "movl (%%esi),%%eax; addl %%eax,(%%ecx);\n" | ||
95 | "movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx);\n" | ||
96 | "movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx);\n" | ||
97 | "jnc 0f;\n" /* no carry out: nothing to renormalize */ | ||
98 | "rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx)\n" /* rotate through carry, top word first */ | ||
99 | "movl %4,%%ecx; incl (%%ecx)\n" /* ++*exp */ | ||
100 | "movl $1,%%eax; jmp 1f;\n" /* eax = 1/0 overflow flag; it is not an output operand, so it is not returned */ | ||
101 | "0: xorl %%eax,%%eax;\n" | ||
102 | "1:\n" | ||
103 | :"=g" (*exp), "=g" (*dest) | ||
104 | :"g" (dest), "g" (x2), "g" (exp) | ||
105 | :"cx","si","ax"); | ||
106 | } | ||
107 | |||
108 | |||
109 | /* Negate (subtract from 1.0) the 12 byte Xsig */ | ||
110 | /* This is faster in a loop on my 386 than using the "neg" instruction. */ | ||
111 | static inline void negate_Xsig(Xsig *x) | ||
112 | { /* *x = 0 - *x over all 96 bits, done in place */ | ||
113 | asm volatile("movl %1,%%esi;\n" /* esi = x */ | ||
114 | "xorl %%ecx,%%ecx;\n" /* ecx holds the constant 0 */ | ||
115 | "movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi);\n" /* low word; movl does not disturb the borrow flag */ | ||
116 | "movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi);\n" /* middle word with borrow */ | ||
117 | "movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi);\n" /* high word with borrow */ | ||
118 | :"=g" (*x):"g" (x):"si","ax","cx"); | ||
119 | } | ||
120 | |||
121 | #endif /* _POLY_H */ | ||
diff --git a/arch/x86/math-emu/poly_2xm1.c b/arch/x86/math-emu/poly_2xm1.c new file mode 100644 index 000000000000..9766ad5e9743 --- /dev/null +++ b/arch/x86/math-emu/poly_2xm1.c | |||
@@ -0,0 +1,156 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | poly_2xm1.c | | ||
3 | | | | ||
4 | | Function to compute 2^x-1 by a polynomial approximation. | | ||
5 | | | | ||
6 | | Copyright (C) 1992,1993,1994,1997 | | ||
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | ||
8 | | E-mail billm@suburbia.net | | ||
9 | | | | ||
10 | | | | ||
11 | +---------------------------------------------------------------------------*/ | ||
12 | |||
13 | #include "exception.h" | ||
14 | #include "reg_constant.h" | ||
15 | #include "fpu_emu.h" | ||
16 | #include "fpu_system.h" | ||
17 | #include "control_w.h" | ||
18 | #include "poly.h" | ||
19 | |||
20 | |||
21 | #define HIPOWER 11 /* number of entries in lterms[] */ | ||
22 | static const unsigned long long lterms[HIPOWER] = /* 64-bit coefficients handed to polynomial_Xsig() by poly_2xm1() */ | ||
23 | { | ||
24 | 0x0000000000000000LL, /* This term done separately as 12 bytes */ | ||
25 | 0xf5fdeffc162c7543LL, | ||
26 | 0x1c6b08d704a0bfa6LL, | ||
27 | 0x0276556df749cc21LL, | ||
28 | 0x002bb0ffcf14f6b8LL, | ||
29 | 0x0002861225ef751cLL, | ||
30 | 0x00001ffcbfcd5422LL, | ||
31 | 0x00000162c005d5f1LL, | ||
32 | 0x0000000da96ccb1bLL, | ||
33 | 0x0000000078d1b897LL, | ||
34 | 0x000000000422b029LL | ||
35 | }; | ||
36 | |||
37 | static const Xsig hiterm = MK_XSIG(0xb17217f7, 0xd1cf79ab, 0xc8a39194); /* the 12-byte leading term, multiplied by the argument in poly_2xm1() */ | ||
38 | |||
39 | /* Four slices: 0.0 : 0.25 : 0.50 : 0.75 : 1.0, | ||
40 | These numbers are 2^(1/4), 2^(1/2), and 2^(3/4) | ||
41 | */ | ||
42 | static const Xsig shiftterm0 = MK_XSIG(0, 0, 0); /* placeholder: poly_2xm1() only uses shiftterm[] when shift != 0 */ | ||
43 | static const Xsig shiftterm1 = MK_XSIG(0x9837f051, 0x8db8a96f, 0x46ad2318); | ||
44 | static const Xsig shiftterm2 = MK_XSIG(0xb504f333, 0xf9de6484, 0x597d89b3); | ||
45 | static const Xsig shiftterm3 = MK_XSIG(0xd744fcca, 0xd69d6af4, 0x39a68bb9); | ||
46 | |||
47 | static const Xsig *shiftterm[] = { &shiftterm0, &shiftterm1, /* indexed by the slice number "shift" (0..3) */ | ||
48 | &shiftterm2, &shiftterm3 }; | ||
49 | |||
50 | |||
51 | /*--- poly_2xm1() -----------------------------------------------------------+ | ||
52 | | Requires st(0) which is TAG_Valid and < 1. | | ||
53 | +---------------------------------------------------------------------------*/ | ||
54 | int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result) /* returns 0 on success, 1 if the PARANOID range check fails */ | ||
55 | { | ||
56 | long int exponent, shift; /* shift selects the slice (index into shiftterm[]) */ | ||
57 | unsigned long long Xll; /* 64-bit copy of the argument fed to the polynomial */ | ||
58 | Xsig accumulator, Denom, argSignif; | ||
59 | u_char tag; | ||
60 | |||
61 | exponent = exponent16(arg); | ||
62 | |||
63 | #ifdef PARANOID | ||
64 | if ( exponent >= 0 ) /* Don't want a |number| >= 1.0 */ | ||
65 | { | ||
66 | /* Number negative, too large, or not Valid. */ | ||
67 | EXCEPTION(EX_INTERNAL|0x127); | ||
68 | return 1; | ||
69 | } | ||
70 | #endif /* PARANOID */ | ||
71 | |||
72 | argSignif.lsw = 0; | ||
73 | XSIG_LL(argSignif) = Xll = significand(arg); /* significand goes in the upper 8 bytes of the Xsig */ | ||
74 | |||
75 | if ( exponent == -1 ) | ||
76 | { | ||
77 | shift = (argSignif.msw & 0x40000000) ? 3 : 2; /* second significand bit picks the slice */ | ||
78 | /* subtract 0.5 or 0.75 */ | ||
79 | exponent -= 2; | ||
80 | XSIG_LL(argSignif) <<= 2; /* shifting out the top two bits removes the slice offset */ | ||
81 | Xll <<= 2; | ||
82 | } | ||
83 | else if ( exponent == -2 ) | ||
84 | { | ||
85 | shift = 1; | ||
86 | /* subtract 0.25 */ | ||
87 | exponent--; | ||
88 | XSIG_LL(argSignif) <<= 1; /* shifting out the top bit removes the slice offset */ | ||
89 | Xll <<= 1; | ||
90 | } | ||
91 | else | ||
92 | shift = 0; | ||
93 | |||
94 | if ( exponent < -2 ) /* also true after either adjustment above */ | ||
95 | { | ||
96 | /* Shift the argument right by the required places. */ | ||
97 | if ( FPU_shrx(&Xll, -2-exponent) >= 0x80000000U ) | ||
98 | Xll++; /* round up */ | ||
99 | } | ||
100 | |||
101 | accumulator.lsw = accumulator.midw = accumulator.msw = 0; | ||
102 | polynomial_Xsig(&accumulator, &Xll, lterms, HIPOWER-1); | ||
103 | mul_Xsig_Xsig(&accumulator, &argSignif); | ||
104 | shr_Xsig(&accumulator, 3); | ||
105 | |||
106 | mul_Xsig_Xsig(&argSignif, &hiterm); /* The leading term */ | ||
107 | add_two_Xsig(&accumulator, &argSignif, &exponent); /* bumps exponent if the sum carries out */ | ||
108 | |||
109 | if ( shift ) | ||
110 | { | ||
111 | /* The argument is large, use the identity: | ||
112 | f(x+a) = f(a) * (f(x) + 1) - 1; | ||
113 | */ | ||
114 | shr_Xsig(&accumulator, - exponent); | ||
115 | accumulator.msw |= 0x80000000; /* add 1.0 */ | ||
116 | mul_Xsig_Xsig(&accumulator, shiftterm[shift]); | ||
117 | accumulator.msw &= 0x3fffffff; /* subtract 1.0 */ | ||
118 | exponent = 1; | ||
119 | } | ||
120 | |||
121 | if ( sign != SIGN_POS ) | ||
122 | { | ||
123 | /* The argument is negative, use the identity: | ||
124 | f(-x) = -f(x) / (1 + f(x)) | ||
125 | */ | ||
126 | Denom.lsw = accumulator.lsw; /* Denom starts as a copy of the accumulator */ | ||
127 | XSIG_LL(Denom) = XSIG_LL(accumulator); | ||
128 | if ( exponent < 0 ) | ||
129 | shr_Xsig(&Denom, - exponent); | ||
130 | else if ( exponent > 0 ) | ||
131 | { | ||
132 | /* exponent must be 1 here */ | ||
133 | XSIG_LL(Denom) <<= 1; /* 96-bit left shift by one, done in two pieces */ | ||
134 | if ( Denom.lsw & 0x80000000 ) | ||
135 | XSIG_LL(Denom) |= 1; | ||
136 | (Denom.lsw) <<= 1; | ||
137 | } | ||
138 | Denom.msw |= 0x80000000; /* add 1.0 */ | ||
139 | div_Xsig(&accumulator, &Denom, &accumulator); /* quotient is written back into accumulator */ | ||
140 | } | ||
141 | |||
142 | /* Convert to 64 bit signed-compatible */ | ||
143 | exponent += round_Xsig(&accumulator); | ||
144 | |||
145 | result = &st(0); /* the incoming result pointer is overridden: the answer always goes to st(0) */ | ||
146 | significand(result) = XSIG_LL(accumulator); | ||
147 | setexponent16(result, exponent); | ||
148 | |||
149 | tag = FPU_round(result, 1, 0, FULL_PRECISION, sign); | ||
150 | |||
151 | setsign(result, sign); /* result takes the sign of the argument */ | ||
152 | FPU_settag0(tag); | ||
153 | |||
154 | return 0; | ||
155 | |||
156 | } | ||
diff --git a/arch/x86/math-emu/poly_atan.c b/arch/x86/math-emu/poly_atan.c new file mode 100644 index 000000000000..82f702952f69 --- /dev/null +++ b/arch/x86/math-emu/poly_atan.c | |||
@@ -0,0 +1,229 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | poly_atan.c | | ||
3 | | | | ||
4 | | Compute the arctan of a FPU_REG, using a polynomial approximation. | | ||
5 | | | | ||
6 | | Copyright (C) 1992,1993,1994,1997 | | ||
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | ||
8 | | E-mail billm@suburbia.net | | ||
9 | | | | ||
10 | | | | ||
11 | +---------------------------------------------------------------------------*/ | ||
12 | |||
13 | #include "exception.h" | ||
14 | #include "reg_constant.h" | ||
15 | #include "fpu_emu.h" | ||
16 | #include "fpu_system.h" | ||
17 | #include "status_w.h" | ||
18 | #include "control_w.h" | ||
19 | #include "poly.h" | ||
20 | |||
21 | |||
22 | #define HIPOWERon 6 /* odd poly, negative terms */ | ||
23 | static const unsigned long long oddnegterms[HIPOWERon] = /* passed to polynomial_Xsig() with n = HIPOWERon-1 */ | ||
24 | { | ||
25 | 0x0000000000000000LL, /* Dummy (not for - 1.0) */ | ||
26 | 0x015328437f756467LL, | ||
27 | 0x0005dda27b73dec6LL, | ||
28 | 0x0000226bf2bfb91aLL, | ||
29 | 0x000000ccc439c5f7LL, | ||
30 | 0x0000000355438407LL | ||
31 | } ; | ||
32 | |||
33 | #define HIPOWERop 6 /* odd poly, positive terms */ | ||
34 | static const unsigned long long oddplterms[HIPOWERop] = /* passed to polynomial_Xsig() with n = HIPOWERop-1 */ | ||
35 | { | ||
36 | /* 0xaaaaaaaaaaaaaaabLL, transferred to fixedpterm[] */ | ||
37 | 0x0db55a71875c9ac2LL, | ||
38 | 0x0029fce2d67880b0LL, | ||
39 | 0x0000dfd3908b4596LL, | ||
40 | 0x00000550fd61dab4LL, | ||
41 | 0x0000001c9422b3f9LL, | ||
42 | 0x000000003e3301e1LL | ||
43 | }; | ||
44 | |||
45 | static const unsigned long long denomterm = 0xebd9b842c5c53a0eLL; /* multiplies arg^2 to form the divisor in poly_atan() */ | ||
46 | |||
47 | static const Xsig fixedpterm = MK_XSIG(0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa); /* 12-byte term added to the polynomial sum in poly_atan() */ | ||
48 | |||
49 | static const Xsig pi_signif = MK_XSIG(0xc90fdaa2, 0x2168c234, 0xc4c6628b); /* significand of pi; reused for pi/4, pi/2 and pi with different exponents */ | ||
50 | |||
51 | |||
52 | /*--- poly_atan() -----------------------------------------------------------+ | ||
53 | | | | ||
54 | +---------------------------------------------------------------------------*/ | ||
55 | void poly_atan(FPU_REG *st0_ptr, u_char st0_tag, /* the result is written to *st1_ptr and tagged as register 1 */ | ||
56 | FPU_REG *st1_ptr, u_char st1_tag) | ||
57 | { | ||
58 | u_char transformed, inverted, /* flags recording which argument reductions were applied */ | ||
59 | sign1, sign2; | ||
60 | int exponent; | ||
61 | long int dummy_exp; /* receives the exponent adjustment from add_two_Xsig(); never read */ | ||
62 | Xsig accumulator, Numer, Denom, accumulatore, argSignif, | ||
63 | argSq, argSqSq; | ||
64 | u_char tag; | ||
65 | |||
66 | sign1 = getsign(st0_ptr); | ||
67 | sign2 = getsign(st1_ptr); | ||
68 | if ( st0_tag == TAG_Valid ) | ||
69 | { | ||
70 | exponent = exponent(st0_ptr); | ||
71 | } | ||
72 | else | ||
73 | { | ||
74 | /* This gives non-compatible stack contents... */ | ||
75 | FPU_to_exp16(st0_ptr, st0_ptr); | ||
76 | exponent = exponent16(st0_ptr); | ||
77 | } | ||
78 | if ( st1_tag == TAG_Valid ) | ||
79 | { | ||
80 | exponent -= exponent(st1_ptr); /* exponent now holds exp(st0) - exp(st1) */ | ||
81 | } | ||
82 | else | ||
83 | { | ||
84 | /* This gives non-compatible stack contents... */ | ||
85 | FPU_to_exp16(st1_ptr, st1_ptr); | ||
86 | exponent -= exponent16(st1_ptr); | ||
87 | } | ||
88 | |||
89 | if ( (exponent < 0) || ((exponent == 0) && /* true when |st0| < |st1| */ | ||
90 | ((st0_ptr->sigh < st1_ptr->sigh) || | ||
91 | ((st0_ptr->sigh == st1_ptr->sigh) && | ||
92 | (st0_ptr->sigl < st1_ptr->sigl))) ) ) | ||
93 | { | ||
94 | inverted = 1; | ||
95 | Numer.lsw = Denom.lsw = 0; | ||
96 | XSIG_LL(Numer) = significand(st0_ptr); /* divide the smaller (st0) by the larger (st1) */ | ||
97 | XSIG_LL(Denom) = significand(st1_ptr); | ||
98 | } | ||
99 | else | ||
100 | { | ||
101 | inverted = 0; | ||
102 | exponent = -exponent; | ||
103 | Numer.lsw = Denom.lsw = 0; | ||
104 | XSIG_LL(Numer) = significand(st1_ptr); /* divide st1 by st0 */ | ||
105 | XSIG_LL(Denom) = significand(st0_ptr); | ||
106 | } | ||
107 | div_Xsig(&Numer, &Denom, &argSignif); /* argSignif = Numer / Denom */ | ||
108 | exponent += norm_Xsig(&argSignif); | ||
109 | |||
110 | if ( (exponent >= -1) | ||
111 | || ((exponent == -2) && (argSignif.msw > 0xd413ccd0)) ) | ||
112 | { | ||
113 | /* The argument is greater than sqrt(2)-1 (=0.414213562...) */ | ||
114 | /* Convert the argument by an identity for atan */ | ||
115 | transformed = 1; | ||
116 | |||
117 | if ( exponent >= 0 ) | ||
118 | { | ||
119 | #ifdef PARANOID | ||
120 | if ( !( (exponent == 0) && /* the only value accepted here is exactly 1.0 */ | ||
121 | (argSignif.lsw == 0) && (argSignif.midw == 0) && | ||
122 | (argSignif.msw == 0x80000000) ) ) | ||
123 | { | ||
124 | EXCEPTION(EX_INTERNAL|0x104); /* There must be a logic error */ | ||
125 | return; | ||
126 | } | ||
127 | #endif /* PARANOID */ | ||
128 | argSignif.msw = 0; /* Make the transformed arg -> 0.0 */ | ||
129 | } | ||
130 | else | ||
131 | { | ||
132 | Numer.lsw = Denom.lsw = argSignif.lsw; /* both start as copies of the argument */ | ||
133 | XSIG_LL(Numer) = XSIG_LL(Denom) = XSIG_LL(argSignif); | ||
134 | |||
135 | if ( exponent < -1 ) | ||
136 | shr_Xsig(&Numer, -1-exponent); | ||
137 | negate_Xsig(&Numer); | ||
138 | |||
139 | shr_Xsig(&Denom, -exponent); | ||
140 | Denom.msw |= 0x80000000; /* set the top bit of the aligned denominator */ | ||
141 | |||
142 | div_Xsig(&Numer, &Denom, &argSignif); | ||
143 | |||
144 | exponent = -1 + norm_Xsig(&argSignif); | ||
145 | } | ||
146 | } | ||
147 | else | ||
148 | { | ||
149 | transformed = 0; | ||
150 | } | ||
151 | |||
152 | argSq.lsw = argSignif.lsw; argSq.midw = argSignif.midw; | ||
153 | argSq.msw = argSignif.msw; | ||
154 | mul_Xsig_Xsig(&argSq, &argSq); /* argSq = arg^2 (source and destination alias) */ | ||
155 | |||
156 | argSqSq.lsw = argSq.lsw; argSqSq.midw = argSq.midw; argSqSq.msw = argSq.msw; | ||
157 | mul_Xsig_Xsig(&argSqSq, &argSqSq); /* argSqSq = arg^4 */ | ||
158 | |||
159 | accumulatore.lsw = argSq.lsw; /* keep an unshifted copy of arg^2 for the divisor below */ | ||
160 | XSIG_LL(accumulatore) = XSIG_LL(argSq); | ||
161 | |||
162 | shr_Xsig(&argSq, 2*(-1-exponent-1)); | ||
163 | shr_Xsig(&argSqSq, 4*(-1-exponent-1)); | ||
164 | |||
165 | /* Now have argSq etc with binary point at the left | ||
166 | .1xxxxxxxx */ | ||
167 | |||
168 | /* Do the basic fixed point polynomial evaluation */ | ||
169 | accumulator.msw = accumulator.midw = accumulator.lsw = 0; | ||
170 | polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), | ||
171 | oddplterms, HIPOWERop-1); | ||
172 | mul64_Xsig(&accumulator, &XSIG_LL(argSq)); | ||
173 | negate_Xsig(&accumulator); | ||
174 | polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), oddnegterms, HIPOWERon-1); | ||
175 | negate_Xsig(&accumulator); | ||
176 | add_two_Xsig(&accumulator, &fixedpterm, &dummy_exp); | ||
177 | |||
178 | mul64_Xsig(&accumulatore, &denomterm); | ||
179 | shr_Xsig(&accumulatore, 1 + 2*(-1-exponent)); | ||
180 | accumulatore.msw |= 0x80000000; /* set the top bit of the divisor */ | ||
181 | |||
182 | div_Xsig(&accumulator, &accumulatore, &accumulator); /* quotient is written back into accumulator */ | ||
183 | |||
184 | mul_Xsig_Xsig(&accumulator, &argSignif); | ||
185 | mul_Xsig_Xsig(&accumulator, &argSq); | ||
186 | |||
187 | shr_Xsig(&accumulator, 3); | ||
188 | negate_Xsig(&accumulator); | ||
189 | add_Xsig_Xsig(&accumulator, &argSignif); /* accumulator = arg - correction */ | ||
190 | |||
191 | if ( transformed ) | ||
192 | { | ||
193 | /* compute pi/4 - accumulator */ | ||
194 | shr_Xsig(&accumulator, -1-exponent); | ||
195 | negate_Xsig(&accumulator); | ||
196 | add_Xsig_Xsig(&accumulator, &pi_signif); | ||
197 | exponent = -1; | ||
198 | } | ||
199 | |||
200 | if ( inverted ) | ||
201 | { | ||
202 | /* compute pi/2 - accumulator */ | ||
203 | shr_Xsig(&accumulator, -exponent); | ||
204 | negate_Xsig(&accumulator); | ||
205 | add_Xsig_Xsig(&accumulator, &pi_signif); | ||
206 | exponent = 0; | ||
207 | } | ||
208 | |||
209 | if ( sign1 ) /* st0 has its sign bit set */ | ||
210 | { | ||
211 | /* compute pi - accumulator */ | ||
212 | shr_Xsig(&accumulator, 1 - exponent); | ||
213 | negate_Xsig(&accumulator); | ||
214 | add_Xsig_Xsig(&accumulator, &pi_signif); | ||
215 | exponent = 1; | ||
216 | } | ||
217 | |||
218 | exponent += round_Xsig(&accumulator); | ||
219 | |||
220 | significand(st1_ptr) = XSIG_LL(accumulator); | ||
221 | setexponent16(st1_ptr, exponent); | ||
222 | |||
223 | tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign2); /* the result is rounded with the sign of st1 */ | ||
224 | FPU_settagi(1, tag); | ||
225 | |||
226 | set_precision_flag_up(); /* We do not really know if up or down, | ||
227 | use this as the default. */ | ||
228 | |||
229 | } | ||
diff --git a/arch/x86/math-emu/poly_l2.c b/arch/x86/math-emu/poly_l2.c new file mode 100644 index 000000000000..dd00e1d5b074 --- /dev/null +++ b/arch/x86/math-emu/poly_l2.c | |||
@@ -0,0 +1,272 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | poly_l2.c | | ||
3 | | | | ||
4 | | Compute the base 2 log of a FPU_REG, using a polynomial approximation. | | ||
5 | | | | ||
6 | | Copyright (C) 1992,1993,1994,1997 | | ||
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | ||
8 | | E-mail billm@suburbia.net | | ||
9 | | | | ||
10 | | | | ||
11 | +---------------------------------------------------------------------------*/ | ||
12 | |||
13 | |||
14 | #include "exception.h" | ||
15 | #include "reg_constant.h" | ||
16 | #include "fpu_emu.h" | ||
17 | #include "fpu_system.h" | ||
18 | #include "control_w.h" | ||
19 | #include "poly.h" | ||
20 | |||
21 | |||
22 | static void log2_kernel(FPU_REG const *arg, u_char argsign, /* forward declaration; defined at the end of this file */ | ||
23 | Xsig *accum_result, long int *expon); | ||
24 | |||
25 | |||
26 | /*--- poly_l2() -------------------------------------------------------------+ | ||
27 | | Base 2 logarithm by a polynomial approximation. | | ||
28 | +---------------------------------------------------------------------------*/ | ||
29 | void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign) /* the result is written to *st1_ptr and tagged as register 1 */ | ||
30 | { | ||
31 | long int exponent, expon, expon_expon; | ||
32 | Xsig accumulator, expon_accum, yaccum; | ||
33 | u_char sign, argsign; | ||
34 | FPU_REG x; /* reduced argument handed to log2_kernel() */ | ||
35 | int tag; | ||
36 | |||
37 | exponent = exponent16(st0_ptr); | ||
38 | |||
39 | /* From st0_ptr, make a number > sqrt(2)/2 and < sqrt(2) */ | ||
40 | if ( st0_ptr->sigh > (unsigned)0xb504f334 ) | ||
41 | { | ||
42 | /* Treat as sqrt(2)/2 < st0_ptr < 1 */ | ||
43 | significand(&x) = - significand(st0_ptr); /* x holds the negated significand, flagged negative below */ | ||
44 | setexponent16(&x, -1); | ||
45 | exponent++; | ||
46 | argsign = SIGN_NEG; | ||
47 | } | ||
48 | else | ||
49 | { | ||
50 | /* Treat as 1 <= st0_ptr < sqrt(2) */ | ||
51 | x.sigh = st0_ptr->sigh - 0x80000000; /* remove the leading 1 bit */ | ||
52 | x.sigl = st0_ptr->sigl; | ||
53 | setexponent16(&x, 0); | ||
54 | argsign = SIGN_POS; | ||
55 | } | ||
56 | tag = FPU_normalize_nuo(&x); | ||
57 | |||
58 | if ( tag == TAG_Zero ) | ||
59 | { | ||
60 | expon = 0; /* reduced argument is zero: skip the kernel */ | ||
61 | accumulator.msw = accumulator.midw = accumulator.lsw = 0; | ||
62 | } | ||
63 | else | ||
64 | { | ||
65 | log2_kernel(&x, argsign, &accumulator, &expon); | ||
66 | } | ||
67 | |||
68 | if ( exponent < 0 ) | ||
69 | { | ||
70 | sign = SIGN_NEG; | ||
71 | exponent = -exponent; /* work with |exponent| and carry the sign separately */ | ||
72 | } | ||
73 | else | ||
74 | sign = SIGN_POS; | ||
75 | expon_accum.msw = exponent; expon_accum.midw = expon_accum.lsw = 0; | ||
76 | if ( exponent ) | ||
77 | { | ||
78 | expon_expon = 31 + norm_Xsig(&expon_accum); | ||
79 | shr_Xsig(&accumulator, expon_expon - expon); /* align the kernel result with the exponent term */ | ||
80 | |||
81 | if ( sign ^ argsign ) | ||
82 | negate_Xsig(&accumulator); /* signs differ: subtract instead of add */ | ||
83 | add_Xsig_Xsig(&accumulator, &expon_accum); | ||
84 | } | ||
85 | else | ||
86 | { | ||
87 | expon_expon = expon; /* no exponent contribution: use the kernel result as is */ | ||
88 | sign = argsign; | ||
89 | } | ||
90 | |||
91 | yaccum.lsw = 0; XSIG_LL(yaccum) = significand(st1_ptr); | ||
92 | mul_Xsig_Xsig(&accumulator, &yaccum); /* multiply by the significand of st1 */ | ||
93 | |||
94 | expon_expon += round_Xsig(&accumulator); | ||
95 | |||
96 | if ( accumulator.msw == 0 ) | ||
97 | { | ||
98 | FPU_copy_to_reg1(&CONST_Z, TAG_Zero); /* result is zero; returns without touching the precision flag */ | ||
99 | return; | ||
100 | } | ||
101 | |||
102 | significand(st1_ptr) = XSIG_LL(accumulator); | ||
103 | setexponent16(st1_ptr, expon_expon + exponent16(st1_ptr) + 1); | ||
104 | |||
105 | tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign ^ st1_sign); | ||
106 | FPU_settagi(1, tag); | ||
107 | |||
108 | set_precision_flag_up(); /* 80486 appears to always do this */ | ||
109 | |||
110 | return; | ||
111 | |||
112 | } | ||
113 | |||
114 | |||
115 | /*--- poly_l2p1() -----------------------------------------------------------+ | ||
116 | | Base 2 logarithm by a polynomial approximation. | | ||
117 | | log2(x+1) | | ||
118 | +---------------------------------------------------------------------------*/ | ||
119 | int poly_l2p1(u_char sign0, u_char sign1, /* returns 1 only if arith_invalid(1) fails, otherwise 0 */ | ||
120 | FPU_REG *st0_ptr, FPU_REG *st1_ptr, FPU_REG *dest) | ||
121 | { | ||
122 | u_char tag; | ||
123 | long int exponent; | ||
124 | Xsig accumulator, yaccum; | ||
125 | |||
126 | if ( exponent16(st0_ptr) < 0 ) | ||
127 | { | ||
128 | log2_kernel(st0_ptr, sign0, &accumulator, &exponent); | ||
129 | |||
130 | yaccum.lsw = 0; | ||
131 | XSIG_LL(yaccum) = significand(st1_ptr); | ||
132 | mul_Xsig_Xsig(&accumulator, &yaccum); /* multiply by the significand of st1 */ | ||
133 | |||
134 | exponent += round_Xsig(&accumulator); | ||
135 | |||
136 | exponent += exponent16(st1_ptr) + 1; | ||
137 | if ( exponent < EXP_WAY_UNDER ) exponent = EXP_WAY_UNDER; /* clamp the exponent from below */ | ||
138 | |||
139 | significand(dest) = XSIG_LL(accumulator); | ||
140 | setexponent16(dest, exponent); | ||
141 | |||
142 | tag = FPU_round(dest, 1, 0, FULL_PRECISION, sign0 ^ sign1); | ||
143 | FPU_settagi(1, tag); /* the tag is set for register 1 while the value goes to *dest */ | ||
144 | |||
145 | if ( tag == TAG_Valid ) | ||
146 | set_precision_flag_up(); /* 80486 appears to always do this */ | ||
147 | } | ||
148 | else | ||
149 | { | ||
150 | /* The magnitude of st0_ptr is far too large. */ | ||
151 | /* Note: *dest is not written on this path. */ | ||
152 | if ( sign0 != SIGN_POS ) | ||
153 | { | ||
154 | /* Trying to get the log of a negative number. */ | ||
155 | #ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */ | ||
156 | changesign(st1_ptr); | ||
157 | #else | ||
158 | if ( arith_invalid(1) < 0 ) | ||
159 | return 1; | ||
160 | #endif /* PECULIAR_486 */ | ||
161 | } | ||
162 | |||
163 | /* 80486 appears to do this */ | ||
164 | if ( sign0 == SIGN_NEG ) | ||
165 | set_precision_flag_down(); | ||
166 | else | ||
167 | set_precision_flag_up(); | ||
168 | } | ||
169 | |||
170 | if ( exponent(dest) <= EXP_UNDER ) /* checked on both paths */ | ||
171 | EXCEPTION(EX_Underflow); | ||
172 | |||
173 | return 0; | ||
174 | |||
175 | } | ||
176 | |||
177 | |||
178 | |||
179 | |||
180 | #undef HIPOWER | ||
181 | #define HIPOWER 10 /* number of entries in logterms[] */ | ||
182 | static const unsigned long long logterms[HIPOWER] = /* passed to polynomial_Xsig() by log2_kernel() with n = HIPOWER-1 */ | ||
183 | { | ||
184 | 0x2a8eca5705fc2ef0LL, | ||
185 | 0xf6384ee1d01febceLL, | ||
186 | 0x093bb62877cdf642LL, | ||
187 | 0x006985d8a9ec439bLL, | ||
188 | 0x0005212c4f55a9c8LL, | ||
189 | 0x00004326a16927f0LL, | ||
190 | 0x0000038d1d80a0e7LL, | ||
191 | 0x0000003141cc80c6LL, | ||
192 | 0x00000002b1668c9fLL, | ||
193 | 0x000000002c7a46aaLL | ||
194 | }; | ||
195 | |||
196 | static const unsigned long leadterm = 0xb8000000; /* 32-bit factor applied to the argument via mul32_Xsig() in log2_kernel() */ | ||
197 | |||
198 | |||
199 | /*--- log2_kernel() ---------------------------------------------------------+ | ||
200 | | Base 2 logarithm by a polynomial approximation. | | ||
201 | | log2(x+1) | | ||
202 | +---------------------------------------------------------------------------*/ | ||
203 | static void log2_kernel(FPU_REG const *arg, u_char argsign, Xsig *accum_result, /* outputs: *accum_result (significand) and *expon (exponent) */ | ||
204 | long int *expon) | ||
205 | { | ||
206 | long int exponent, adj; | ||
207 | unsigned long long Xsq; /* rounded 64-bit square of the transformed argument */ | ||
208 | Xsig accumulator, Numer, Denom, argSignif, arg_signif; | ||
209 | |||
210 | exponent = exponent16(arg); | ||
211 | Numer.lsw = Denom.lsw = 0; | ||
212 | XSIG_LL(Numer) = XSIG_LL(Denom) = significand(arg); /* both start as the argument's significand */ | ||
213 | if ( argsign == SIGN_POS ) | ||
214 | { | ||
215 | shr_Xsig(&Denom, 2 - (1 + exponent)); | ||
216 | Denom.msw |= 0x80000000; /* set the top bit of the aligned denominator */ | ||
217 | div_Xsig(&Numer, &Denom, &argSignif); | ||
218 | } | ||
219 | else | ||
220 | { | ||
221 | shr_Xsig(&Denom, 1 - (1 + exponent)); | ||
222 | negate_Xsig(&Denom); | ||
223 | if ( Denom.msw & 0x80000000 ) | ||
224 | { | ||
225 | div_Xsig(&Numer, &Denom, &argSignif); | ||
226 | exponent ++; | ||
227 | } | ||
228 | else | ||
229 | { | ||
230 | /* Denom must be 1.0 */ | ||
231 | argSignif.lsw = Numer.lsw; argSignif.midw = Numer.midw; /* so the quotient is just Numer */ | ||
232 | argSignif.msw = Numer.msw; | ||
233 | } | ||
234 | } | ||
235 | |||
236 | #ifndef PECULIAR_486 | ||
237 | /* Should check here that |local_arg| is within the valid range */ | ||
238 | if ( exponent >= -2 ) | ||
239 | { | ||
240 | if ( (exponent > -2) || | ||
241 | (argSignif.msw > (unsigned)0xafb0ccc0) ) | ||
242 | { | ||
243 | /* The argument is too large */ | ||
244 | } /* NOTE(review): this branch is empty, so the range check takes no action */ | ||
245 | } | ||
246 | #endif /* PECULIAR_486 */ | ||
247 | |||
248 | arg_signif.lsw = argSignif.lsw; XSIG_LL(arg_signif) = XSIG_LL(argSignif); /* keep an un-normalized copy for the leading term */ | ||
249 | adj = norm_Xsig(&argSignif); | ||
250 | accumulator.lsw = argSignif.lsw; XSIG_LL(accumulator) = XSIG_LL(argSignif); | ||
251 | mul_Xsig_Xsig(&accumulator, &accumulator); /* square in place (source and destination alias) */ | ||
252 | shr_Xsig(&accumulator, 2*(-1 - (1 + exponent + adj))); | ||
253 | Xsq = XSIG_LL(accumulator); | ||
254 | if ( accumulator.lsw & 0x80000000 ) | ||
255 | Xsq++; /* round up using the top bit of the discarded low word */ | ||
256 | |||
257 | accumulator.msw = accumulator.midw = accumulator.lsw = 0; | ||
258 | /* Do the basic fixed point polynomial evaluation */ | ||
259 | polynomial_Xsig(&accumulator, &Xsq, logterms, HIPOWER-1); | ||
260 | |||
261 | mul_Xsig_Xsig(&accumulator, &argSignif); | ||
262 | shr_Xsig(&accumulator, 6 - adj); | ||
263 | |||
264 | mul32_Xsig(&arg_signif, leadterm); | ||
265 | add_two_Xsig(&accumulator, &arg_signif, &exponent); /* bumps exponent if the sum carries out */ | ||
266 | |||
267 | *expon = exponent + 1; | ||
268 | accum_result->lsw = accumulator.lsw; | ||
269 | accum_result->midw = accumulator.midw; | ||
270 | accum_result->msw = accumulator.msw; | ||
271 | |||
272 | } | ||
diff --git a/arch/x86/math-emu/poly_sin.c b/arch/x86/math-emu/poly_sin.c new file mode 100644 index 000000000000..a36313fb06f1 --- /dev/null +++ b/arch/x86/math-emu/poly_sin.c | |||
@@ -0,0 +1,397 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | poly_sin.c | | ||
3 | | | | ||
4 | | Computation of an approximation of the sin function and the cosine | | ||
5 | | function by a polynomial. | | ||
6 | | | | ||
7 | | Copyright (C) 1992,1993,1994,1997,1999 | | ||
8 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | ||
9 | | E-mail billm@melbpc.org.au | | ||
10 | | | | ||
11 | | | | ||
12 | +---------------------------------------------------------------------------*/ | ||
13 | |||
14 | |||
15 | #include "exception.h" | ||
16 | #include "reg_constant.h" | ||
17 | #include "fpu_emu.h" | ||
18 | #include "fpu_system.h" | ||
19 | #include "control_w.h" | ||
20 | #include "poly.h" | ||
21 | |||
22 | |||
23 | #define N_COEFF_P 4 /* number of entries in pos_terms_l[] */ | ||
24 | #define N_COEFF_N 4 /* number of entries in neg_terms_l[] */ | ||
25 | |||
26 | static const unsigned long long pos_terms_l[N_COEFF_P] = /* "_l" set, terms that are added; polynomial variable is arg^4 */ | ||
27 | { | ||
28 | 0xaaaaaaaaaaaaaaabLL, | ||
29 | 0x00d00d00d00cf906LL, | ||
30 | 0x000006b99159a8bbLL, | ||
31 | 0x000000000d7392e6LL | ||
32 | }; | ||
33 | |||
34 | static const unsigned long long neg_terms_l[N_COEFF_N] = /* "_l" set, terms that are subtracted (after scaling by arg^2) */ | ||
35 | { | ||
36 | 0x2222222222222167LL, | ||
37 | 0x0002e3bc74aab624LL, | ||
38 | 0x0000000b09229062LL, | ||
39 | 0x00000000000c7973LL | ||
40 | }; | ||
41 | |||
42 | |||
43 | |||
44 | #define N_COEFF_PH 4 /* number of entries in pos_terms_h[] */ | ||
45 | #define N_COEFF_NH 4 /* number of entries in neg_terms_h[] */ | ||
46 | static const unsigned long long pos_terms_h[N_COEFF_PH] = /* "_h" set: poly_sine() uses it above its range split, poly_cos() below its own */ | ||
47 | { | ||
48 | 0x0000000000000000LL, | ||
49 | 0x05b05b05b05b0406LL, | ||
50 | 0x000049f93edd91a9LL, | ||
51 | 0x00000000c9c9ed62LL | ||
52 | }; | ||
53 | |||
54 | static const unsigned long long neg_terms_h[N_COEFF_NH] = /* "_h" set, evaluated first, then scaled by arg^2 and negated */ | ||
55 | { | ||
56 | 0xaaaaaaaaaaaaaa98LL, | ||
57 | 0x001a01a01a019064LL, | ||
58 | 0x0000008f76c68a77LL, | ||
59 | 0x0000000000d58f5eLL | ||
60 | }; | ||
61 | |||
62 | |||
63 | /*--- poly_sine() -----------------------------------------------------------+ | ||
64 | | Polynomial sin() of *st0_ptr; result is stored via FPU_copy_to_reg0(). | | ||
65 | +---------------------------------------------------------------------------*/ | ||
66 | void poly_sine(FPU_REG *st0_ptr) | ||
67 | { | ||
68 | int exponent, echange; | ||
69 | Xsig accumulator, argSqrd, argTo4; | ||
70 | unsigned long fix_up, adj; | ||
71 | unsigned long long fixed_arg; | ||
72 | FPU_REG result; | ||
73 | |||
74 | exponent = exponent(st0_ptr); /* local copy of the argument's exponent */ | ||
75 | |||
76 | accumulator.lsw = accumulator.midw = accumulator.msw = 0; /* polynomial_Xsig() adds into this */ | ||
77 | |||
78 | /* Split into two ranges, each with its own polynomial. */ | ||
79 | /* The boundary between upper and lower is approx 0.88309101259 */ | ||
80 | if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa)) ) | ||
81 | { | ||
82 | /* The argument is <= 0.88309101259 */ | ||
83 | |||
84 | argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; argSqrd.lsw = 0; /* argSqrd = significand */ | ||
85 | mul64_Xsig(&argSqrd, &significand(st0_ptr)); /* ... now significand^2 */ | ||
86 | shr_Xsig(&argSqrd, 2*(-1-exponent)); /* de-normalize: two bit positions per exponent step below -1 */ | ||
87 | argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; | ||
88 | argTo4.lsw = argSqrd.lsw; | ||
89 | mul_Xsig_Xsig(&argTo4, &argTo4); /* argTo4 = argSqrd^2 */ | ||
90 | |||
91 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, | ||
92 | N_COEFF_N-1); | ||
93 | mul_Xsig_Xsig(&accumulator, &argSqrd); | ||
94 | negate_Xsig(&accumulator); /* accumulator = -argSqrd * neg_terms_l(argTo4) */ | ||
95 | |||
96 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, | ||
97 | N_COEFF_P-1); /* ... + pos_terms_l(argTo4) */ | ||
98 | |||
99 | shr_Xsig(&accumulator, 2); /* Divide by four */ | ||
100 | accumulator.msw |= 0x80000000; /* Add 1.0 */ | ||
101 | |||
102 | mul64_Xsig(&accumulator, &significand(st0_ptr)); /* three multiplies: scale by significand^3 */ | ||
103 | mul64_Xsig(&accumulator, &significand(st0_ptr)); | ||
104 | mul64_Xsig(&accumulator, &significand(st0_ptr)); | ||
105 | |||
106 | /* Divide by four, FPU_REG compatible, etc */ | ||
107 | exponent = 3*exponent; /* exponent that goes with the cubed significand */ | ||
108 | |||
109 | /* The minimum exponent difference is 3 */ | ||
110 | shr_Xsig(&accumulator, exponent(st0_ptr) - exponent); /* align the cubic term with the argument */ | ||
111 | |||
112 | negate_Xsig(&accumulator); | ||
113 | XSIG_LL(accumulator) += significand(st0_ptr); /* argument significand minus the aligned correction term */ | ||
114 | |||
115 | echange = round_Xsig(&accumulator); /* exponent adjustment reported by round_Xsig() */ | ||
116 | |||
117 | setexponentpos(&result, exponent(st0_ptr) + echange); | ||
118 | } | ||
119 | else | ||
120 | { | ||
121 | /* The argument is > 0.88309101259 */ | ||
122 | /* We use sin(st(0)) = cos(pi/2-st(0)) */ | ||
123 | |||
124 | fixed_arg = significand(st0_ptr); | ||
125 | |||
126 | if ( exponent == 0 ) | ||
127 | { | ||
128 | /* The argument is >= 1.0 */ | ||
129 | |||
130 | /* Put the binary point at the left. */ | ||
131 | fixed_arg <<= 1; | ||
132 | } | ||
133 | /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ | ||
134 | fixed_arg = 0x921fb54442d18469LL - fixed_arg; /* fixed_arg = (64 bit approx of pi/2) - arg */ | ||
135 | /* There is a special case which arises due to rounding, to fix here. */ | ||
136 | if ( fixed_arg == 0xffffffffffffffffLL ) | ||
137 | fixed_arg = 0; /* the subtraction wrapped to -1: treat as zero */ | ||
138 | |||
139 | XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0; | ||
140 | mul64_Xsig(&argSqrd, &fixed_arg); /* argSqrd = fixed_arg^2 */ | ||
141 | |||
142 | XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw; | ||
143 | mul_Xsig_Xsig(&argTo4, &argTo4); /* argTo4 = fixed_arg^4 */ | ||
144 | |||
145 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, | ||
146 | N_COEFF_NH-1); | ||
147 | mul_Xsig_Xsig(&accumulator, &argSqrd); | ||
148 | negate_Xsig(&accumulator); /* accumulator = -argSqrd * neg_terms_h(argTo4) */ | ||
149 | |||
150 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, | ||
151 | N_COEFF_PH-1); /* ... + pos_terms_h(argTo4) */ | ||
152 | negate_Xsig(&accumulator); | ||
153 | |||
154 | mul64_Xsig(&accumulator, &fixed_arg); /* two multiplies: scale by fixed_arg^2 */ | ||
155 | mul64_Xsig(&accumulator, &fixed_arg); | ||
156 | |||
157 | shr_Xsig(&accumulator, 3); | ||
158 | negate_Xsig(&accumulator); | ||
159 | |||
160 | add_Xsig_Xsig(&accumulator, &argSqrd); | ||
161 | |||
162 | shr_Xsig(&accumulator, 1); | ||
163 | |||
164 | accumulator.lsw |= 1; /* A zero accumulator here would cause problems */ | ||
165 | negate_Xsig(&accumulator); | ||
166 | |||
167 | /* The basic computation is complete. Now fix the answer to | ||
168 | compensate for the error due to the approximation used for | ||
169 | pi/2 | ||
170 | */ | ||
171 | |||
172 | /* This has an exponent of -65 */ | ||
173 | fix_up = 0x898cc517; /* leading 32 bits of the pi/2 digits that follow the 64 bit constant above */ | ||
174 | /* The fix-up needs to be improved for larger args */ | ||
175 | if ( argSqrd.msw & 0xffc00000 ) | ||
176 | { | ||
177 | /* Get about 32 bit precision in these: */ | ||
178 | fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6; | ||
179 | } | ||
180 | fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg)); | ||
181 | |||
182 | adj = accumulator.lsw; /* temp save */ | ||
183 | accumulator.lsw -= fix_up; | ||
184 | if ( accumulator.lsw > adj ) /* lsw wrapped around: propagate the borrow */ | ||
185 | XSIG_LL(accumulator) --; | ||
186 | |||
187 | echange = round_Xsig(&accumulator); /* exponent adjustment reported by round_Xsig() */ | ||
188 | |||
189 | setexponentpos(&result, echange - 1); | ||
190 | } | ||
191 | |||
192 | significand(&result) = XSIG_LL(accumulator); /* upper 64 bits become the result significand */ | ||
193 | setsign(&result, getsign(st0_ptr)); /* result takes the sign of the argument */ | ||
194 | FPU_copy_to_reg0(&result, TAG_Valid); | ||
195 | |||
196 | #ifdef PARANOID | ||
197 | if ( (exponent(&result) >= 0) /* sanity check on the stored result: flags a magnitude above 1.0 */ | ||
198 | && (significand(&result) > 0x8000000000000000LL) ) | ||
199 | { | ||
200 | EXCEPTION(EX_INTERNAL|0x150); | ||
201 | } | ||
202 | #endif /* PARANOID */ | ||
203 | |||
204 | } | ||
205 | |||
206 | |||
207 | |||
208 | /*--- poly_cos() ------------------------------------------------------------+ | ||
209 | | Polynomial cos() of *st0_ptr; result is stored via FPU_copy_to_reg0(). | | ||
210 | +---------------------------------------------------------------------------*/ | ||
211 | void poly_cos(FPU_REG *st0_ptr) | ||
212 | { | ||
213 | FPU_REG result; | ||
214 | long int exponent, exp2, echange; | ||
215 | Xsig accumulator, argSqrd, fix_up, argTo4; | ||
216 | unsigned long long fixed_arg; | ||
217 | |||
218 | #ifdef PARANOID | ||
219 | if ( (exponent(st0_ptr) > 0) /* reject arguments above the limit tested below */ | ||
220 | || ((exponent(st0_ptr) == 0) | ||
221 | && (significand(st0_ptr) > 0xc90fdaa22168c234LL)) ) | ||
222 | { | ||
223 | EXCEPTION(EX_Invalid); | ||
224 | FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); | ||
225 | return; | ||
226 | } | ||
227 | #endif /* PARANOID */ | ||
228 | |||
229 | exponent = exponent(st0_ptr); /* local copy of the argument's exponent */ | ||
230 | |||
231 | accumulator.lsw = accumulator.midw = accumulator.msw = 0; /* polynomial_Xsig() adds into this */ | ||
232 | |||
233 | if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54)) ) | ||
234 | { | ||
235 | /* arg is < 0.687705 */ | ||
236 | |||
237 | argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; | ||
238 | argSqrd.lsw = 0; | ||
239 | mul64_Xsig(&argSqrd, &significand(st0_ptr)); /* argSqrd = significand^2 */ | ||
240 | |||
241 | if ( exponent < -1 ) | ||
242 | { | ||
243 | /* shift the argument right by the required places */ | ||
244 | shr_Xsig(&argSqrd, 2*(-1-exponent)); | ||
245 | } | ||
246 | |||
247 | argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; | ||
248 | argTo4.lsw = argSqrd.lsw; | ||
249 | mul_Xsig_Xsig(&argTo4, &argTo4); /* argTo4 = argSqrd^2 */ | ||
250 | |||
251 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, | ||
252 | N_COEFF_NH-1); | ||
253 | mul_Xsig_Xsig(&accumulator, &argSqrd); | ||
254 | negate_Xsig(&accumulator); /* accumulator = -argSqrd * neg_terms_h(argTo4) */ | ||
255 | |||
256 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, | ||
257 | N_COEFF_PH-1); /* ... + pos_terms_h(argTo4) */ | ||
258 | negate_Xsig(&accumulator); | ||
259 | |||
260 | mul64_Xsig(&accumulator, &significand(st0_ptr)); /* two multiplies: scale by significand^2 */ | ||
261 | mul64_Xsig(&accumulator, &significand(st0_ptr)); | ||
262 | shr_Xsig(&accumulator, -2*(1+exponent)); /* same shift count as applied to argSqrd above */ | ||
263 | |||
264 | shr_Xsig(&accumulator, 3); | ||
265 | negate_Xsig(&accumulator); | ||
266 | |||
267 | add_Xsig_Xsig(&accumulator, &argSqrd); | ||
268 | |||
269 | shr_Xsig(&accumulator, 1); | ||
270 | |||
271 | /* It doesn't matter if accumulator is all zero here, the | ||
272 | following code will work ok */ | ||
273 | negate_Xsig(&accumulator); | ||
274 | |||
275 | if ( accumulator.lsw & 0x80000000 ) /* round the 96 bit value to its upper 64 bits */ | ||
276 | XSIG_LL(accumulator) ++; | ||
277 | if ( accumulator.msw == 0 ) | ||
278 | { | ||
279 | /* The result is 1.0 */ | ||
280 | FPU_copy_to_reg0(&CONST_1, TAG_Valid); | ||
281 | return; | ||
282 | } | ||
283 | else | ||
284 | { | ||
285 | significand(&result) = XSIG_LL(accumulator); | ||
286 | |||
287 | /* will be a valid positive nr with expon = -1 */ | ||
288 | setexponentpos(&result, -1); | ||
289 | } | ||
290 | } | ||
291 | else | ||
292 | { | ||
293 | fixed_arg = significand(st0_ptr); /* NOTE(review): mirrors poly_sine()'s small-argument path applied to pi/2 - arg; presumably cos(x) = sin(pi/2 - x) */ | ||
294 | |||
295 | if ( exponent == 0 ) | ||
296 | { | ||
297 | /* The argument is >= 1.0 */ | ||
298 | |||
299 | /* Put the binary point at the left. */ | ||
300 | fixed_arg <<= 1; | ||
301 | } | ||
302 | /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ | ||
303 | fixed_arg = 0x921fb54442d18469LL - fixed_arg; /* fixed_arg = (64 bit approx of pi/2) - arg */ | ||
304 | /* There is a special case which arises due to rounding, to fix here. */ | ||
305 | if ( fixed_arg == 0xffffffffffffffffLL ) | ||
306 | fixed_arg = 0; /* the subtraction wrapped to -1: treat as zero */ | ||
307 | |||
308 | exponent = -1; /* exponent of fixed_arg; tripled further down */ | ||
309 | exp2 = -1; /* exponent of fixed_arg; becomes the result exponent */ | ||
310 | |||
311 | /* A shift is needed here only for a narrow range of arguments, | ||
312 | i.e. for fixed_arg approx 2^-32, but we pick up more... */ | ||
313 | if ( !(LL_MSW(fixed_arg) & 0xffff0000) ) | ||
314 | { | ||
315 | fixed_arg <<= 16; /* scale up by 2^16 and compensate in both exponents */ | ||
316 | exponent -= 16; | ||
317 | exp2 -= 16; | ||
318 | } | ||
319 | |||
320 | XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0; | ||
321 | mul64_Xsig(&argSqrd, &fixed_arg); /* argSqrd = fixed_arg^2 */ | ||
322 | |||
323 | if ( exponent < -1 ) | ||
324 | { | ||
325 | /* shift the argument right by the required places */ | ||
326 | shr_Xsig(&argSqrd, 2*(-1-exponent)); | ||
327 | } | ||
328 | |||
329 | argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; | ||
330 | argTo4.lsw = argSqrd.lsw; | ||
331 | mul_Xsig_Xsig(&argTo4, &argTo4); /* argTo4 = argSqrd^2 */ | ||
332 | |||
333 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, | ||
334 | N_COEFF_N-1); | ||
335 | mul_Xsig_Xsig(&accumulator, &argSqrd); | ||
336 | negate_Xsig(&accumulator); /* accumulator = -argSqrd * neg_terms_l(argTo4) */ | ||
337 | |||
338 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, | ||
339 | N_COEFF_P-1); /* ... + pos_terms_l(argTo4) */ | ||
340 | |||
341 | shr_Xsig(&accumulator, 2); /* Divide by four */ | ||
342 | accumulator.msw |= 0x80000000; /* Add 1.0 */ | ||
343 | |||
344 | mul64_Xsig(&accumulator, &fixed_arg); /* three multiplies: scale by fixed_arg^3 */ | ||
345 | mul64_Xsig(&accumulator, &fixed_arg); | ||
346 | mul64_Xsig(&accumulator, &fixed_arg); | ||
347 | |||
348 | /* Divide by four, FPU_REG compatible, etc */ | ||
349 | exponent = 3*exponent; /* exponent that goes with the cubed fixed_arg */ | ||
350 | |||
351 | /* The minimum exponent difference is 3 */ | ||
352 | shr_Xsig(&accumulator, exp2 - exponent); /* align the cubic term with fixed_arg */ | ||
353 | |||
354 | negate_Xsig(&accumulator); | ||
355 | XSIG_LL(accumulator) += fixed_arg; /* fixed_arg minus the aligned correction term */ | ||
356 | |||
357 | /* The basic computation is complete. Now fix the answer to | ||
358 | compensate for the error due to the approximation used for | ||
359 | pi/2 | ||
360 | */ | ||
361 | |||
362 | /* This has an exponent of -65 */ | ||
363 | XSIG_LL(fix_up) = 0x898cc51701b839a2ll; /* the pi/2 digits that follow the 64 bit constant above */ | ||
364 | fix_up.lsw = 0; | ||
365 | |||
366 | /* The fix-up needs to be improved for larger args */ | ||
367 | if ( argSqrd.msw & 0xffc00000 ) | ||
368 | { | ||
369 | /* Get about 32 bit precision in these: */ | ||
370 | fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2; | ||
371 | fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24; | ||
372 | } | ||
373 | |||
374 | exp2 += norm_Xsig(&accumulator); /* normalize and track the exponent change */ | ||
375 | shr_Xsig(&accumulator, 1); /* Prevent overflow */ | ||
376 | exp2++; | ||
377 | shr_Xsig(&fix_up, 65 + exp2); /* align fix_up (exponent -65) with the accumulator */ | ||
378 | |||
379 | add_Xsig_Xsig(&accumulator, &fix_up); | ||
380 | |||
381 | echange = round_Xsig(&accumulator); /* exponent adjustment reported by round_Xsig() */ | ||
382 | |||
383 | setexponentpos(&result, exp2 + echange); | ||
384 | significand(&result) = XSIG_LL(accumulator); | ||
385 | } | ||
386 | |||
387 | FPU_copy_to_reg0(&result, TAG_Valid); | ||
388 | |||
389 | #ifdef PARANOID | ||
390 | if ( (exponent(&result) >= 0) /* sanity check on the stored result: flags a magnitude above 1.0 */ | ||
391 | && (significand(&result) > 0x8000000000000000LL) ) | ||
392 | { | ||
393 | EXCEPTION(EX_INTERNAL|0x151); | ||
394 | } | ||
395 | #endif /* PARANOID */ | ||
396 | |||
397 | } | ||
diff --git a/arch/x86/math-emu/poly_tan.c b/arch/x86/math-emu/poly_tan.c new file mode 100644 index 000000000000..8df3e03b6e6f --- /dev/null +++ b/arch/x86/math-emu/poly_tan.c | |||
@@ -0,0 +1,222 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | poly_tan.c | | ||
3 | | | | ||
4 | | Compute the tan of a FPU_REG, using a polynomial approximation. | | ||
5 | | | | ||
6 | | Copyright (C) 1992,1993,1994,1997,1999 | | ||
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
8 | | Australia. E-mail billm@melbpc.org.au | | ||
9 | | | | ||
10 | | | | ||
11 | +---------------------------------------------------------------------------*/ | ||
12 | |||
13 | #include "exception.h" | ||
14 | #include "reg_constant.h" | ||
15 | #include "fpu_emu.h" | ||
16 | #include "fpu_system.h" | ||
17 | #include "control_w.h" | ||
18 | #include "poly.h" | ||
19 | |||
20 | |||
21 | #define HiPOWERop 3 /* odd poly, positive terms */ | ||
22 | static const unsigned long long oddplterm[HiPOWERop] = /* added into the numerator accumulator in poly_tan() */ | ||
23 | { | ||
24 | 0x0000000000000000LL, | ||
25 | 0x0051a1cf08fca228LL, | ||
26 | 0x0000000071284ff7LL | ||
27 | }; | ||
28 | |||
29 | #define HiPOWERon 2 /* odd poly, negative terms */ | ||
30 | static const unsigned long long oddnegterm[HiPOWERon] = /* evaluated first, then scaled by arg^2 and negated */ | ||
31 | { | ||
32 | 0x1291a9a184244e80LL, | ||
33 | 0x0000583245819c21LL | ||
34 | }; | ||
35 | |||
36 | #define HiPOWERep 2 /* even poly, positive terms */ | ||
37 | static const unsigned long long evenplterm[HiPOWERep] = /* denominator accumulator: evaluated first, scaled by arg^2, negated */ | ||
38 | { | ||
39 | 0x0e848884b539e888LL, | ||
40 | 0x00003c7f18b887daLL | ||
41 | }; | ||
42 | |||
43 | #define HiPOWERen 2 /* even poly, negative terms */ | ||
44 | static const unsigned long long evennegterm[HiPOWERen] = /* added into the denominator accumulator afterwards */ | ||
45 | { | ||
46 | 0xf1f0200fd51569ccLL, | ||
47 | 0x003afb46105c4432LL | ||
48 | }; | ||
49 | |||
50 | static const unsigned long long twothirds = 0xaaaaaaaaaaaaaaabLL; /* approx (2/3) * 2^64, i.e. 2/3 as a 64 bit binary fraction */ | ||
51 | |||
52 | |||
53 | /*--- poly_tan() ------------------------------------------------------------+ | ||
54 | | Polynomial-ratio tan() of *st0_ptr (must be positive); updates it in place.| | ||
55 | +---------------------------------------------------------------------------*/ | ||
56 | void poly_tan(FPU_REG *st0_ptr) | ||
57 | { | ||
58 | long int exponent; | ||
59 | int invert; | ||
60 | Xsig argSq, argSqSq, accumulatoro, accumulatore, accum, | ||
61 | argSignif, fix_up; | ||
62 | unsigned long adj; | ||
63 | |||
64 | exponent = exponent(st0_ptr); /* local copy of the argument's exponent */ | ||
65 | |||
66 | #ifdef PARANOID | ||
67 | if ( signnegative(st0_ptr) ) /* Can't hack a number < 0.0 */ | ||
68 | { arith_invalid(0); return; } /* Need a positive number */ | ||
69 | #endif /* PARANOID */ | ||
70 | |||
71 | /* Split the problem into two domains, smaller and larger than pi/4 */ | ||
72 | if ( (exponent == 0) || ((exponent == -1) && (st0_ptr->sigh > 0xc90fdaa2)) ) | ||
73 | { | ||
74 | /* The argument is greater than (approx) pi/4 */ | ||
75 | invert = 1; /* compute tan(pi/2 - arg) and take the reciprocal at the end */ | ||
76 | accum.lsw = 0; | ||
77 | XSIG_LL(accum) = significand(st0_ptr); | ||
78 | |||
79 | if ( exponent == 0 ) | ||
80 | { | ||
81 | /* The argument is >= 1.0 */ | ||
82 | /* Put the binary point at the left. */ | ||
83 | XSIG_LL(accum) <<= 1; | ||
84 | } | ||
85 | /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ | ||
86 | XSIG_LL(accum) = 0x921fb54442d18469LL - XSIG_LL(accum); /* accum = (64 bit approx of pi/2) - arg */ | ||
87 | /* This is a special case which arises due to rounding. */ | ||
88 | if ( XSIG_LL(accum) == 0xffffffffffffffffLL ) | ||
89 | { | ||
90 | FPU_settag0(TAG_Valid); /* store a fixed, negative result directly and finish */ | ||
91 | significand(st0_ptr) = 0x8a51e04daabda360LL; | ||
92 | setexponent16(st0_ptr, (0x41 + EXTENDED_Ebias) | SIGN_Negative); | ||
93 | return; | ||
94 | } | ||
95 | |||
96 | argSignif.lsw = accum.lsw; | ||
97 | XSIG_LL(argSignif) = XSIG_LL(accum); | ||
98 | exponent = -1 + norm_Xsig(&argSignif); /* exponent of the normalized reduced argument */ | ||
99 | } | ||
100 | else | ||
101 | { | ||
102 | invert = 0; | ||
103 | argSignif.lsw = 0; | ||
104 | XSIG_LL(accum) = XSIG_LL(argSignif) = significand(st0_ptr); | ||
105 | |||
106 | if ( exponent < -1 ) | ||
107 | { | ||
108 | /* shift the argument right by the required places */ | ||
109 | if ( FPU_shrx(&XSIG_LL(accum), -1-exponent) >= 0x80000000U ) | ||
110 | XSIG_LL(accum) ++; /* round up */ | ||
111 | } | ||
112 | } | ||
113 | |||
114 | XSIG_LL(argSq) = XSIG_LL(accum); argSq.lsw = accum.lsw; | ||
115 | mul_Xsig_Xsig(&argSq, &argSq); /* argSq = arg^2 (fixed point) */ | ||
116 | XSIG_LL(argSqSq) = XSIG_LL(argSq); argSqSq.lsw = argSq.lsw; | ||
117 | mul_Xsig_Xsig(&argSqSq, &argSqSq); /* argSqSq = arg^4 */ | ||
118 | |||
119 | /* Compute the negative terms for the numerator polynomial */ | ||
120 | accumulatoro.msw = accumulatoro.midw = accumulatoro.lsw = 0; | ||
121 | polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddnegterm, HiPOWERon-1); | ||
122 | mul_Xsig_Xsig(&accumulatoro, &argSq); | ||
123 | negate_Xsig(&accumulatoro); | ||
124 | /* Add the positive terms */ | ||
125 | polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddplterm, HiPOWERop-1); | ||
126 | |||
127 | |||
128 | /* Compute the positive terms for the denominator polynomial */ | ||
129 | accumulatore.msw = accumulatore.midw = accumulatore.lsw = 0; | ||
130 | polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evenplterm, HiPOWERep-1); | ||
131 | mul_Xsig_Xsig(&accumulatore, &argSq); | ||
132 | negate_Xsig(&accumulatore); | ||
133 | /* Add the negative terms */ | ||
134 | polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evennegterm, HiPOWERen-1); | ||
135 | /* Multiply by arg^2 */ | ||
136 | mul64_Xsig(&accumulatore, &XSIG_LL(argSignif)); | ||
137 | mul64_Xsig(&accumulatore, &XSIG_LL(argSignif)); | ||
138 | /* de-normalize and divide by 2 */ | ||
139 | shr_Xsig(&accumulatore, -2*(1+exponent) + 1); | ||
140 | negate_Xsig(&accumulatore); /* This does 1 - accumulator */ | ||
141 | |||
142 | /* Now find the ratio. */ | ||
143 | if ( accumulatore.msw == 0 ) | ||
144 | { | ||
145 | /* accumulatoro must contain 1.0 here, (actually, 0) but it | ||
146 | really doesn't matter what value we use because it will | ||
147 | have negligible effect in later calculations | ||
148 | */ | ||
149 | XSIG_LL(accum) = 0x8000000000000000LL; | ||
150 | accum.lsw = 0; | ||
151 | } | ||
152 | else | ||
153 | { | ||
154 | div_Xsig(&accumulatoro, &accumulatore, &accum); /* accum = numerator / denominator */ | ||
155 | } | ||
156 | |||
157 | /* Multiply by 1/3 * arg^3 */ | ||
158 | mul64_Xsig(&accum, &XSIG_LL(argSignif)); | ||
159 | mul64_Xsig(&accum, &XSIG_LL(argSignif)); | ||
160 | mul64_Xsig(&accum, &XSIG_LL(argSignif)); | ||
161 | mul64_Xsig(&accum, &twothirds); | ||
162 | shr_Xsig(&accum, -2*(exponent+1)); | ||
163 | |||
164 | /* tan(arg) = arg + accum */ | ||
165 | add_two_Xsig(&accum, &argSignif, &exponent); | ||
166 | |||
167 | if ( invert ) | ||
168 | { | ||
169 | /* We now have the value of tan(pi_2 - arg) where pi_2 is an | ||
170 | approximation for pi/2 | ||
171 | */ | ||
172 | /* The next step is to fix the answer to compensate for the | ||
173 | error due to the approximation used for pi/2 | ||
174 | */ | ||
175 | |||
176 | /* This is (approx) delta, the error in our approx for pi/2 | ||
177 | (see above). It has an exponent of -65 | ||
178 | */ | ||
179 | XSIG_LL(fix_up) = 0x898cc51701b839a2LL; | ||
180 | fix_up.lsw = 0; | ||
181 | |||
182 | if ( exponent == 0 ) | ||
183 | adj = 0xffffffff; /* We want approx 1.0 here, but | ||
184 | this is close enough. */ | ||
185 | else if ( exponent > -30 ) | ||
186 | { | ||
187 | adj = accum.msw >> -(exponent+1); /* tan */ | ||
188 | adj = mul_32_32(adj, adj); /* tan^2 */ | ||
189 | } | ||
190 | else | ||
191 | adj = 0; | ||
192 | adj = mul_32_32(0x898cc517, adj); /* delta * tan^2 */ | ||
193 | |||
194 | fix_up.msw += adj; | ||
195 | if ( !(fix_up.msw & 0x80000000) ) /* did fix_up overflow ? */ | ||
196 | { | ||
197 | /* Yes, we need to add an msb */ | ||
198 | shr_Xsig(&fix_up, 1); | ||
199 | fix_up.msw |= 0x80000000; | ||
200 | shr_Xsig(&fix_up, 64 + exponent); | ||
201 | } | ||
202 | else | ||
203 | shr_Xsig(&fix_up, 65 + exponent); | ||
204 | |||
205 | add_two_Xsig(&accum, &fix_up, &exponent); | ||
206 | |||
207 | /* accum now contains tan(pi/2 - arg). | ||
208 | Use tan(arg) = 1.0 / tan(pi/2 - arg) | ||
209 | */ | ||
210 | accumulatoro.lsw = accumulatoro.midw = 0; | ||
211 | accumulatoro.msw = 0x80000000; | ||
212 | div_Xsig(&accumulatoro, &accum, &accum); | ||
213 | exponent = - exponent - 1; | ||
214 | } | ||
215 | |||
216 | /* Transfer the result */ | ||
217 | exponent += round_Xsig(&accum); /* apply the exponent change from rounding, as poly_sine() and poly_cos() do; it was discarded before */ | ||
218 | FPU_settag0(TAG_Valid); | ||
219 | significand(st0_ptr) = XSIG_LL(accum); | ||
220 | setexponent16(st0_ptr, exponent + EXTENDED_Ebias); /* Result is positive. */ | ||
221 | |||
222 | } | ||
diff --git a/arch/x86/math-emu/polynom_Xsig.S b/arch/x86/math-emu/polynom_Xsig.S new file mode 100644 index 000000000000..17315c89ff3d --- /dev/null +++ b/arch/x86/math-emu/polynom_Xsig.S | |||
@@ -0,0 +1,135 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | polynomial_Xsig.S | | ||
3 | | | | ||
4 | | Fixed point arithmetic polynomial evaluation. | | ||
5 | | | | ||
6 | | Copyright (C) 1992,1993,1994,1995 | | ||
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
8 | | Australia. E-mail billm@jacobi.maths.monash.edu.au | | ||
9 | | | | ||
10 | | Call from C as: | | ||
11 | | void polynomial_Xsig(Xsig *accum, const unsigned long long *x, | | ||
12 | | const unsigned long long terms[], int n) | | ||
13 | | | | ||
14 | | Computes (x is passed by reference; n is the index of the highest term): | | ||
15 | | terms[0] + (terms[1] + (terms[2] + ... + terms[n]*x ... )*x)*x | | ||
16 | | and adds the result to the 12 byte Xsig. | | ||
17 | | The terms[] are each 8 bytes, but all computation is performed to 12 byte | | ||
18 | | precision. | | ||
19 | | | | ||
20 | | This function must be used carefully: most overflow of intermediate | | ||
21 | | results is controlled, but overflow of the result is not. | | ||
22 | | | | ||
23 | +---------------------------------------------------------------------------*/ | ||
24 | .file "polynomial_Xsig.S" | ||
25 | |||
26 | #include "fpu_emu.h" | ||
27 | |||
28 | |||
29 | #define TERM_SIZE $8 /* bytes per terms[] entry, written as an immediate operand */ | ||
30 | #define SUM_MS -20(%ebp) /* sum ms long */ | ||
31 | #define SUM_MIDDLE -24(%ebp) /* sum middle long */ | ||
32 | #define SUM_LS -28(%ebp) /* sum ls long */ | ||
33 | #define ACCUM_MS -4(%ebp) /* accum ms long */ | ||
34 | #define ACCUM_MIDDLE -8(%ebp) /* accum middle long */ | ||
35 | #define ACCUM_LS -12(%ebp) /* accum ls long */ | ||
36 | #define OVERFLOWED -16(%ebp) /* addition overflow flag */ | ||
37 | |||
38 | .text | ||
39 | ENTRY(polynomial_Xsig) | ||
40 | pushl %ebp /* set up a frame; locals are addressed off %ebp */ | ||
41 | movl %esp,%ebp | ||
42 | subl $32,%esp /* room for SUM_*, ACCUM_* and OVERFLOWED */ | ||
43 | pushl %esi | ||
44 | pushl %edi | ||
45 | pushl %ebx /* saved and restored, though not otherwise used below */ | ||
46 | |||
47 | movl PARAM2,%esi /* x */ | ||
48 | movl PARAM3,%edi /* terms */ | ||
49 | |||
50 | movl TERM_SIZE,%eax | ||
51 | mull PARAM4 /* n */ | ||
52 | addl %eax,%edi /* %edi = &terms[n] */ | ||
53 | |||
54 | movl 4(%edi),%edx /* terms[n] */ | ||
55 | movl %edx,SUM_MS | ||
56 | movl (%edi),%edx /* terms[n] */ | ||
57 | movl %edx,SUM_MIDDLE | ||
58 | xor %eax,%eax | ||
59 | movl %eax,SUM_LS /* sum starts as terms[n] with a zero ls long */ | ||
60 | movb %al,OVERFLOWED /* no carry pending yet */ | ||
61 | |||
62 | subl TERM_SIZE,%edi /* step down to the next lower term */ | ||
63 | decl PARAM4 | ||
64 | js L_accum_done /* n was 0: the sum is just terms[0] */ | ||
65 | |||
66 | L_accum_loop: | ||
67 | xor %eax,%eax /* accum = (SUM_MS:SUM_MIDDLE) * x; SUM_LS takes no part in the product */ | ||
68 | movl %eax,ACCUM_MS | ||
69 | movl %eax,ACCUM_MIDDLE | ||
70 | |||
71 | movl SUM_MIDDLE,%eax | ||
72 | mull (%esi) /* x ls long */ | ||
73 | movl %edx,ACCUM_LS /* only the high half of this partial product is kept */ | ||
74 | |||
75 | movl SUM_MIDDLE,%eax | ||
76 | mull 4(%esi) /* x ms long */ | ||
77 | addl %eax,ACCUM_LS | ||
78 | adcl %edx,ACCUM_MIDDLE | ||
79 | adcl $0,ACCUM_MS | ||
80 | |||
81 | movl SUM_MS,%eax | ||
82 | mull (%esi) /* x ls long */ | ||
83 | addl %eax,ACCUM_LS | ||
84 | adcl %edx,ACCUM_MIDDLE | ||
85 | adcl $0,ACCUM_MS | ||
86 | |||
87 | movl SUM_MS,%eax | ||
88 | mull 4(%esi) /* x ms long */ | ||
89 | addl %eax,ACCUM_MIDDLE | ||
90 | adcl %edx,ACCUM_MS | ||
91 | |||
92 | testb $0xff,OVERFLOWED /* did the previous sum carry out of its ms long? */ | ||
93 | jz L_no_overflow | ||
94 | |||
95 | movl (%esi),%eax /* yes: that carry contributes one extra x to the product */ | ||
96 | addl %eax,ACCUM_MIDDLE | ||
97 | movl 4(%esi),%eax | ||
98 | adcl %eax,ACCUM_MS /* This could overflow too */ | ||
99 | |||
100 | L_no_overflow: | ||
101 | |||
102 | /* | ||
103 | * Now put the sum of next term and the accumulator | ||
104 | * into the sum register | ||
105 | */ | ||
106 | movl ACCUM_LS,%eax | ||
107 | addl (%edi),%eax /* term ls long; NOTE(review): also added into SUM_MIDDLE below, unlike the initial load - confirm intended */ | ||
108 | movl %eax,SUM_LS | ||
109 | movl ACCUM_MIDDLE,%eax | ||
110 | adcl (%edi),%eax /* term ls long */ | ||
111 | movl %eax,SUM_MIDDLE | ||
112 | movl ACCUM_MS,%eax | ||
113 | adcl 4(%edi),%eax /* term ms long */ | ||
114 | movl %eax,SUM_MS | ||
115 | sbbb %al,%al /* %al = 0xff if the add carried out, else 0 */ | ||
116 | movb %al,OVERFLOWED /* Used in the next iteration */ | ||
117 | |||
118 | subl TERM_SIZE,%edi /* step down to the next lower term */ | ||
119 | decl PARAM4 | ||
120 | jns L_accum_loop /* the last pass runs with the count at 0, i.e. terms[0] */ | ||
121 | |||
122 | L_accum_done: | ||
123 | movl PARAM1,%edi /* accum */ | ||
124 | movl SUM_LS,%eax /* add the 96 bit sum into *accum, ls long first */ | ||
125 | addl %eax,(%edi) | ||
126 | movl SUM_MIDDLE,%eax | ||
127 | adcl %eax,4(%edi) | ||
128 | movl SUM_MS,%eax | ||
129 | adcl %eax,8(%edi) /* a carry out of the ms long is not handled */ | ||
130 | |||
131 | popl %ebx | ||
132 | popl %edi | ||
133 | popl %esi | ||
134 | leave | ||
135 | ret | ||
diff --git a/arch/x86/math-emu/reg_add_sub.c b/arch/x86/math-emu/reg_add_sub.c new file mode 100644 index 000000000000..7cd3b37ac084 --- /dev/null +++ b/arch/x86/math-emu/reg_add_sub.c | |||
@@ -0,0 +1,374 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | reg_add_sub.c | | ||
3 | | | | ||
4 | | Functions to add or subtract two registers and put the result in a third. | | ||
5 | | | | ||
6 | | Copyright (C) 1992,1993,1997 | | ||
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | ||
8 | | E-mail billm@suburbia.net | | ||
9 | | | | ||
10 | | | | ||
11 | +---------------------------------------------------------------------------*/ | ||
12 | |||
13 | /*---------------------------------------------------------------------------+ | ||
14 | | For each function, the destination may be any FPU_REG, including one of | | ||
15 | | the source FPU_REGs. | | ||
16 | | Each function returns 0 if the answer is o.k., otherwise a non-zero | | ||
17 | | value is returned, indicating either an exception condition or an | | ||
18 | | internal error. | | ||
19 | +---------------------------------------------------------------------------*/ | ||
20 | |||
21 | #include "exception.h" | ||
22 | #include "reg_constant.h" | ||
23 | #include "fpu_emu.h" | ||
24 | #include "control_w.h" | ||
25 | #include "fpu_system.h" | ||
26 | |||
27 | static | ||
28 | int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa, | ||
29 | FPU_REG const *b, u_char tagb, u_char signb, | ||
30 | FPU_REG *dest, int deststnr, int control_w); | ||
31 | |||
32 | /* | ||
33 | Operates on st(0) and st(n), or on st(0) and temporary data. | ||
34 | The destination must be one of the source st(x). | ||
35 | */ | ||
36 | int FPU_add(FPU_REG const *b, u_char tagb, int deststnr, int control_w) | ||
37 | { | ||
38 | FPU_REG *a = &st(0); | ||
39 | FPU_REG *dest = &st(deststnr); | ||
40 | u_char signb = getsign(b); | ||
41 | u_char taga = FPU_gettag0(); | ||
42 | u_char signa = getsign(a); | ||
43 | u_char saved_sign = getsign(dest); | ||
44 | int diff, tag, expa, expb; | ||
45 | |||
46 | if ( !(taga | tagb) ) | ||
47 | { | ||
48 | expa = exponent(a); | ||
49 | expb = exponent(b); | ||
50 | |||
51 | valid_add: | ||
52 | /* Both registers are valid */ | ||
53 | if (!(signa ^ signb)) | ||
54 | { | ||
55 | /* signs are the same */ | ||
56 | tag = FPU_u_add(a, b, dest, control_w, signa, expa, expb); | ||
57 | } | ||
58 | else | ||
59 | { | ||
60 | /* The signs are different, so do a subtraction */ | ||
61 | diff = expa - expb; | ||
62 | if (!diff) | ||
63 | { | ||
64 | diff = a->sigh - b->sigh; /* This works only if the ms bits | ||
65 | are identical. */ | ||
66 | if (!diff) | ||
67 | { | ||
68 | diff = a->sigl > b->sigl; | ||
69 | if (!diff) | ||
70 | diff = -(a->sigl < b->sigl); | ||
71 | } | ||
72 | } | ||
73 | |||
74 | if (diff > 0) | ||
75 | { | ||
76 | tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb); | ||
77 | } | ||
78 | else if ( diff < 0 ) | ||
79 | { | ||
80 | tag = FPU_u_sub(b, a, dest, control_w, signb, expb, expa); | ||
81 | } | ||
82 | else | ||
83 | { | ||
84 | FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); | ||
85 | /* sign depends upon rounding mode */ | ||
86 | setsign(dest, ((control_w & CW_RC) != RC_DOWN) | ||
87 | ? SIGN_POS : SIGN_NEG); | ||
88 | return TAG_Zero; | ||
89 | } | ||
90 | } | ||
91 | |||
92 | if ( tag < 0 ) | ||
93 | { | ||
94 | setsign(dest, saved_sign); | ||
95 | return tag; | ||
96 | } | ||
97 | FPU_settagi(deststnr, tag); | ||
98 | return tag; | ||
99 | } | ||
100 | |||
101 | if ( taga == TAG_Special ) | ||
102 | taga = FPU_Special(a); | ||
103 | if ( tagb == TAG_Special ) | ||
104 | tagb = FPU_Special(b); | ||
105 | |||
106 | if ( ((taga == TAG_Valid) && (tagb == TW_Denormal)) | ||
107 | || ((taga == TW_Denormal) && (tagb == TAG_Valid)) | ||
108 | || ((taga == TW_Denormal) && (tagb == TW_Denormal)) ) | ||
109 | { | ||
110 | FPU_REG x, y; | ||
111 | |||
112 | if ( denormal_operand() < 0 ) | ||
113 | return FPU_Exception; | ||
114 | |||
115 | FPU_to_exp16(a, &x); | ||
116 | FPU_to_exp16(b, &y); | ||
117 | a = &x; | ||
118 | b = &y; | ||
119 | expa = exponent16(a); | ||
120 | expb = exponent16(b); | ||
121 | goto valid_add; | ||
122 | } | ||
123 | |||
124 | if ( (taga == TW_NaN) || (tagb == TW_NaN) ) | ||
125 | { | ||
126 | if ( deststnr == 0 ) | ||
127 | return real_2op_NaN(b, tagb, deststnr, a); | ||
128 | else | ||
129 | return real_2op_NaN(a, taga, deststnr, a); | ||
130 | } | ||
131 | |||
132 | return add_sub_specials(a, taga, signa, b, tagb, signb, | ||
133 | dest, deststnr, control_w); | ||
134 | } | ||
135 | |||
136 | |||
137 | /* Subtract b from a. (a-b) -> dest */ | ||
138 | int FPU_sub(int flags, int rm, int control_w) | ||
139 | { | ||
140 | FPU_REG const *a, *b; | ||
141 | FPU_REG *dest; | ||
142 | u_char taga, tagb, signa, signb, saved_sign, sign; | ||
143 | int diff, tag = 0, expa, expb, deststnr; | ||
144 | |||
145 | a = &st(0); | ||
146 | taga = FPU_gettag0(); | ||
147 | |||
148 | deststnr = 0; | ||
149 | if ( flags & LOADED ) | ||
150 | { | ||
151 | b = (FPU_REG *)rm; | ||
152 | tagb = flags & 0x0f; | ||
153 | } | ||
154 | else | ||
155 | { | ||
156 | b = &st(rm); | ||
157 | tagb = FPU_gettagi(rm); | ||
158 | |||
159 | if ( flags & DEST_RM ) | ||
160 | deststnr = rm; | ||
161 | } | ||
162 | |||
163 | signa = getsign(a); | ||
164 | signb = getsign(b); | ||
165 | |||
166 | if ( flags & REV ) | ||
167 | { | ||
168 | signa ^= SIGN_NEG; | ||
169 | signb ^= SIGN_NEG; | ||
170 | } | ||
171 | |||
172 | dest = &st(deststnr); | ||
173 | saved_sign = getsign(dest); | ||
174 | |||
175 | if ( !(taga | tagb) ) | ||
176 | { | ||
177 | expa = exponent(a); | ||
178 | expb = exponent(b); | ||
179 | |||
180 | valid_subtract: | ||
181 | /* Both registers are valid */ | ||
182 | |||
183 | diff = expa - expb; | ||
184 | |||
185 | if (!diff) | ||
186 | { | ||
187 | diff = a->sigh - b->sigh; /* Works only if ms bits are identical */ | ||
188 | if (!diff) | ||
189 | { | ||
190 | diff = a->sigl > b->sigl; | ||
191 | if (!diff) | ||
192 | diff = -(a->sigl < b->sigl); | ||
193 | } | ||
194 | } | ||
195 | |||
196 | switch ( (((int)signa)*2 + signb) / SIGN_NEG ) | ||
197 | { | ||
198 | case 0: /* P - P */ | ||
199 | case 3: /* N - N */ | ||
200 | if (diff > 0) | ||
201 | { | ||
202 | /* |a| > |b| */ | ||
203 | tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb); | ||
204 | } | ||
205 | else if ( diff == 0 ) | ||
206 | { | ||
207 | FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); | ||
208 | |||
209 | /* sign depends upon rounding mode */ | ||
210 | setsign(dest, ((control_w & CW_RC) != RC_DOWN) | ||
211 | ? SIGN_POS : SIGN_NEG); | ||
212 | return TAG_Zero; | ||
213 | } | ||
214 | else | ||
215 | { | ||
216 | sign = signa ^ SIGN_NEG; | ||
217 | tag = FPU_u_sub(b, a, dest, control_w, sign, expb, expa); | ||
218 | } | ||
219 | break; | ||
220 | case 1: /* P - N */ | ||
221 | tag = FPU_u_add(a, b, dest, control_w, SIGN_POS, expa, expb); | ||
222 | break; | ||
223 | case 2: /* N - P */ | ||
224 | tag = FPU_u_add(a, b, dest, control_w, SIGN_NEG, expa, expb); | ||
225 | break; | ||
226 | #ifdef PARANOID | ||
227 | default: | ||
228 | EXCEPTION(EX_INTERNAL|0x111); | ||
229 | return -1; | ||
230 | #endif | ||
231 | } | ||
232 | if ( tag < 0 ) | ||
233 | { | ||
234 | setsign(dest, saved_sign); | ||
235 | return tag; | ||
236 | } | ||
237 | FPU_settagi(deststnr, tag); | ||
238 | return tag; | ||
239 | } | ||
240 | |||
241 | if ( taga == TAG_Special ) | ||
242 | taga = FPU_Special(a); | ||
243 | if ( tagb == TAG_Special ) | ||
244 | tagb = FPU_Special(b); | ||
245 | |||
246 | if ( ((taga == TAG_Valid) && (tagb == TW_Denormal)) | ||
247 | || ((taga == TW_Denormal) && (tagb == TAG_Valid)) | ||
248 | || ((taga == TW_Denormal) && (tagb == TW_Denormal)) ) | ||
249 | { | ||
250 | FPU_REG x, y; | ||
251 | |||
252 | if ( denormal_operand() < 0 ) | ||
253 | return FPU_Exception; | ||
254 | |||
255 | FPU_to_exp16(a, &x); | ||
256 | FPU_to_exp16(b, &y); | ||
257 | a = &x; | ||
258 | b = &y; | ||
259 | expa = exponent16(a); | ||
260 | expb = exponent16(b); | ||
261 | |||
262 | goto valid_subtract; | ||
263 | } | ||
264 | |||
265 | if ( (taga == TW_NaN) || (tagb == TW_NaN) ) | ||
266 | { | ||
267 | FPU_REG const *d1, *d2; | ||
268 | if ( flags & REV ) | ||
269 | { | ||
270 | d1 = b; | ||
271 | d2 = a; | ||
272 | } | ||
273 | else | ||
274 | { | ||
275 | d1 = a; | ||
276 | d2 = b; | ||
277 | } | ||
278 | if ( flags & LOADED ) | ||
279 | return real_2op_NaN(b, tagb, deststnr, d1); | ||
280 | if ( flags & DEST_RM ) | ||
281 | return real_2op_NaN(a, taga, deststnr, d2); | ||
282 | else | ||
283 | return real_2op_NaN(b, tagb, deststnr, d2); | ||
284 | } | ||
285 | |||
286 | return add_sub_specials(a, taga, signa, b, tagb, signb ^ SIGN_NEG, | ||
287 | dest, deststnr, control_w); | ||
288 | } | ||
289 | |||
290 | |||
291 | static | ||
292 | int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa, | ||
293 | FPU_REG const *b, u_char tagb, u_char signb, | ||
294 | FPU_REG *dest, int deststnr, int control_w) | ||
295 | { | ||
296 | if ( ((taga == TW_Denormal) || (tagb == TW_Denormal)) | ||
297 | && (denormal_operand() < 0) ) | ||
298 | return FPU_Exception; | ||
299 | |||
300 | if (taga == TAG_Zero) | ||
301 | { | ||
302 | if (tagb == TAG_Zero) | ||
303 | { | ||
304 | /* Both are zero, result will be zero. */ | ||
305 | u_char different_signs = signa ^ signb; | ||
306 | |||
307 | FPU_copy_to_regi(a, TAG_Zero, deststnr); | ||
308 | if ( different_signs ) | ||
309 | { | ||
310 | /* Signs are different. */ | ||
311 | /* Sign of answer depends upon rounding mode. */ | ||
312 | setsign(dest, ((control_w & CW_RC) != RC_DOWN) | ||
313 | ? SIGN_POS : SIGN_NEG); | ||
314 | } | ||
315 | else | ||
316 | setsign(dest, signa); /* signa may differ from the sign of a. */ | ||
317 | return TAG_Zero; | ||
318 | } | ||
319 | else | ||
320 | { | ||
321 | reg_copy(b, dest); | ||
322 | if ( (tagb == TW_Denormal) && (b->sigh & 0x80000000) ) | ||
323 | { | ||
324 | /* A pseudoDenormal, convert it. */ | ||
325 | addexponent(dest, 1); | ||
326 | tagb = TAG_Valid; | ||
327 | } | ||
328 | else if ( tagb > TAG_Empty ) | ||
329 | tagb = TAG_Special; | ||
330 | setsign(dest, signb); /* signb may differ from the sign of b. */ | ||
331 | FPU_settagi(deststnr, tagb); | ||
332 | return tagb; | ||
333 | } | ||
334 | } | ||
335 | else if (tagb == TAG_Zero) | ||
336 | { | ||
337 | reg_copy(a, dest); | ||
338 | if ( (taga == TW_Denormal) && (a->sigh & 0x80000000) ) | ||
339 | { | ||
340 | /* A pseudoDenormal */ | ||
341 | addexponent(dest, 1); | ||
342 | taga = TAG_Valid; | ||
343 | } | ||
344 | else if ( taga > TAG_Empty ) | ||
345 | taga = TAG_Special; | ||
346 | setsign(dest, signa); /* signa may differ from the sign of a. */ | ||
347 | FPU_settagi(deststnr, taga); | ||
348 | return taga; | ||
349 | } | ||
350 | else if (taga == TW_Infinity) | ||
351 | { | ||
352 | if ( (tagb != TW_Infinity) || (signa == signb) ) | ||
353 | { | ||
354 | FPU_copy_to_regi(a, TAG_Special, deststnr); | ||
355 | setsign(dest, signa); /* signa may differ from the sign of a. */ | ||
356 | return taga; | ||
357 | } | ||
358 | /* Infinity-Infinity is undefined. */ | ||
359 | return arith_invalid(deststnr); | ||
360 | } | ||
361 | else if (tagb == TW_Infinity) | ||
362 | { | ||
363 | FPU_copy_to_regi(b, TAG_Special, deststnr); | ||
364 | setsign(dest, signb); /* signb may differ from the sign of b. */ | ||
365 | return tagb; | ||
366 | } | ||
367 | |||
368 | #ifdef PARANOID | ||
369 | EXCEPTION(EX_INTERNAL|0x101); | ||
370 | #endif | ||
371 | |||
372 | return FPU_Exception; | ||
373 | } | ||
374 | |||
diff --git a/arch/x86/math-emu/reg_compare.c b/arch/x86/math-emu/reg_compare.c new file mode 100644 index 000000000000..f37c5b5a35ad --- /dev/null +++ b/arch/x86/math-emu/reg_compare.c | |||
@@ -0,0 +1,381 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | reg_compare.c | | ||
3 | | | | ||
4 | | Compare two floating point registers | | ||
5 | | | | ||
6 | | Copyright (C) 1992,1993,1994,1997 | | ||
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | ||
8 | | E-mail billm@suburbia.net | | ||
9 | | | | ||
10 | | | | ||
11 | +---------------------------------------------------------------------------*/ | ||
12 | |||
13 | /*---------------------------------------------------------------------------+ | ||
14 | | compare() is the core FPU_REG comparison function | | ||
15 | +---------------------------------------------------------------------------*/ | ||
16 | |||
17 | #include "fpu_system.h" | ||
18 | #include "exception.h" | ||
19 | #include "fpu_emu.h" | ||
20 | #include "control_w.h" | ||
21 | #include "status_w.h" | ||
22 | |||
23 | |||
24 | static int compare(FPU_REG const *b, int tagb) | ||
25 | { | ||
26 | int diff, exp0, expb; | ||
27 | u_char st0_tag; | ||
28 | FPU_REG *st0_ptr; | ||
29 | FPU_REG x, y; | ||
30 | u_char st0_sign, signb = getsign(b); | ||
31 | |||
32 | st0_ptr = &st(0); | ||
33 | st0_tag = FPU_gettag0(); | ||
34 | st0_sign = getsign(st0_ptr); | ||
35 | |||
36 | if ( tagb == TAG_Special ) | ||
37 | tagb = FPU_Special(b); | ||
38 | if ( st0_tag == TAG_Special ) | ||
39 | st0_tag = FPU_Special(st0_ptr); | ||
40 | |||
41 | if ( ((st0_tag != TAG_Valid) && (st0_tag != TW_Denormal)) | ||
42 | || ((tagb != TAG_Valid) && (tagb != TW_Denormal)) ) | ||
43 | { | ||
44 | if ( st0_tag == TAG_Zero ) | ||
45 | { | ||
46 | if ( tagb == TAG_Zero ) return COMP_A_eq_B; | ||
47 | if ( tagb == TAG_Valid ) | ||
48 | return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B); | ||
49 | if ( tagb == TW_Denormal ) | ||
50 | return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B) | ||
51 | | COMP_Denormal; | ||
52 | } | ||
53 | else if ( tagb == TAG_Zero ) | ||
54 | { | ||
55 | if ( st0_tag == TAG_Valid ) | ||
56 | return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B); | ||
57 | if ( st0_tag == TW_Denormal ) | ||
58 | return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) | ||
59 | | COMP_Denormal; | ||
60 | } | ||
61 | |||
62 | if ( st0_tag == TW_Infinity ) | ||
63 | { | ||
64 | if ( (tagb == TAG_Valid) || (tagb == TAG_Zero) ) | ||
65 | return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B); | ||
66 | else if ( tagb == TW_Denormal ) | ||
67 | return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) | ||
68 | | COMP_Denormal; | ||
69 | else if ( tagb == TW_Infinity ) | ||
70 | { | ||
71 | /* The 80486 book says that infinities can be equal! */ | ||
72 | return (st0_sign == signb) ? COMP_A_eq_B : | ||
73 | ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B); | ||
74 | } | ||
75 | /* Fall through to the NaN code */ | ||
76 | } | ||
77 | else if ( tagb == TW_Infinity ) | ||
78 | { | ||
79 | if ( (st0_tag == TAG_Valid) || (st0_tag == TAG_Zero) ) | ||
80 | return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B); | ||
81 | if ( st0_tag == TW_Denormal ) | ||
82 | return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B) | ||
83 | | COMP_Denormal; | ||
84 | /* Fall through to the NaN code */ | ||
85 | } | ||
86 | |||
87 | /* The only possibility now should be that one of the arguments | ||
88 | is a NaN */ | ||
89 | if ( (st0_tag == TW_NaN) || (tagb == TW_NaN) ) | ||
90 | { | ||
91 | int signalling = 0, unsupported = 0; | ||
92 | if ( st0_tag == TW_NaN ) | ||
93 | { | ||
94 | signalling = (st0_ptr->sigh & 0xc0000000) == 0x80000000; | ||
95 | unsupported = !((exponent(st0_ptr) == EXP_OVER) | ||
96 | && (st0_ptr->sigh & 0x80000000)); | ||
97 | } | ||
98 | if ( tagb == TW_NaN ) | ||
99 | { | ||
100 | signalling |= (b->sigh & 0xc0000000) == 0x80000000; | ||
101 | unsupported |= !((exponent(b) == EXP_OVER) | ||
102 | && (b->sigh & 0x80000000)); | ||
103 | } | ||
104 | if ( signalling || unsupported ) | ||
105 | return COMP_No_Comp | COMP_SNaN | COMP_NaN; | ||
106 | else | ||
107 | /* Neither is a signaling NaN */ | ||
108 | return COMP_No_Comp | COMP_NaN; | ||
109 | } | ||
110 | |||
111 | EXCEPTION(EX_Invalid); | ||
112 | } | ||
113 | |||
114 | if (st0_sign != signb) | ||
115 | { | ||
116 | return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) | ||
117 | | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? | ||
118 | COMP_Denormal : 0); | ||
119 | } | ||
120 | |||
121 | if ( (st0_tag == TW_Denormal) || (tagb == TW_Denormal) ) | ||
122 | { | ||
123 | FPU_to_exp16(st0_ptr, &x); | ||
124 | FPU_to_exp16(b, &y); | ||
125 | st0_ptr = &x; | ||
126 | b = &y; | ||
127 | exp0 = exponent16(st0_ptr); | ||
128 | expb = exponent16(b); | ||
129 | } | ||
130 | else | ||
131 | { | ||
132 | exp0 = exponent(st0_ptr); | ||
133 | expb = exponent(b); | ||
134 | } | ||
135 | |||
136 | #ifdef PARANOID | ||
137 | if (!(st0_ptr->sigh & 0x80000000)) EXCEPTION(EX_Invalid); | ||
138 | if (!(b->sigh & 0x80000000)) EXCEPTION(EX_Invalid); | ||
139 | #endif /* PARANOID */ | ||
140 | |||
141 | diff = exp0 - expb; | ||
142 | if ( diff == 0 ) | ||
143 | { | ||
144 | diff = st0_ptr->sigh - b->sigh; /* Works only if ms bits are | ||
145 | identical */ | ||
146 | if ( diff == 0 ) | ||
147 | { | ||
148 | diff = st0_ptr->sigl > b->sigl; | ||
149 | if ( diff == 0 ) | ||
150 | diff = -(st0_ptr->sigl < b->sigl); | ||
151 | } | ||
152 | } | ||
153 | |||
154 | if ( diff > 0 ) | ||
155 | { | ||
156 | return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) | ||
157 | | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? | ||
158 | COMP_Denormal : 0); | ||
159 | } | ||
160 | if ( diff < 0 ) | ||
161 | { | ||
162 | return ((st0_sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B) | ||
163 | | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? | ||
164 | COMP_Denormal : 0); | ||
165 | } | ||
166 | |||
167 | return COMP_A_eq_B | ||
168 | | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? | ||
169 | COMP_Denormal : 0); | ||
170 | |||
171 | } | ||
172 | |||
173 | |||
174 | /* This function requires that st(0) is not empty */ | ||
175 | int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag) | ||
176 | { | ||
177 | int f = 0, c; | ||
178 | |||
179 | c = compare(loaded_data, loaded_tag); | ||
180 | |||
181 | if (c & COMP_NaN) | ||
182 | { | ||
183 | EXCEPTION(EX_Invalid); | ||
184 | f = SW_C3 | SW_C2 | SW_C0; | ||
185 | } | ||
186 | else | ||
187 | switch (c & 7) | ||
188 | { | ||
189 | case COMP_A_lt_B: | ||
190 | f = SW_C0; | ||
191 | break; | ||
192 | case COMP_A_eq_B: | ||
193 | f = SW_C3; | ||
194 | break; | ||
195 | case COMP_A_gt_B: | ||
196 | f = 0; | ||
197 | break; | ||
198 | case COMP_No_Comp: | ||
199 | f = SW_C3 | SW_C2 | SW_C0; | ||
200 | break; | ||
201 | #ifdef PARANOID | ||
202 | default: | ||
203 | EXCEPTION(EX_INTERNAL|0x121); | ||
204 | f = SW_C3 | SW_C2 | SW_C0; | ||
205 | break; | ||
206 | #endif /* PARANOID */ | ||
207 | } | ||
208 | setcc(f); | ||
209 | if (c & COMP_Denormal) | ||
210 | { | ||
211 | return denormal_operand() < 0; | ||
212 | } | ||
213 | return 0; | ||
214 | } | ||
215 | |||
216 | |||
217 | static int compare_st_st(int nr) | ||
218 | { | ||
219 | int f = 0, c; | ||
220 | FPU_REG *st_ptr; | ||
221 | |||
222 | if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) ) | ||
223 | { | ||
224 | setcc(SW_C3 | SW_C2 | SW_C0); | ||
225 | /* Stack fault */ | ||
226 | EXCEPTION(EX_StackUnder); | ||
227 | return !(control_word & CW_Invalid); | ||
228 | } | ||
229 | |||
230 | st_ptr = &st(nr); | ||
231 | c = compare(st_ptr, FPU_gettagi(nr)); | ||
232 | if (c & COMP_NaN) | ||
233 | { | ||
234 | setcc(SW_C3 | SW_C2 | SW_C0); | ||
235 | EXCEPTION(EX_Invalid); | ||
236 | return !(control_word & CW_Invalid); | ||
237 | } | ||
238 | else | ||
239 | switch (c & 7) | ||
240 | { | ||
241 | case COMP_A_lt_B: | ||
242 | f = SW_C0; | ||
243 | break; | ||
244 | case COMP_A_eq_B: | ||
245 | f = SW_C3; | ||
246 | break; | ||
247 | case COMP_A_gt_B: | ||
248 | f = 0; | ||
249 | break; | ||
250 | case COMP_No_Comp: | ||
251 | f = SW_C3 | SW_C2 | SW_C0; | ||
252 | break; | ||
253 | #ifdef PARANOID | ||
254 | default: | ||
255 | EXCEPTION(EX_INTERNAL|0x122); | ||
256 | f = SW_C3 | SW_C2 | SW_C0; | ||
257 | break; | ||
258 | #endif /* PARANOID */ | ||
259 | } | ||
260 | setcc(f); | ||
261 | if (c & COMP_Denormal) | ||
262 | { | ||
263 | return denormal_operand() < 0; | ||
264 | } | ||
265 | return 0; | ||
266 | } | ||
267 | |||
268 | |||
269 | static int compare_u_st_st(int nr) | ||
270 | { | ||
271 | int f = 0, c; | ||
272 | FPU_REG *st_ptr; | ||
273 | |||
274 | if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) ) | ||
275 | { | ||
276 | setcc(SW_C3 | SW_C2 | SW_C0); | ||
277 | /* Stack fault */ | ||
278 | EXCEPTION(EX_StackUnder); | ||
279 | return !(control_word & CW_Invalid); | ||
280 | } | ||
281 | |||
282 | st_ptr = &st(nr); | ||
283 | c = compare(st_ptr, FPU_gettagi(nr)); | ||
284 | if (c & COMP_NaN) | ||
285 | { | ||
286 | setcc(SW_C3 | SW_C2 | SW_C0); | ||
287 | if (c & COMP_SNaN) /* This is the only difference between | ||
288 | un-ordered and ordinary comparisons */ | ||
289 | { | ||
290 | EXCEPTION(EX_Invalid); | ||
291 | return !(control_word & CW_Invalid); | ||
292 | } | ||
293 | return 0; | ||
294 | } | ||
295 | else | ||
296 | switch (c & 7) | ||
297 | { | ||
298 | case COMP_A_lt_B: | ||
299 | f = SW_C0; | ||
300 | break; | ||
301 | case COMP_A_eq_B: | ||
302 | f = SW_C3; | ||
303 | break; | ||
304 | case COMP_A_gt_B: | ||
305 | f = 0; | ||
306 | break; | ||
307 | case COMP_No_Comp: | ||
308 | f = SW_C3 | SW_C2 | SW_C0; | ||
309 | break; | ||
310 | #ifdef PARANOID | ||
311 | default: | ||
312 | EXCEPTION(EX_INTERNAL|0x123); | ||
313 | f = SW_C3 | SW_C2 | SW_C0; | ||
314 | break; | ||
315 | #endif /* PARANOID */ | ||
316 | } | ||
317 | setcc(f); | ||
318 | if (c & COMP_Denormal) | ||
319 | { | ||
320 | return denormal_operand() < 0; | ||
321 | } | ||
322 | return 0; | ||
323 | } | ||
324 | |||
325 | /*---------------------------------------------------------------------------*/ | ||
326 | |||
327 | void fcom_st(void) | ||
328 | { | ||
329 | /* fcom st(i) */ | ||
330 | compare_st_st(FPU_rm); | ||
331 | } | ||
332 | |||
333 | |||
334 | void fcompst(void) | ||
335 | { | ||
336 | /* fcomp st(i) */ | ||
337 | if ( !compare_st_st(FPU_rm) ) | ||
338 | FPU_pop(); | ||
339 | } | ||
340 | |||
341 | |||
342 | void fcompp(void) | ||
343 | { | ||
344 | /* fcompp */ | ||
345 | if (FPU_rm != 1) | ||
346 | { | ||
347 | FPU_illegal(); | ||
348 | return; | ||
349 | } | ||
350 | if ( !compare_st_st(1) ) | ||
351 | poppop(); | ||
352 | } | ||
353 | |||
354 | |||
355 | void fucom_(void) | ||
356 | { | ||
357 | /* fucom st(i) */ | ||
358 | compare_u_st_st(FPU_rm); | ||
359 | |||
360 | } | ||
361 | |||
362 | |||
363 | void fucomp(void) | ||
364 | { | ||
365 | /* fucomp st(i) */ | ||
366 | if ( !compare_u_st_st(FPU_rm) ) | ||
367 | FPU_pop(); | ||
368 | } | ||
369 | |||
370 | |||
371 | void fucompp(void) | ||
372 | { | ||
373 | /* fucompp */ | ||
374 | if (FPU_rm == 1) | ||
375 | { | ||
376 | if ( !compare_u_st_st(1) ) | ||
377 | poppop(); | ||
378 | } | ||
379 | else | ||
380 | FPU_illegal(); | ||
381 | } | ||
diff --git a/arch/x86/math-emu/reg_constant.c b/arch/x86/math-emu/reg_constant.c new file mode 100644 index 000000000000..a85015801969 --- /dev/null +++ b/arch/x86/math-emu/reg_constant.c | |||
@@ -0,0 +1,120 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | reg_constant.c | | ||
3 | | | | ||
4 | | All of the constant FPU_REGs | | ||
5 | | | | ||
6 | | Copyright (C) 1992,1993,1994,1997 | | ||
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
8 | | Australia. E-mail billm@suburbia.net | | ||
9 | | | | ||
10 | | | | ||
11 | +---------------------------------------------------------------------------*/ | ||
12 | |||
13 | #include "fpu_system.h" | ||
14 | #include "fpu_emu.h" | ||
15 | #include "status_w.h" | ||
16 | #include "reg_constant.h" | ||
17 | #include "control_w.h" | ||
18 | |||
19 | |||
20 | #define MAKE_REG(s,e,l,h) { l, h, \ | ||
21 | ((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) } | ||
22 | |||
23 | FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000); | ||
24 | #if 0 | ||
25 | FPU_REG const CONST_2 = MAKE_REG(POS, 1, 0x00000000, 0x80000000); | ||
26 | FPU_REG const CONST_HALF = MAKE_REG(POS, -1, 0x00000000, 0x80000000); | ||
27 | #endif /* 0 */ | ||
28 | static FPU_REG const CONST_L2T = MAKE_REG(POS, 1, 0xcd1b8afe, 0xd49a784b); | ||
29 | static FPU_REG const CONST_L2E = MAKE_REG(POS, 0, 0x5c17f0bc, 0xb8aa3b29); | ||
30 | FPU_REG const CONST_PI = MAKE_REG(POS, 1, 0x2168c235, 0xc90fdaa2); | ||
31 | FPU_REG const CONST_PI2 = MAKE_REG(POS, 0, 0x2168c235, 0xc90fdaa2); | ||
32 | FPU_REG const CONST_PI4 = MAKE_REG(POS, -1, 0x2168c235, 0xc90fdaa2); | ||
33 | static FPU_REG const CONST_LG2 = MAKE_REG(POS, -2, 0xfbcff799, 0x9a209a84); | ||
34 | static FPU_REG const CONST_LN2 = MAKE_REG(POS, -1, 0xd1cf79ac, 0xb17217f7); | ||
35 | |||
36 | /* Extra bits to take pi/2 to more than 128 bits precision. */ | ||
37 | FPU_REG const CONST_PI2extra = MAKE_REG(NEG, -66, | ||
38 | 0xfc8f8cbb, 0xece675d1); | ||
39 | |||
40 | /* Only the sign (and tag) is used in internal zeroes */ | ||
41 | FPU_REG const CONST_Z = MAKE_REG(POS, EXP_UNDER, 0x0, 0x0); | ||
42 | |||
43 | /* Only the sign and significand (and tag) are used in internal NaNs */ | ||
44 | /* The 80486 never generates one of these | ||
45 | FPU_REG const CONST_SNAN = MAKE_REG(POS, EXP_OVER, 0x00000001, 0x80000000); | ||
46 | */ | ||
47 | /* This is the real indefinite QNaN */ | ||
48 | FPU_REG const CONST_QNaN = MAKE_REG(NEG, EXP_OVER, 0x00000000, 0xC0000000); | ||
49 | |||
50 | /* Only the sign (and tag) is used in internal infinities */ | ||
51 | FPU_REG const CONST_INF = MAKE_REG(POS, EXP_OVER, 0x00000000, 0x80000000); | ||
52 | |||
53 | |||
54 | static void fld_const(FPU_REG const *c, int adj, u_char tag) | ||
55 | { | ||
56 | FPU_REG *st_new_ptr; | ||
57 | |||
58 | if ( STACK_OVERFLOW ) | ||
59 | { | ||
60 | FPU_stack_overflow(); | ||
61 | return; | ||
62 | } | ||
63 | push(); | ||
64 | reg_copy(c, st_new_ptr); | ||
65 | st_new_ptr->sigl += adj; /* For all our fldxxx constants, we don't need to | ||
66 | borrow or carry. */ | ||
67 | FPU_settag0(tag); | ||
68 | clear_C1(); | ||
69 | } | ||
70 | |||
/*
 * A fast way to find out whether x is one of RC_DOWN or RC_CHOP
 * (and not one of RC_RND or RC_UP): the result is non-zero when x has the
 * RC_DOWN bit set.  Presumably RC_CHOP includes that bit and RC_RND/RC_UP
 * do not -- see control_w.h.
 *
 * The argument is parenthesised so that an expression such as a | b is
 * masked as a whole.
 */
#define DOWN_OR_CHOP(x)  ((x) & RC_DOWN)
75 | |||
76 | static void fld1(int rc) | ||
77 | { | ||
78 | fld_const(&CONST_1, 0, TAG_Valid); | ||
79 | } | ||
80 | |||
81 | static void fldl2t(int rc) | ||
82 | { | ||
83 | fld_const(&CONST_L2T, (rc == RC_UP) ? 1 : 0, TAG_Valid); | ||
84 | } | ||
85 | |||
86 | static void fldl2e(int rc) | ||
87 | { | ||
88 | fld_const(&CONST_L2E, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); | ||
89 | } | ||
90 | |||
91 | static void fldpi(int rc) | ||
92 | { | ||
93 | fld_const(&CONST_PI, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); | ||
94 | } | ||
95 | |||
96 | static void fldlg2(int rc) | ||
97 | { | ||
98 | fld_const(&CONST_LG2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); | ||
99 | } | ||
100 | |||
101 | static void fldln2(int rc) | ||
102 | { | ||
103 | fld_const(&CONST_LN2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); | ||
104 | } | ||
105 | |||
106 | static void fldz(int rc) | ||
107 | { | ||
108 | fld_const(&CONST_Z, 0, TAG_Zero); | ||
109 | } | ||
110 | |||
111 | typedef void (*FUNC_RC)(int); | ||
112 | |||
113 | static FUNC_RC constants_table[] = { | ||
114 | fld1, fldl2t, fldl2e, fldpi, fldlg2, fldln2, fldz, (FUNC_RC)FPU_illegal | ||
115 | }; | ||
116 | |||
117 | void fconst(void) | ||
118 | { | ||
119 | (constants_table[FPU_rm])(control_word & CW_RC); | ||
120 | } | ||
diff --git a/arch/x86/math-emu/reg_constant.h b/arch/x86/math-emu/reg_constant.h new file mode 100644 index 000000000000..1bffaec3a134 --- /dev/null +++ b/arch/x86/math-emu/reg_constant.h | |||
@@ -0,0 +1,25 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | reg_constant.h | | ||
3 | | | | ||
4 | | Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
5 | | Australia. E-mail billm@vaxc.cc.monash.edu.au | | ||
6 | | | | ||
7 | +---------------------------------------------------------------------------*/ | ||
8 | |||
9 | #ifndef _REG_CONSTANT_H_ | ||
10 | #define _REG_CONSTANT_H_ | ||
11 | |||
12 | #include "fpu_emu.h" | ||
13 | |||
14 | extern FPU_REG const CONST_1; | ||
15 | extern FPU_REG const CONST_PI; | ||
16 | extern FPU_REG const CONST_PI2; | ||
17 | extern FPU_REG const CONST_PI2extra; | ||
18 | extern FPU_REG const CONST_PI4; | ||
19 | extern FPU_REG const CONST_Z; | ||
20 | extern FPU_REG const CONST_PINF; | ||
21 | extern FPU_REG const CONST_INF; | ||
22 | extern FPU_REG const CONST_MINF; | ||
23 | extern FPU_REG const CONST_QNaN; | ||
24 | |||
25 | #endif /* _REG_CONSTANT_H_ */ | ||
diff --git a/arch/x86/math-emu/reg_convert.c b/arch/x86/math-emu/reg_convert.c new file mode 100644 index 000000000000..45a258752703 --- /dev/null +++ b/arch/x86/math-emu/reg_convert.c | |||
@@ -0,0 +1,53 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | reg_convert.c | | ||
3 | | | | ||
4 | | Convert register representation. | | ||
5 | | | | ||
6 | | Copyright (C) 1992,1993,1994,1996,1997 | | ||
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | ||
8 | | E-mail billm@suburbia.net | | ||
9 | | | | ||
10 | | | | ||
11 | +---------------------------------------------------------------------------*/ | ||
12 | |||
13 | #include "exception.h" | ||
14 | #include "fpu_emu.h" | ||
15 | |||
16 | |||
17 | int FPU_to_exp16(FPU_REG const *a, FPU_REG *x) | ||
18 | { | ||
19 | int sign = getsign(a); | ||
20 | |||
21 | *(long long *)&(x->sigl) = *(const long long *)&(a->sigl); | ||
22 | |||
23 | /* Set up the exponent as a 16 bit quantity. */ | ||
24 | setexponent16(x, exponent(a)); | ||
25 | |||
26 | if ( exponent16(x) == EXP_UNDER ) | ||
27 | { | ||
28 | /* The number is a de-normal or pseudodenormal. */ | ||
29 | /* We only deal with the significand and exponent. */ | ||
30 | |||
31 | if (x->sigh & 0x80000000) | ||
32 | { | ||
33 | /* Is a pseudodenormal. */ | ||
34 | /* This is non-80486 behaviour because the number | ||
35 | loses its 'denormal' identity. */ | ||
36 | addexponent(x, 1); | ||
37 | } | ||
38 | else | ||
39 | { | ||
40 | /* Is a denormal. */ | ||
41 | addexponent(x, 1); | ||
42 | FPU_normalize_nuo(x); | ||
43 | } | ||
44 | } | ||
45 | |||
46 | if ( !(x->sigh & 0x80000000) ) | ||
47 | { | ||
48 | EXCEPTION(EX_INTERNAL | 0x180); | ||
49 | } | ||
50 | |||
51 | return sign; | ||
52 | } | ||
53 | |||
diff --git a/arch/x86/math-emu/reg_divide.c b/arch/x86/math-emu/reg_divide.c new file mode 100644 index 000000000000..5cee7ff920d9 --- /dev/null +++ b/arch/x86/math-emu/reg_divide.c | |||
@@ -0,0 +1,207 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | reg_divide.c | | ||
3 | | | | ||
4 | | Divide one FPU_REG by another and put the result in a destination FPU_REG.| | ||
5 | | | | ||
6 | | Copyright (C) 1996 | | ||
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | ||
8 | | E-mail billm@jacobi.maths.monash.edu.au | | ||
9 | | | | ||
10 | | Return value is the tag of the answer, or-ed with FPU_Exception if | | ||
11 | | one was raised, or -1 on internal error. | | ||
12 | | | | ||
13 | +---------------------------------------------------------------------------*/ | ||
14 | |||
15 | /*---------------------------------------------------------------------------+ | ||
16 | | The destination may be any FPU_REG, including one of the source FPU_REGs. | | ||
17 | +---------------------------------------------------------------------------*/ | ||
18 | |||
19 | #include "exception.h" | ||
20 | #include "reg_constant.h" | ||
21 | #include "fpu_emu.h" | ||
22 | #include "fpu_system.h" | ||
23 | |||
24 | /* | ||
25 | Divide one register by another and put the result into a third register. | ||
26 | */ | ||
27 | int FPU_div(int flags, int rm, int control_w) | ||
28 | { | ||
29 | FPU_REG x, y; | ||
30 | FPU_REG const *a, *b, *st0_ptr, *st_ptr; | ||
31 | FPU_REG *dest; | ||
32 | u_char taga, tagb, signa, signb, sign, saved_sign; | ||
33 | int tag, deststnr; | ||
34 | |||
35 | if ( flags & DEST_RM ) | ||
36 | deststnr = rm; | ||
37 | else | ||
38 | deststnr = 0; | ||
39 | |||
40 | if ( flags & REV ) | ||
41 | { | ||
42 | b = &st(0); | ||
43 | st0_ptr = b; | ||
44 | tagb = FPU_gettag0(); | ||
45 | if ( flags & LOADED ) | ||
46 | { | ||
47 | a = (FPU_REG *)rm; | ||
48 | taga = flags & 0x0f; | ||
49 | } | ||
50 | else | ||
51 | { | ||
52 | a = &st(rm); | ||
53 | st_ptr = a; | ||
54 | taga = FPU_gettagi(rm); | ||
55 | } | ||
56 | } | ||
57 | else | ||
58 | { | ||
59 | a = &st(0); | ||
60 | st0_ptr = a; | ||
61 | taga = FPU_gettag0(); | ||
62 | if ( flags & LOADED ) | ||
63 | { | ||
64 | b = (FPU_REG *)rm; | ||
65 | tagb = flags & 0x0f; | ||
66 | } | ||
67 | else | ||
68 | { | ||
69 | b = &st(rm); | ||
70 | st_ptr = b; | ||
71 | tagb = FPU_gettagi(rm); | ||
72 | } | ||
73 | } | ||
74 | |||
75 | signa = getsign(a); | ||
76 | signb = getsign(b); | ||
77 | |||
78 | sign = signa ^ signb; | ||
79 | |||
80 | dest = &st(deststnr); | ||
81 | saved_sign = getsign(dest); | ||
82 | |||
83 | if ( !(taga | tagb) ) | ||
84 | { | ||
85 | /* Both regs Valid, this should be the most common case. */ | ||
86 | reg_copy(a, &x); | ||
87 | reg_copy(b, &y); | ||
88 | setpositive(&x); | ||
89 | setpositive(&y); | ||
90 | tag = FPU_u_div(&x, &y, dest, control_w, sign); | ||
91 | |||
92 | if ( tag < 0 ) | ||
93 | return tag; | ||
94 | |||
95 | FPU_settagi(deststnr, tag); | ||
96 | return tag; | ||
97 | } | ||
98 | |||
99 | if ( taga == TAG_Special ) | ||
100 | taga = FPU_Special(a); | ||
101 | if ( tagb == TAG_Special ) | ||
102 | tagb = FPU_Special(b); | ||
103 | |||
104 | if ( ((taga == TAG_Valid) && (tagb == TW_Denormal)) | ||
105 | || ((taga == TW_Denormal) && (tagb == TAG_Valid)) | ||
106 | || ((taga == TW_Denormal) && (tagb == TW_Denormal)) ) | ||
107 | { | ||
108 | if ( denormal_operand() < 0 ) | ||
109 | return FPU_Exception; | ||
110 | |||
111 | FPU_to_exp16(a, &x); | ||
112 | FPU_to_exp16(b, &y); | ||
113 | tag = FPU_u_div(&x, &y, dest, control_w, sign); | ||
114 | if ( tag < 0 ) | ||
115 | return tag; | ||
116 | |||
117 | FPU_settagi(deststnr, tag); | ||
118 | return tag; | ||
119 | } | ||
120 | else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) ) | ||
121 | { | ||
122 | if ( tagb != TAG_Zero ) | ||
123 | { | ||
124 | /* Want to find Zero/Valid */ | ||
125 | if ( tagb == TW_Denormal ) | ||
126 | { | ||
127 | if ( denormal_operand() < 0 ) | ||
128 | return FPU_Exception; | ||
129 | } | ||
130 | |||
131 | /* The result is zero. */ | ||
132 | FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); | ||
133 | setsign(dest, sign); | ||
134 | return TAG_Zero; | ||
135 | } | ||
136 | /* We have an exception condition, either 0/0 or Valid/Zero. */ | ||
137 | if ( taga == TAG_Zero ) | ||
138 | { | ||
139 | /* 0/0 */ | ||
140 | return arith_invalid(deststnr); | ||
141 | } | ||
142 | /* Valid/Zero */ | ||
143 | return FPU_divide_by_zero(deststnr, sign); | ||
144 | } | ||
145 | /* Must have infinities, NaNs, etc */ | ||
146 | else if ( (taga == TW_NaN) || (tagb == TW_NaN) ) | ||
147 | { | ||
148 | if ( flags & LOADED ) | ||
149 | return real_2op_NaN((FPU_REG *)rm, flags & 0x0f, 0, st0_ptr); | ||
150 | |||
151 | if ( flags & DEST_RM ) | ||
152 | { | ||
153 | int tag; | ||
154 | tag = FPU_gettag0(); | ||
155 | if ( tag == TAG_Special ) | ||
156 | tag = FPU_Special(st0_ptr); | ||
157 | return real_2op_NaN(st0_ptr, tag, rm, (flags & REV) ? st0_ptr : &st(rm)); | ||
158 | } | ||
159 | else | ||
160 | { | ||
161 | int tag; | ||
162 | tag = FPU_gettagi(rm); | ||
163 | if ( tag == TAG_Special ) | ||
164 | tag = FPU_Special(&st(rm)); | ||
165 | return real_2op_NaN(&st(rm), tag, 0, (flags & REV) ? st0_ptr : &st(rm)); | ||
166 | } | ||
167 | } | ||
168 | else if (taga == TW_Infinity) | ||
169 | { | ||
170 | if (tagb == TW_Infinity) | ||
171 | { | ||
172 | /* infinity/infinity */ | ||
173 | return arith_invalid(deststnr); | ||
174 | } | ||
175 | else | ||
176 | { | ||
177 | /* tagb must be Valid or Zero */ | ||
178 | if ( (tagb == TW_Denormal) && (denormal_operand() < 0) ) | ||
179 | return FPU_Exception; | ||
180 | |||
181 | /* Infinity divided by Zero or Valid does | ||
182 | not raise and exception, but returns Infinity */ | ||
183 | FPU_copy_to_regi(a, TAG_Special, deststnr); | ||
184 | setsign(dest, sign); | ||
185 | return taga; | ||
186 | } | ||
187 | } | ||
188 | else if (tagb == TW_Infinity) | ||
189 | { | ||
190 | if ( (taga == TW_Denormal) && (denormal_operand() < 0) ) | ||
191 | return FPU_Exception; | ||
192 | |||
193 | /* The result is zero. */ | ||
194 | FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); | ||
195 | setsign(dest, sign); | ||
196 | return TAG_Zero; | ||
197 | } | ||
198 | #ifdef PARANOID | ||
199 | else | ||
200 | { | ||
201 | EXCEPTION(EX_INTERNAL|0x102); | ||
202 | return FPU_Exception; | ||
203 | } | ||
204 | #endif /* PARANOID */ | ||
205 | |||
206 | return 0; | ||
207 | } | ||
diff --git a/arch/x86/math-emu/reg_ld_str.c b/arch/x86/math-emu/reg_ld_str.c new file mode 100644 index 000000000000..e976caef6498 --- /dev/null +++ b/arch/x86/math-emu/reg_ld_str.c | |||
@@ -0,0 +1,1375 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | reg_ld_str.c | | ||
3 | | | | ||
4 | | All of the functions which transfer data between user memory and FPU_REGs.| | ||
5 | | | | ||
6 | | Copyright (C) 1992,1993,1994,1996,1997 | | ||
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | ||
8 | | E-mail billm@suburbia.net | | ||
9 | | | | ||
10 | | | | ||
11 | +---------------------------------------------------------------------------*/ | ||
12 | |||
13 | /*---------------------------------------------------------------------------+ | ||
14 | | Note: | | ||
15 | | The file contains code which accesses user memory. | | ||
16 | | Emulator static data may change when user memory is accessed, due to | | ||
17 | | other processes using the emulator while swapping is in progress. | | ||
18 | +---------------------------------------------------------------------------*/ | ||
19 | |||
20 | #include "fpu_emu.h" | ||
21 | |||
22 | #include <asm/uaccess.h> | ||
23 | |||
24 | #include "fpu_system.h" | ||
25 | #include "exception.h" | ||
26 | #include "reg_constant.h" | ||
27 | #include "control_w.h" | ||
28 | #include "status_w.h" | ||
29 | |||
30 | |||
31 | #define DOUBLE_Emax 1023 /* largest valid exponent */ | ||
32 | #define DOUBLE_Ebias 1023 | ||
33 | #define DOUBLE_Emin (-1022) /* smallest valid exponent */ | ||
34 | |||
35 | #define SINGLE_Emax 127 /* largest valid exponent */ | ||
36 | #define SINGLE_Ebias 127 | ||
37 | #define SINGLE_Emin (-126) /* smallest valid exponent */ | ||
38 | |||
39 | |||
40 | static u_char normalize_no_excep(FPU_REG *r, int exp, int sign) | ||
41 | { | ||
42 | u_char tag; | ||
43 | |||
44 | setexponent16(r, exp); | ||
45 | |||
46 | tag = FPU_normalize_nuo(r); | ||
47 | stdexp(r); | ||
48 | if ( sign ) | ||
49 | setnegative(r); | ||
50 | |||
51 | return tag; | ||
52 | } | ||
53 | |||
54 | |||
55 | int FPU_tagof(FPU_REG *ptr) | ||
56 | { | ||
57 | int exp; | ||
58 | |||
59 | exp = exponent16(ptr) & 0x7fff; | ||
60 | if ( exp == 0 ) | ||
61 | { | ||
62 | if ( !(ptr->sigh | ptr->sigl) ) | ||
63 | { | ||
64 | return TAG_Zero; | ||
65 | } | ||
66 | /* The number is a de-normal or pseudodenormal. */ | ||
67 | return TAG_Special; | ||
68 | } | ||
69 | |||
70 | if ( exp == 0x7fff ) | ||
71 | { | ||
72 | /* Is an Infinity, a NaN, or an unsupported data type. */ | ||
73 | return TAG_Special; | ||
74 | } | ||
75 | |||
76 | if ( !(ptr->sigh & 0x80000000) ) | ||
77 | { | ||
78 | /* Unsupported data type. */ | ||
79 | /* Valid numbers have the ms bit set to 1. */ | ||
80 | /* Unnormal. */ | ||
81 | return TAG_Special; | ||
82 | } | ||
83 | |||
84 | return TAG_Valid; | ||
85 | } | ||
86 | |||
87 | |||
88 | /* Get a long double from user memory */ | ||
89 | int FPU_load_extended(long double __user *s, int stnr) | ||
90 | { | ||
91 | FPU_REG *sti_ptr = &st(stnr); | ||
92 | |||
93 | RE_ENTRANT_CHECK_OFF; | ||
94 | FPU_access_ok(VERIFY_READ, s, 10); | ||
95 | __copy_from_user(sti_ptr, s, 10); | ||
96 | RE_ENTRANT_CHECK_ON; | ||
97 | |||
98 | return FPU_tagof(sti_ptr); | ||
99 | } | ||
100 | |||
101 | |||
102 | /* Get a double from user memory */ | ||
103 | int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data) | ||
104 | { | ||
105 | int exp, tag, negative; | ||
106 | unsigned m64, l64; | ||
107 | |||
108 | RE_ENTRANT_CHECK_OFF; | ||
109 | FPU_access_ok(VERIFY_READ, dfloat, 8); | ||
110 | FPU_get_user(m64, 1 + (unsigned long __user *) dfloat); | ||
111 | FPU_get_user(l64, (unsigned long __user *) dfloat); | ||
112 | RE_ENTRANT_CHECK_ON; | ||
113 | |||
114 | negative = (m64 & 0x80000000) ? SIGN_Negative : SIGN_Positive; | ||
115 | exp = ((m64 & 0x7ff00000) >> 20) - DOUBLE_Ebias + EXTENDED_Ebias; | ||
116 | m64 &= 0xfffff; | ||
117 | if ( exp > DOUBLE_Emax + EXTENDED_Ebias ) | ||
118 | { | ||
119 | /* Infinity or NaN */ | ||
120 | if ((m64 == 0) && (l64 == 0)) | ||
121 | { | ||
122 | /* +- infinity */ | ||
123 | loaded_data->sigh = 0x80000000; | ||
124 | loaded_data->sigl = 0x00000000; | ||
125 | exp = EXP_Infinity + EXTENDED_Ebias; | ||
126 | tag = TAG_Special; | ||
127 | } | ||
128 | else | ||
129 | { | ||
130 | /* Must be a signaling or quiet NaN */ | ||
131 | exp = EXP_NaN + EXTENDED_Ebias; | ||
132 | loaded_data->sigh = (m64 << 11) | 0x80000000; | ||
133 | loaded_data->sigh |= l64 >> 21; | ||
134 | loaded_data->sigl = l64 << 11; | ||
135 | tag = TAG_Special; /* The calling function must look for NaNs */ | ||
136 | } | ||
137 | } | ||
138 | else if ( exp < DOUBLE_Emin + EXTENDED_Ebias ) | ||
139 | { | ||
140 | /* Zero or de-normal */ | ||
141 | if ((m64 == 0) && (l64 == 0)) | ||
142 | { | ||
143 | /* Zero */ | ||
144 | reg_copy(&CONST_Z, loaded_data); | ||
145 | exp = 0; | ||
146 | tag = TAG_Zero; | ||
147 | } | ||
148 | else | ||
149 | { | ||
150 | /* De-normal */ | ||
151 | loaded_data->sigh = m64 << 11; | ||
152 | loaded_data->sigh |= l64 >> 21; | ||
153 | loaded_data->sigl = l64 << 11; | ||
154 | |||
155 | return normalize_no_excep(loaded_data, DOUBLE_Emin, negative) | ||
156 | | (denormal_operand() < 0 ? FPU_Exception : 0); | ||
157 | } | ||
158 | } | ||
159 | else | ||
160 | { | ||
161 | loaded_data->sigh = (m64 << 11) | 0x80000000; | ||
162 | loaded_data->sigh |= l64 >> 21; | ||
163 | loaded_data->sigl = l64 << 11; | ||
164 | |||
165 | tag = TAG_Valid; | ||
166 | } | ||
167 | |||
168 | setexponent16(loaded_data, exp | negative); | ||
169 | |||
170 | return tag; | ||
171 | } | ||
172 | |||
173 | |||
174 | /* Get a float from user memory */ | ||
175 | int FPU_load_single(float __user *single, FPU_REG *loaded_data) | ||
176 | { | ||
177 | unsigned m32; | ||
178 | int exp, tag, negative; | ||
179 | |||
180 | RE_ENTRANT_CHECK_OFF; | ||
181 | FPU_access_ok(VERIFY_READ, single, 4); | ||
182 | FPU_get_user(m32, (unsigned long __user *) single); | ||
183 | RE_ENTRANT_CHECK_ON; | ||
184 | |||
185 | negative = (m32 & 0x80000000) ? SIGN_Negative : SIGN_Positive; | ||
186 | |||
187 | if (!(m32 & 0x7fffffff)) | ||
188 | { | ||
189 | /* Zero */ | ||
190 | reg_copy(&CONST_Z, loaded_data); | ||
191 | addexponent(loaded_data, negative); | ||
192 | return TAG_Zero; | ||
193 | } | ||
194 | exp = ((m32 & 0x7f800000) >> 23) - SINGLE_Ebias + EXTENDED_Ebias; | ||
195 | m32 = (m32 & 0x7fffff) << 8; | ||
196 | if ( exp < SINGLE_Emin + EXTENDED_Ebias ) | ||
197 | { | ||
198 | /* De-normals */ | ||
199 | loaded_data->sigh = m32; | ||
200 | loaded_data->sigl = 0; | ||
201 | |||
202 | return normalize_no_excep(loaded_data, SINGLE_Emin, negative) | ||
203 | | (denormal_operand() < 0 ? FPU_Exception : 0); | ||
204 | } | ||
205 | else if ( exp > SINGLE_Emax + EXTENDED_Ebias ) | ||
206 | { | ||
207 | /* Infinity or NaN */ | ||
208 | if ( m32 == 0 ) | ||
209 | { | ||
210 | /* +- infinity */ | ||
211 | loaded_data->sigh = 0x80000000; | ||
212 | loaded_data->sigl = 0x00000000; | ||
213 | exp = EXP_Infinity + EXTENDED_Ebias; | ||
214 | tag = TAG_Special; | ||
215 | } | ||
216 | else | ||
217 | { | ||
218 | /* Must be a signaling or quiet NaN */ | ||
219 | exp = EXP_NaN + EXTENDED_Ebias; | ||
220 | loaded_data->sigh = m32 | 0x80000000; | ||
221 | loaded_data->sigl = 0; | ||
222 | tag = TAG_Special; /* The calling function must look for NaNs */ | ||
223 | } | ||
224 | } | ||
225 | else | ||
226 | { | ||
227 | loaded_data->sigh = m32 | 0x80000000; | ||
228 | loaded_data->sigl = 0; | ||
229 | tag = TAG_Valid; | ||
230 | } | ||
231 | |||
232 | setexponent16(loaded_data, exp | negative); /* Set the sign. */ | ||
233 | |||
234 | return tag; | ||
235 | } | ||
236 | |||
237 | |||
238 | /* Get a long long from user memory */ | ||
239 | int FPU_load_int64(long long __user *_s) | ||
240 | { | ||
241 | long long s; | ||
242 | int sign; | ||
243 | FPU_REG *st0_ptr = &st(0); | ||
244 | |||
245 | RE_ENTRANT_CHECK_OFF; | ||
246 | FPU_access_ok(VERIFY_READ, _s, 8); | ||
247 | if (copy_from_user(&s,_s,8)) | ||
248 | FPU_abort; | ||
249 | RE_ENTRANT_CHECK_ON; | ||
250 | |||
251 | if (s == 0) | ||
252 | { | ||
253 | reg_copy(&CONST_Z, st0_ptr); | ||
254 | return TAG_Zero; | ||
255 | } | ||
256 | |||
257 | if (s > 0) | ||
258 | sign = SIGN_Positive; | ||
259 | else | ||
260 | { | ||
261 | s = -s; | ||
262 | sign = SIGN_Negative; | ||
263 | } | ||
264 | |||
265 | significand(st0_ptr) = s; | ||
266 | |||
267 | return normalize_no_excep(st0_ptr, 63, sign); | ||
268 | } | ||
269 | |||
270 | |||
271 | /* Get a long from user memory */ | ||
272 | int FPU_load_int32(long __user *_s, FPU_REG *loaded_data) | ||
273 | { | ||
274 | long s; | ||
275 | int negative; | ||
276 | |||
277 | RE_ENTRANT_CHECK_OFF; | ||
278 | FPU_access_ok(VERIFY_READ, _s, 4); | ||
279 | FPU_get_user(s, _s); | ||
280 | RE_ENTRANT_CHECK_ON; | ||
281 | |||
282 | if (s == 0) | ||
283 | { reg_copy(&CONST_Z, loaded_data); return TAG_Zero; } | ||
284 | |||
285 | if (s > 0) | ||
286 | negative = SIGN_Positive; | ||
287 | else | ||
288 | { | ||
289 | s = -s; | ||
290 | negative = SIGN_Negative; | ||
291 | } | ||
292 | |||
293 | loaded_data->sigh = s; | ||
294 | loaded_data->sigl = 0; | ||
295 | |||
296 | return normalize_no_excep(loaded_data, 31, negative); | ||
297 | } | ||
298 | |||
299 | |||
300 | /* Get a short from user memory */ | ||
301 | int FPU_load_int16(short __user *_s, FPU_REG *loaded_data) | ||
302 | { | ||
303 | int s, negative; | ||
304 | |||
305 | RE_ENTRANT_CHECK_OFF; | ||
306 | FPU_access_ok(VERIFY_READ, _s, 2); | ||
307 | /* Cast as short to get the sign extended. */ | ||
308 | FPU_get_user(s, _s); | ||
309 | RE_ENTRANT_CHECK_ON; | ||
310 | |||
311 | if (s == 0) | ||
312 | { reg_copy(&CONST_Z, loaded_data); return TAG_Zero; } | ||
313 | |||
314 | if (s > 0) | ||
315 | negative = SIGN_Positive; | ||
316 | else | ||
317 | { | ||
318 | s = -s; | ||
319 | negative = SIGN_Negative; | ||
320 | } | ||
321 | |||
322 | loaded_data->sigh = s << 16; | ||
323 | loaded_data->sigl = 0; | ||
324 | |||
325 | return normalize_no_excep(loaded_data, 15, negative); | ||
326 | } | ||
327 | |||
328 | |||
329 | /* Get a packed bcd array from user memory */ | ||
330 | int FPU_load_bcd(u_char __user *s) | ||
331 | { | ||
332 | FPU_REG *st0_ptr = &st(0); | ||
333 | int pos; | ||
334 | u_char bcd; | ||
335 | long long l=0; | ||
336 | int sign; | ||
337 | |||
338 | RE_ENTRANT_CHECK_OFF; | ||
339 | FPU_access_ok(VERIFY_READ, s, 10); | ||
340 | RE_ENTRANT_CHECK_ON; | ||
341 | for ( pos = 8; pos >= 0; pos--) | ||
342 | { | ||
343 | l *= 10; | ||
344 | RE_ENTRANT_CHECK_OFF; | ||
345 | FPU_get_user(bcd, s+pos); | ||
346 | RE_ENTRANT_CHECK_ON; | ||
347 | l += bcd >> 4; | ||
348 | l *= 10; | ||
349 | l += bcd & 0x0f; | ||
350 | } | ||
351 | |||
352 | RE_ENTRANT_CHECK_OFF; | ||
353 | FPU_get_user(sign, s+9); | ||
354 | sign = sign & 0x80 ? SIGN_Negative : SIGN_Positive; | ||
355 | RE_ENTRANT_CHECK_ON; | ||
356 | |||
357 | if ( l == 0 ) | ||
358 | { | ||
359 | reg_copy(&CONST_Z, st0_ptr); | ||
360 | addexponent(st0_ptr, sign); /* Set the sign. */ | ||
361 | return TAG_Zero; | ||
362 | } | ||
363 | else | ||
364 | { | ||
365 | significand(st0_ptr) = l; | ||
366 | return normalize_no_excep(st0_ptr, 63, sign); | ||
367 | } | ||
368 | } | ||
369 | |||
370 | /*===========================================================================*/ | ||
371 | |||
372 | /* Put a long double into user memory */ | ||
373 | int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag, long double __user *d) | ||
374 | { | ||
375 | /* | ||
376 | The only exception raised by an attempt to store to an | ||
377 | extended format is the Invalid Stack exception, i.e. | ||
378 | attempting to store from an empty register. | ||
379 | */ | ||
380 | |||
381 | if ( st0_tag != TAG_Empty ) | ||
382 | { | ||
383 | RE_ENTRANT_CHECK_OFF; | ||
384 | FPU_access_ok(VERIFY_WRITE, d, 10); | ||
385 | |||
386 | FPU_put_user(st0_ptr->sigl, (unsigned long __user *) d); | ||
387 | FPU_put_user(st0_ptr->sigh, (unsigned long __user *) ((u_char __user *)d + 4)); | ||
388 | FPU_put_user(exponent16(st0_ptr), (unsigned short __user *) ((u_char __user *)d + 8)); | ||
389 | RE_ENTRANT_CHECK_ON; | ||
390 | |||
391 | return 1; | ||
392 | } | ||
393 | |||
394 | /* Empty register (stack underflow) */ | ||
395 | EXCEPTION(EX_StackUnder); | ||
396 | if ( control_word & CW_Invalid ) | ||
397 | { | ||
398 | /* The masked response */ | ||
399 | /* Put out the QNaN indefinite */ | ||
400 | RE_ENTRANT_CHECK_OFF; | ||
401 | FPU_access_ok(VERIFY_WRITE,d,10); | ||
402 | FPU_put_user(0, (unsigned long __user *) d); | ||
403 | FPU_put_user(0xc0000000, 1 + (unsigned long __user *) d); | ||
404 | FPU_put_user(0xffff, 4 + (short __user *) d); | ||
405 | RE_ENTRANT_CHECK_ON; | ||
406 | return 1; | ||
407 | } | ||
408 | else | ||
409 | return 0; | ||
410 | |||
411 | } | ||
412 | |||
413 | |||
414 | /* Put a double into user memory */ | ||
415 | int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat) | ||
416 | { | ||
417 | unsigned long l[2]; | ||
418 | unsigned long increment = 0; /* avoid gcc warnings */ | ||
419 | int precision_loss; | ||
420 | int exp; | ||
421 | FPU_REG tmp; | ||
422 | |||
423 | if ( st0_tag == TAG_Valid ) | ||
424 | { | ||
425 | reg_copy(st0_ptr, &tmp); | ||
426 | exp = exponent(&tmp); | ||
427 | |||
428 | if ( exp < DOUBLE_Emin ) /* It may be a denormal */ | ||
429 | { | ||
430 | addexponent(&tmp, -DOUBLE_Emin + 52); /* largest exp to be 51 */ | ||
431 | |||
432 | denormal_arg: | ||
433 | |||
434 | if ( (precision_loss = FPU_round_to_int(&tmp, st0_tag)) ) | ||
435 | { | ||
436 | #ifdef PECULIAR_486 | ||
437 | /* Did it round to a non-denormal ? */ | ||
438 | /* This behaviour might be regarded as peculiar, it appears | ||
439 | that the 80486 rounds to the dest precision, then | ||
440 | converts to decide underflow. */ | ||
441 | if ( !((tmp.sigh == 0x00100000) && (tmp.sigl == 0) && | ||
442 | (st0_ptr->sigl & 0x000007ff)) ) | ||
443 | #endif /* PECULIAR_486 */ | ||
444 | { | ||
445 | EXCEPTION(EX_Underflow); | ||
446 | /* This is a special case: see sec 16.2.5.1 of | ||
447 | the 80486 book */ | ||
448 | if ( !(control_word & CW_Underflow) ) | ||
449 | return 0; | ||
450 | } | ||
451 | EXCEPTION(precision_loss); | ||
452 | if ( !(control_word & CW_Precision) ) | ||
453 | return 0; | ||
454 | } | ||
455 | l[0] = tmp.sigl; | ||
456 | l[1] = tmp.sigh; | ||
457 | } | ||
458 | else | ||
459 | { | ||
460 | if ( tmp.sigl & 0x000007ff ) | ||
461 | { | ||
462 | precision_loss = 1; | ||
463 | switch (control_word & CW_RC) | ||
464 | { | ||
465 | case RC_RND: | ||
466 | /* Rounding can get a little messy.. */ | ||
467 | increment = ((tmp.sigl & 0x7ff) > 0x400) | /* nearest */ | ||
468 | ((tmp.sigl & 0xc00) == 0xc00); /* odd -> even */ | ||
469 | break; | ||
470 | case RC_DOWN: /* towards -infinity */ | ||
471 | increment = signpositive(&tmp) ? 0 : tmp.sigl & 0x7ff; | ||
472 | break; | ||
473 | case RC_UP: /* towards +infinity */ | ||
474 | increment = signpositive(&tmp) ? tmp.sigl & 0x7ff : 0; | ||
475 | break; | ||
476 | case RC_CHOP: | ||
477 | increment = 0; | ||
478 | break; | ||
479 | } | ||
480 | |||
481 | /* Truncate the mantissa */ | ||
482 | tmp.sigl &= 0xfffff800; | ||
483 | |||
484 | if ( increment ) | ||
485 | { | ||
486 | if ( tmp.sigl >= 0xfffff800 ) | ||
487 | { | ||
488 | /* the sigl part overflows */ | ||
489 | if ( tmp.sigh == 0xffffffff ) | ||
490 | { | ||
491 | /* The sigh part overflows */ | ||
492 | tmp.sigh = 0x80000000; | ||
493 | exp++; | ||
494 | if (exp >= EXP_OVER) | ||
495 | goto overflow; | ||
496 | } | ||
497 | else | ||
498 | { | ||
499 | tmp.sigh ++; | ||
500 | } | ||
501 | tmp.sigl = 0x00000000; | ||
502 | } | ||
503 | else | ||
504 | { | ||
505 | /* We only need to increment sigl */ | ||
506 | tmp.sigl += 0x00000800; | ||
507 | } | ||
508 | } | ||
509 | } | ||
510 | else | ||
511 | precision_loss = 0; | ||
512 | |||
513 | l[0] = (tmp.sigl >> 11) | (tmp.sigh << 21); | ||
514 | l[1] = ((tmp.sigh >> 11) & 0xfffff); | ||
515 | |||
516 | if ( exp > DOUBLE_Emax ) | ||
517 | { | ||
518 | overflow: | ||
519 | EXCEPTION(EX_Overflow); | ||
520 | if ( !(control_word & CW_Overflow) ) | ||
521 | return 0; | ||
522 | set_precision_flag_up(); | ||
523 | if ( !(control_word & CW_Precision) ) | ||
524 | return 0; | ||
525 | |||
526 | /* This is a special case: see sec 16.2.5.1 of the 80486 book */ | ||
527 | /* Overflow to infinity */ | ||
528 | l[0] = 0x00000000; /* Set to */ | ||
529 | l[1] = 0x7ff00000; /* + INF */ | ||
530 | } | ||
531 | else | ||
532 | { | ||
533 | if ( precision_loss ) | ||
534 | { | ||
535 | if ( increment ) | ||
536 | set_precision_flag_up(); | ||
537 | else | ||
538 | set_precision_flag_down(); | ||
539 | } | ||
540 | /* Add the exponent */ | ||
541 | l[1] |= (((exp+DOUBLE_Ebias) & 0x7ff) << 20); | ||
542 | } | ||
543 | } | ||
544 | } | ||
545 | else if (st0_tag == TAG_Zero) | ||
546 | { | ||
547 | /* Number is zero */ | ||
548 | l[0] = 0; | ||
549 | l[1] = 0; | ||
550 | } | ||
551 | else if ( st0_tag == TAG_Special ) | ||
552 | { | ||
553 | st0_tag = FPU_Special(st0_ptr); | ||
554 | if ( st0_tag == TW_Denormal ) | ||
555 | { | ||
556 | /* A denormal will always underflow. */ | ||
557 | #ifndef PECULIAR_486 | ||
558 | /* An 80486 is supposed to be able to generate | ||
559 | a denormal exception here, but... */ | ||
560 | /* Underflow has priority. */ | ||
561 | if ( control_word & CW_Underflow ) | ||
562 | denormal_operand(); | ||
563 | #endif /* PECULIAR_486 */ | ||
564 | reg_copy(st0_ptr, &tmp); | ||
565 | goto denormal_arg; | ||
566 | } | ||
567 | else if (st0_tag == TW_Infinity) | ||
568 | { | ||
569 | l[0] = 0; | ||
570 | l[1] = 0x7ff00000; | ||
571 | } | ||
572 | else if (st0_tag == TW_NaN) | ||
573 | { | ||
574 | /* Is it really a NaN ? */ | ||
575 | if ( (exponent(st0_ptr) == EXP_OVER) | ||
576 | && (st0_ptr->sigh & 0x80000000) ) | ||
577 | { | ||
578 | /* See if we can get a valid NaN from the FPU_REG */ | ||
579 | l[0] = (st0_ptr->sigl >> 11) | (st0_ptr->sigh << 21); | ||
580 | l[1] = ((st0_ptr->sigh >> 11) & 0xfffff); | ||
581 | if ( !(st0_ptr->sigh & 0x40000000) ) | ||
582 | { | ||
583 | /* It is a signalling NaN */ | ||
584 | EXCEPTION(EX_Invalid); | ||
585 | if ( !(control_word & CW_Invalid) ) | ||
586 | return 0; | ||
587 | l[1] |= (0x40000000 >> 11); | ||
588 | } | ||
589 | l[1] |= 0x7ff00000; | ||
590 | } | ||
591 | else | ||
592 | { | ||
593 | /* It is an unsupported data type */ | ||
594 | EXCEPTION(EX_Invalid); | ||
595 | if ( !(control_word & CW_Invalid) ) | ||
596 | return 0; | ||
597 | l[0] = 0; | ||
598 | l[1] = 0xfff80000; | ||
599 | } | ||
600 | } | ||
601 | } | ||
602 | else if ( st0_tag == TAG_Empty ) | ||
603 | { | ||
604 | /* Empty register (stack underflow) */ | ||
605 | EXCEPTION(EX_StackUnder); | ||
606 | if ( control_word & CW_Invalid ) | ||
607 | { | ||
608 | /* The masked response */ | ||
609 | /* Put out the QNaN indefinite */ | ||
610 | RE_ENTRANT_CHECK_OFF; | ||
611 | FPU_access_ok(VERIFY_WRITE,dfloat,8); | ||
612 | FPU_put_user(0, (unsigned long __user *) dfloat); | ||
613 | FPU_put_user(0xfff80000, 1 + (unsigned long __user *) dfloat); | ||
614 | RE_ENTRANT_CHECK_ON; | ||
615 | return 1; | ||
616 | } | ||
617 | else | ||
618 | return 0; | ||
619 | } | ||
620 | if ( getsign(st0_ptr) ) | ||
621 | l[1] |= 0x80000000; | ||
622 | |||
623 | RE_ENTRANT_CHECK_OFF; | ||
624 | FPU_access_ok(VERIFY_WRITE,dfloat,8); | ||
625 | FPU_put_user(l[0], (unsigned long __user *)dfloat); | ||
626 | FPU_put_user(l[1], 1 + (unsigned long __user *)dfloat); | ||
627 | RE_ENTRANT_CHECK_ON; | ||
628 | |||
629 | return 1; | ||
630 | } | ||
631 | |||
632 | |||
633 | /* Put a float into user memory */ | ||
634 | int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, float __user *single) | ||
635 | { | ||
636 | long templ = 0; | ||
637 | unsigned long increment = 0; /* avoid gcc warnings */ | ||
638 | int precision_loss; | ||
639 | int exp; | ||
640 | FPU_REG tmp; | ||
641 | |||
642 | if ( st0_tag == TAG_Valid ) | ||
643 | { | ||
644 | |||
645 | reg_copy(st0_ptr, &tmp); | ||
646 | exp = exponent(&tmp); | ||
647 | |||
648 | if ( exp < SINGLE_Emin ) | ||
649 | { | ||
650 | addexponent(&tmp, -SINGLE_Emin + 23); /* largest exp to be 22 */ | ||
651 | |||
652 | denormal_arg: | ||
653 | |||
654 | if ( (precision_loss = FPU_round_to_int(&tmp, st0_tag)) ) | ||
655 | { | ||
656 | #ifdef PECULIAR_486 | ||
657 | /* Did it round to a non-denormal ? */ | ||
658 | /* This behaviour might be regarded as peculiar, it appears | ||
659 | that the 80486 rounds to the dest precision, then | ||
660 | converts to decide underflow. */ | ||
661 | if ( !((tmp.sigl == 0x00800000) && | ||
662 | ((st0_ptr->sigh & 0x000000ff) || st0_ptr->sigl)) ) | ||
663 | #endif /* PECULIAR_486 */ | ||
664 | { | ||
665 | EXCEPTION(EX_Underflow); | ||
666 | /* This is a special case: see sec 16.2.5.1 of | ||
667 | the 80486 book */ | ||
668 | if ( !(control_word & CW_Underflow) ) | ||
669 | return 0; | ||
670 | } | ||
671 | EXCEPTION(precision_loss); | ||
672 | if ( !(control_word & CW_Precision) ) | ||
673 | return 0; | ||
674 | } | ||
675 | templ = tmp.sigl; | ||
676 | } | ||
677 | else | ||
678 | { | ||
679 | if ( tmp.sigl | (tmp.sigh & 0x000000ff) ) | ||
680 | { | ||
681 | unsigned long sigh = tmp.sigh; | ||
682 | unsigned long sigl = tmp.sigl; | ||
683 | |||
684 | precision_loss = 1; | ||
685 | switch (control_word & CW_RC) | ||
686 | { | ||
687 | case RC_RND: | ||
688 | increment = ((sigh & 0xff) > 0x80) /* more than half */ | ||
689 | || (((sigh & 0xff) == 0x80) && sigl) /* more than half */ | ||
690 | || ((sigh & 0x180) == 0x180); /* round to even */ | ||
691 | break; | ||
692 | case RC_DOWN: /* towards -infinity */ | ||
693 | increment = signpositive(&tmp) | ||
694 | ? 0 : (sigl | (sigh & 0xff)); | ||
695 | break; | ||
696 | case RC_UP: /* towards +infinity */ | ||
697 | increment = signpositive(&tmp) | ||
698 | ? (sigl | (sigh & 0xff)) : 0; | ||
699 | break; | ||
700 | case RC_CHOP: | ||
701 | increment = 0; | ||
702 | break; | ||
703 | } | ||
704 | |||
705 | /* Truncate part of the mantissa */ | ||
706 | tmp.sigl = 0; | ||
707 | |||
708 | if (increment) | ||
709 | { | ||
710 | if ( sigh >= 0xffffff00 ) | ||
711 | { | ||
712 | /* The sigh part overflows */ | ||
713 | tmp.sigh = 0x80000000; | ||
714 | exp++; | ||
715 | if ( exp >= EXP_OVER ) | ||
716 | goto overflow; | ||
717 | } | ||
718 | else | ||
719 | { | ||
720 | tmp.sigh &= 0xffffff00; | ||
721 | tmp.sigh += 0x100; | ||
722 | } | ||
723 | } | ||
724 | else | ||
725 | { | ||
726 | tmp.sigh &= 0xffffff00; /* Finish the truncation */ | ||
727 | } | ||
728 | } | ||
729 | else | ||
730 | precision_loss = 0; | ||
731 | |||
732 | templ = (tmp.sigh >> 8) & 0x007fffff; | ||
733 | |||
734 | if ( exp > SINGLE_Emax ) | ||
735 | { | ||
736 | overflow: | ||
737 | EXCEPTION(EX_Overflow); | ||
738 | if ( !(control_word & CW_Overflow) ) | ||
739 | return 0; | ||
740 | set_precision_flag_up(); | ||
741 | if ( !(control_word & CW_Precision) ) | ||
742 | return 0; | ||
743 | |||
744 | /* This is a special case: see sec 16.2.5.1 of the 80486 book. */ | ||
745 | /* Masked response is overflow to infinity. */ | ||
746 | templ = 0x7f800000; | ||
747 | } | ||
748 | else | ||
749 | { | ||
750 | if ( precision_loss ) | ||
751 | { | ||
752 | if ( increment ) | ||
753 | set_precision_flag_up(); | ||
754 | else | ||
755 | set_precision_flag_down(); | ||
756 | } | ||
757 | /* Add the exponent */ | ||
758 | templ |= ((exp+SINGLE_Ebias) & 0xff) << 23; | ||
759 | } | ||
760 | } | ||
761 | } | ||
762 | else if (st0_tag == TAG_Zero) | ||
763 | { | ||
764 | templ = 0; | ||
765 | } | ||
766 | else if ( st0_tag == TAG_Special ) | ||
767 | { | ||
768 | st0_tag = FPU_Special(st0_ptr); | ||
769 | if (st0_tag == TW_Denormal) | ||
770 | { | ||
771 | reg_copy(st0_ptr, &tmp); | ||
772 | |||
773 | /* A denormal will always underflow. */ | ||
774 | #ifndef PECULIAR_486 | ||
775 | /* An 80486 is supposed to be able to generate | ||
776 | a denormal exception here, but... */ | ||
777 | /* Underflow has priority. */ | ||
778 | if ( control_word & CW_Underflow ) | ||
779 | denormal_operand(); | ||
780 | #endif /* PECULIAR_486 */ | ||
781 | goto denormal_arg; | ||
782 | } | ||
783 | else if (st0_tag == TW_Infinity) | ||
784 | { | ||
785 | templ = 0x7f800000; | ||
786 | } | ||
787 | else if (st0_tag == TW_NaN) | ||
788 | { | ||
789 | /* Is it really a NaN ? */ | ||
790 | if ( (exponent(st0_ptr) == EXP_OVER) && (st0_ptr->sigh & 0x80000000) ) | ||
791 | { | ||
792 | /* See if we can get a valid NaN from the FPU_REG */ | ||
793 | templ = st0_ptr->sigh >> 8; | ||
794 | if ( !(st0_ptr->sigh & 0x40000000) ) | ||
795 | { | ||
796 | /* It is a signalling NaN */ | ||
797 | EXCEPTION(EX_Invalid); | ||
798 | if ( !(control_word & CW_Invalid) ) | ||
799 | return 0; | ||
800 | templ |= (0x40000000 >> 8); | ||
801 | } | ||
802 | templ |= 0x7f800000; | ||
803 | } | ||
804 | else | ||
805 | { | ||
806 | /* It is an unsupported data type */ | ||
807 | EXCEPTION(EX_Invalid); | ||
808 | if ( !(control_word & CW_Invalid) ) | ||
809 | return 0; | ||
810 | templ = 0xffc00000; | ||
811 | } | ||
812 | } | ||
813 | #ifdef PARANOID | ||
814 | else | ||
815 | { | ||
816 | EXCEPTION(EX_INTERNAL|0x164); | ||
817 | return 0; | ||
818 | } | ||
819 | #endif | ||
820 | } | ||
821 | else if ( st0_tag == TAG_Empty ) | ||
822 | { | ||
823 | /* Empty register (stack underflow) */ | ||
824 | EXCEPTION(EX_StackUnder); | ||
825 | if ( control_word & EX_Invalid ) | ||
826 | { | ||
827 | /* The masked response */ | ||
828 | /* Put out the QNaN indefinite */ | ||
829 | RE_ENTRANT_CHECK_OFF; | ||
830 | FPU_access_ok(VERIFY_WRITE,single,4); | ||
831 | FPU_put_user(0xffc00000, (unsigned long __user *) single); | ||
832 | RE_ENTRANT_CHECK_ON; | ||
833 | return 1; | ||
834 | } | ||
835 | else | ||
836 | return 0; | ||
837 | } | ||
838 | #ifdef PARANOID | ||
839 | else | ||
840 | { | ||
841 | EXCEPTION(EX_INTERNAL|0x163); | ||
842 | return 0; | ||
843 | } | ||
844 | #endif | ||
845 | if ( getsign(st0_ptr) ) | ||
846 | templ |= 0x80000000; | ||
847 | |||
848 | RE_ENTRANT_CHECK_OFF; | ||
849 | FPU_access_ok(VERIFY_WRITE,single,4); | ||
850 | FPU_put_user(templ,(unsigned long __user *) single); | ||
851 | RE_ENTRANT_CHECK_ON; | ||
852 | |||
853 | return 1; | ||
854 | } | ||
855 | |||
856 | |||
857 | /* Put a long long into user memory */ | ||
858 | int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d) | ||
859 | { | ||
860 | FPU_REG t; | ||
861 | long long tll; | ||
862 | int precision_loss; | ||
863 | |||
864 | if ( st0_tag == TAG_Empty ) | ||
865 | { | ||
866 | /* Empty register (stack underflow) */ | ||
867 | EXCEPTION(EX_StackUnder); | ||
868 | goto invalid_operand; | ||
869 | } | ||
870 | else if ( st0_tag == TAG_Special ) | ||
871 | { | ||
872 | st0_tag = FPU_Special(st0_ptr); | ||
873 | if ( (st0_tag == TW_Infinity) || | ||
874 | (st0_tag == TW_NaN) ) | ||
875 | { | ||
876 | EXCEPTION(EX_Invalid); | ||
877 | goto invalid_operand; | ||
878 | } | ||
879 | } | ||
880 | |||
881 | reg_copy(st0_ptr, &t); | ||
882 | precision_loss = FPU_round_to_int(&t, st0_tag); | ||
883 | ((long *)&tll)[0] = t.sigl; | ||
884 | ((long *)&tll)[1] = t.sigh; | ||
885 | if ( (precision_loss == 1) || | ||
886 | ((t.sigh & 0x80000000) && | ||
887 | !((t.sigh == 0x80000000) && (t.sigl == 0) && | ||
888 | signnegative(&t))) ) | ||
889 | { | ||
890 | EXCEPTION(EX_Invalid); | ||
891 | /* This is a special case: see sec 16.2.5.1 of the 80486 book */ | ||
892 | invalid_operand: | ||
893 | if ( control_word & EX_Invalid ) | ||
894 | { | ||
895 | /* Produce something like QNaN "indefinite" */ | ||
896 | tll = 0x8000000000000000LL; | ||
897 | } | ||
898 | else | ||
899 | return 0; | ||
900 | } | ||
901 | else | ||
902 | { | ||
903 | if ( precision_loss ) | ||
904 | set_precision_flag(precision_loss); | ||
905 | if ( signnegative(&t) ) | ||
906 | tll = - tll; | ||
907 | } | ||
908 | |||
909 | RE_ENTRANT_CHECK_OFF; | ||
910 | FPU_access_ok(VERIFY_WRITE,d,8); | ||
911 | if (copy_to_user(d, &tll, 8)) | ||
912 | FPU_abort; | ||
913 | RE_ENTRANT_CHECK_ON; | ||
914 | |||
915 | return 1; | ||
916 | } | ||
917 | |||
918 | |||
919 | /* Put a long into user memory */ | ||
920 | int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d) | ||
921 | { | ||
922 | FPU_REG t; | ||
923 | int precision_loss; | ||
924 | |||
925 | if ( st0_tag == TAG_Empty ) | ||
926 | { | ||
927 | /* Empty register (stack underflow) */ | ||
928 | EXCEPTION(EX_StackUnder); | ||
929 | goto invalid_operand; | ||
930 | } | ||
931 | else if ( st0_tag == TAG_Special ) | ||
932 | { | ||
933 | st0_tag = FPU_Special(st0_ptr); | ||
934 | if ( (st0_tag == TW_Infinity) || | ||
935 | (st0_tag == TW_NaN) ) | ||
936 | { | ||
937 | EXCEPTION(EX_Invalid); | ||
938 | goto invalid_operand; | ||
939 | } | ||
940 | } | ||
941 | |||
942 | reg_copy(st0_ptr, &t); | ||
943 | precision_loss = FPU_round_to_int(&t, st0_tag); | ||
944 | if (t.sigh || | ||
945 | ((t.sigl & 0x80000000) && | ||
946 | !((t.sigl == 0x80000000) && signnegative(&t))) ) | ||
947 | { | ||
948 | EXCEPTION(EX_Invalid); | ||
949 | /* This is a special case: see sec 16.2.5.1 of the 80486 book */ | ||
950 | invalid_operand: | ||
951 | if ( control_word & EX_Invalid ) | ||
952 | { | ||
953 | /* Produce something like QNaN "indefinite" */ | ||
954 | t.sigl = 0x80000000; | ||
955 | } | ||
956 | else | ||
957 | return 0; | ||
958 | } | ||
959 | else | ||
960 | { | ||
961 | if ( precision_loss ) | ||
962 | set_precision_flag(precision_loss); | ||
963 | if ( signnegative(&t) ) | ||
964 | t.sigl = -(long)t.sigl; | ||
965 | } | ||
966 | |||
967 | RE_ENTRANT_CHECK_OFF; | ||
968 | FPU_access_ok(VERIFY_WRITE,d,4); | ||
969 | FPU_put_user(t.sigl, (unsigned long __user *) d); | ||
970 | RE_ENTRANT_CHECK_ON; | ||
971 | |||
972 | return 1; | ||
973 | } | ||
974 | |||
975 | |||
976 | /* Put a short into user memory */ | ||
977 | int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d) | ||
978 | { | ||
979 | FPU_REG t; | ||
980 | int precision_loss; | ||
981 | |||
982 | if ( st0_tag == TAG_Empty ) | ||
983 | { | ||
984 | /* Empty register (stack underflow) */ | ||
985 | EXCEPTION(EX_StackUnder); | ||
986 | goto invalid_operand; | ||
987 | } | ||
988 | else if ( st0_tag == TAG_Special ) | ||
989 | { | ||
990 | st0_tag = FPU_Special(st0_ptr); | ||
991 | if ( (st0_tag == TW_Infinity) || | ||
992 | (st0_tag == TW_NaN) ) | ||
993 | { | ||
994 | EXCEPTION(EX_Invalid); | ||
995 | goto invalid_operand; | ||
996 | } | ||
997 | } | ||
998 | |||
999 | reg_copy(st0_ptr, &t); | ||
1000 | precision_loss = FPU_round_to_int(&t, st0_tag); | ||
1001 | if (t.sigh || | ||
1002 | ((t.sigl & 0xffff8000) && | ||
1003 | !((t.sigl == 0x8000) && signnegative(&t))) ) | ||
1004 | { | ||
1005 | EXCEPTION(EX_Invalid); | ||
1006 | /* This is a special case: see sec 16.2.5.1 of the 80486 book */ | ||
1007 | invalid_operand: | ||
1008 | if ( control_word & EX_Invalid ) | ||
1009 | { | ||
1010 | /* Produce something like QNaN "indefinite" */ | ||
1011 | t.sigl = 0x8000; | ||
1012 | } | ||
1013 | else | ||
1014 | return 0; | ||
1015 | } | ||
1016 | else | ||
1017 | { | ||
1018 | if ( precision_loss ) | ||
1019 | set_precision_flag(precision_loss); | ||
1020 | if ( signnegative(&t) ) | ||
1021 | t.sigl = -t.sigl; | ||
1022 | } | ||
1023 | |||
1024 | RE_ENTRANT_CHECK_OFF; | ||
1025 | FPU_access_ok(VERIFY_WRITE,d,2); | ||
1026 | FPU_put_user((short)t.sigl, d); | ||
1027 | RE_ENTRANT_CHECK_ON; | ||
1028 | |||
1029 | return 1; | ||
1030 | } | ||
1031 | |||
1032 | |||
1033 | /* Put a packed bcd array into user memory */ | ||
1034 | int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d) | ||
1035 | { | ||
1036 | FPU_REG t; | ||
1037 | unsigned long long ll; | ||
1038 | u_char b; | ||
1039 | int i, precision_loss; | ||
1040 | u_char sign = (getsign(st0_ptr) == SIGN_NEG) ? 0x80 : 0; | ||
1041 | |||
1042 | if ( st0_tag == TAG_Empty ) | ||
1043 | { | ||
1044 | /* Empty register (stack underflow) */ | ||
1045 | EXCEPTION(EX_StackUnder); | ||
1046 | goto invalid_operand; | ||
1047 | } | ||
1048 | else if ( st0_tag == TAG_Special ) | ||
1049 | { | ||
1050 | st0_tag = FPU_Special(st0_ptr); | ||
1051 | if ( (st0_tag == TW_Infinity) || | ||
1052 | (st0_tag == TW_NaN) ) | ||
1053 | { | ||
1054 | EXCEPTION(EX_Invalid); | ||
1055 | goto invalid_operand; | ||
1056 | } | ||
1057 | } | ||
1058 | |||
1059 | reg_copy(st0_ptr, &t); | ||
1060 | precision_loss = FPU_round_to_int(&t, st0_tag); | ||
1061 | ll = significand(&t); | ||
1062 | |||
1063 | /* Check for overflow, by comparing with 999999999999999999 decimal. */ | ||
1064 | if ( (t.sigh > 0x0de0b6b3) || | ||
1065 | ((t.sigh == 0x0de0b6b3) && (t.sigl > 0xa763ffff)) ) | ||
1066 | { | ||
1067 | EXCEPTION(EX_Invalid); | ||
1068 | /* This is a special case: see sec 16.2.5.1 of the 80486 book */ | ||
1069 | invalid_operand: | ||
1070 | if ( control_word & CW_Invalid ) | ||
1071 | { | ||
1072 | /* Produce the QNaN "indefinite" */ | ||
1073 | RE_ENTRANT_CHECK_OFF; | ||
1074 | FPU_access_ok(VERIFY_WRITE,d,10); | ||
1075 | for ( i = 0; i < 7; i++) | ||
1076 | FPU_put_user(0, d+i); /* These bytes "undefined" */ | ||
1077 | FPU_put_user(0xc0, d+7); /* This byte "undefined" */ | ||
1078 | FPU_put_user(0xff, d+8); | ||
1079 | FPU_put_user(0xff, d+9); | ||
1080 | RE_ENTRANT_CHECK_ON; | ||
1081 | return 1; | ||
1082 | } | ||
1083 | else | ||
1084 | return 0; | ||
1085 | } | ||
1086 | else if ( precision_loss ) | ||
1087 | { | ||
1088 | /* Precision loss doesn't stop the data transfer */ | ||
1089 | set_precision_flag(precision_loss); | ||
1090 | } | ||
1091 | |||
1092 | RE_ENTRANT_CHECK_OFF; | ||
1093 | FPU_access_ok(VERIFY_WRITE,d,10); | ||
1094 | RE_ENTRANT_CHECK_ON; | ||
1095 | for ( i = 0; i < 9; i++) | ||
1096 | { | ||
1097 | b = FPU_div_small(&ll, 10); | ||
1098 | b |= (FPU_div_small(&ll, 10)) << 4; | ||
1099 | RE_ENTRANT_CHECK_OFF; | ||
1100 | FPU_put_user(b, d+i); | ||
1101 | RE_ENTRANT_CHECK_ON; | ||
1102 | } | ||
1103 | RE_ENTRANT_CHECK_OFF; | ||
1104 | FPU_put_user(sign, d+9); | ||
1105 | RE_ENTRANT_CHECK_ON; | ||
1106 | |||
1107 | return 1; | ||
1108 | } | ||
1109 | |||
1110 | /*===========================================================================*/ | ||
1111 | |||
1112 | /* r gets mangled such that sig is int, sign: | ||
1113 | it is NOT normalized */ | ||
1114 | /* The return value (in eax) is zero if the result is exact, | ||
1115 | if bits are changed due to rounding, truncation, etc, then | ||
1116 | a non-zero value is returned */ | ||
1117 | /* Overflow is signalled by a non-zero return value (in eax). | ||
1118 | In the case of overflow, the returned significand always has the | ||
1119 | largest possible value */ | ||
1120 | int FPU_round_to_int(FPU_REG *r, u_char tag) | ||
1121 | { | ||
1122 | u_char very_big; | ||
1123 | unsigned eax; | ||
1124 | |||
1125 | if (tag == TAG_Zero) | ||
1126 | { | ||
1127 | /* Make sure that zero is returned */ | ||
1128 | significand(r) = 0; | ||
1129 | return 0; /* o.k. */ | ||
1130 | } | ||
1131 | |||
1132 | if (exponent(r) > 63) | ||
1133 | { | ||
1134 | r->sigl = r->sigh = ~0; /* The largest representable number */ | ||
1135 | return 1; /* overflow */ | ||
1136 | } | ||
1137 | |||
1138 | eax = FPU_shrxs(&r->sigl, 63 - exponent(r)); | ||
1139 | very_big = !(~(r->sigh) | ~(r->sigl)); /* test for 0xfff...fff */ | ||
1140 | #define half_or_more (eax & 0x80000000) | ||
1141 | #define frac_part (eax) | ||
1142 | #define more_than_half ((eax & 0x80000001) == 0x80000001) | ||
1143 | switch (control_word & CW_RC) | ||
1144 | { | ||
1145 | case RC_RND: | ||
1146 | if ( more_than_half /* nearest */ | ||
1147 | || (half_or_more && (r->sigl & 1)) ) /* odd -> even */ | ||
1148 | { | ||
1149 | if ( very_big ) return 1; /* overflow */ | ||
1150 | significand(r) ++; | ||
1151 | return PRECISION_LOST_UP; | ||
1152 | } | ||
1153 | break; | ||
1154 | case RC_DOWN: | ||
1155 | if (frac_part && getsign(r)) | ||
1156 | { | ||
1157 | if ( very_big ) return 1; /* overflow */ | ||
1158 | significand(r) ++; | ||
1159 | return PRECISION_LOST_UP; | ||
1160 | } | ||
1161 | break; | ||
1162 | case RC_UP: | ||
1163 | if (frac_part && !getsign(r)) | ||
1164 | { | ||
1165 | if ( very_big ) return 1; /* overflow */ | ||
1166 | significand(r) ++; | ||
1167 | return PRECISION_LOST_UP; | ||
1168 | } | ||
1169 | break; | ||
1170 | case RC_CHOP: | ||
1171 | break; | ||
1172 | } | ||
1173 | |||
1174 | return eax ? PRECISION_LOST_DOWN : 0; | ||
1175 | |||
1176 | } | ||
1177 | |||
1178 | /*===========================================================================*/ | ||
1179 | |||
1180 | u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s) | ||
1181 | { | ||
1182 | unsigned short tag_word = 0; | ||
1183 | u_char tag; | ||
1184 | int i; | ||
1185 | |||
1186 | if ( (addr_modes.default_mode == VM86) || | ||
1187 | ((addr_modes.default_mode == PM16) | ||
1188 | ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) ) | ||
1189 | { | ||
1190 | RE_ENTRANT_CHECK_OFF; | ||
1191 | FPU_access_ok(VERIFY_READ, s, 0x0e); | ||
1192 | FPU_get_user(control_word, (unsigned short __user *) s); | ||
1193 | FPU_get_user(partial_status, (unsigned short __user *) (s+2)); | ||
1194 | FPU_get_user(tag_word, (unsigned short __user *) (s+4)); | ||
1195 | FPU_get_user(instruction_address.offset, (unsigned short __user *) (s+6)); | ||
1196 | FPU_get_user(instruction_address.selector, (unsigned short __user *) (s+8)); | ||
1197 | FPU_get_user(operand_address.offset, (unsigned short __user *) (s+0x0a)); | ||
1198 | FPU_get_user(operand_address.selector, (unsigned short __user *) (s+0x0c)); | ||
1199 | RE_ENTRANT_CHECK_ON; | ||
1200 | s += 0x0e; | ||
1201 | if ( addr_modes.default_mode == VM86 ) | ||
1202 | { | ||
1203 | instruction_address.offset | ||
1204 | += (instruction_address.selector & 0xf000) << 4; | ||
1205 | operand_address.offset += (operand_address.selector & 0xf000) << 4; | ||
1206 | } | ||
1207 | } | ||
1208 | else | ||
1209 | { | ||
1210 | RE_ENTRANT_CHECK_OFF; | ||
1211 | FPU_access_ok(VERIFY_READ, s, 0x1c); | ||
1212 | FPU_get_user(control_word, (unsigned short __user *) s); | ||
1213 | FPU_get_user(partial_status, (unsigned short __user *) (s+4)); | ||
1214 | FPU_get_user(tag_word, (unsigned short __user *) (s+8)); | ||
1215 | FPU_get_user(instruction_address.offset, (unsigned long __user *) (s+0x0c)); | ||
1216 | FPU_get_user(instruction_address.selector, (unsigned short __user *) (s+0x10)); | ||
1217 | FPU_get_user(instruction_address.opcode, (unsigned short __user *) (s+0x12)); | ||
1218 | FPU_get_user(operand_address.offset, (unsigned long __user *) (s+0x14)); | ||
1219 | FPU_get_user(operand_address.selector, (unsigned long __user *) (s+0x18)); | ||
1220 | RE_ENTRANT_CHECK_ON; | ||
1221 | s += 0x1c; | ||
1222 | } | ||
1223 | |||
1224 | #ifdef PECULIAR_486 | ||
1225 | control_word &= ~0xe080; | ||
1226 | #endif /* PECULIAR_486 */ | ||
1227 | |||
1228 | top = (partial_status >> SW_Top_Shift) & 7; | ||
1229 | |||
1230 | if ( partial_status & ~control_word & CW_Exceptions ) | ||
1231 | partial_status |= (SW_Summary | SW_Backward); | ||
1232 | else | ||
1233 | partial_status &= ~(SW_Summary | SW_Backward); | ||
1234 | |||
1235 | for ( i = 0; i < 8; i++ ) | ||
1236 | { | ||
1237 | tag = tag_word & 3; | ||
1238 | tag_word >>= 2; | ||
1239 | |||
1240 | if ( tag == TAG_Empty ) | ||
1241 | /* New tag is empty. Accept it */ | ||
1242 | FPU_settag(i, TAG_Empty); | ||
1243 | else if ( FPU_gettag(i) == TAG_Empty ) | ||
1244 | { | ||
1245 | /* Old tag is empty and new tag is not empty. New tag is determined | ||
1246 | by old reg contents */ | ||
1247 | if ( exponent(&fpu_register(i)) == - EXTENDED_Ebias ) | ||
1248 | { | ||
1249 | if ( !(fpu_register(i).sigl | fpu_register(i).sigh) ) | ||
1250 | FPU_settag(i, TAG_Zero); | ||
1251 | else | ||
1252 | FPU_settag(i, TAG_Special); | ||
1253 | } | ||
1254 | else if ( exponent(&fpu_register(i)) == 0x7fff - EXTENDED_Ebias ) | ||
1255 | { | ||
1256 | FPU_settag(i, TAG_Special); | ||
1257 | } | ||
1258 | else if ( fpu_register(i).sigh & 0x80000000 ) | ||
1259 | FPU_settag(i, TAG_Valid); | ||
1260 | else | ||
1261 | FPU_settag(i, TAG_Special); /* An Un-normal */ | ||
1262 | } | ||
1263 | /* Else old tag is not empty and new tag is not empty. Old tag | ||
1264 | remains correct */ | ||
1265 | } | ||
1266 | |||
1267 | return s; | ||
1268 | } | ||
1269 | |||
1270 | |||
1271 | void frstor(fpu_addr_modes addr_modes, u_char __user *data_address) | ||
1272 | { | ||
1273 | int i, regnr; | ||
1274 | u_char __user *s = fldenv(addr_modes, data_address); | ||
1275 | int offset = (top & 7) * 10, other = 80 - offset; | ||
1276 | |||
1277 | /* Copy all registers in stack order. */ | ||
1278 | RE_ENTRANT_CHECK_OFF; | ||
1279 | FPU_access_ok(VERIFY_READ,s,80); | ||
1280 | __copy_from_user(register_base+offset, s, other); | ||
1281 | if ( offset ) | ||
1282 | __copy_from_user(register_base, s+other, offset); | ||
1283 | RE_ENTRANT_CHECK_ON; | ||
1284 | |||
1285 | for ( i = 0; i < 8; i++ ) | ||
1286 | { | ||
1287 | regnr = (i+top) & 7; | ||
1288 | if ( FPU_gettag(regnr) != TAG_Empty ) | ||
1289 | /* The loaded data over-rides all other cases. */ | ||
1290 | FPU_settag(regnr, FPU_tagof(&st(i))); | ||
1291 | } | ||
1292 | |||
1293 | } | ||
1294 | |||
1295 | |||
1296 | u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d) | ||
1297 | { | ||
1298 | if ( (addr_modes.default_mode == VM86) || | ||
1299 | ((addr_modes.default_mode == PM16) | ||
1300 | ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) ) | ||
1301 | { | ||
1302 | RE_ENTRANT_CHECK_OFF; | ||
1303 | FPU_access_ok(VERIFY_WRITE,d,14); | ||
1304 | #ifdef PECULIAR_486 | ||
1305 | FPU_put_user(control_word & ~0xe080, (unsigned long __user *) d); | ||
1306 | #else | ||
1307 | FPU_put_user(control_word, (unsigned short __user *) d); | ||
1308 | #endif /* PECULIAR_486 */ | ||
1309 | FPU_put_user(status_word(), (unsigned short __user *) (d+2)); | ||
1310 | FPU_put_user(fpu_tag_word, (unsigned short __user *) (d+4)); | ||
1311 | FPU_put_user(instruction_address.offset, (unsigned short __user *) (d+6)); | ||
1312 | FPU_put_user(operand_address.offset, (unsigned short __user *) (d+0x0a)); | ||
1313 | if ( addr_modes.default_mode == VM86 ) | ||
1314 | { | ||
1315 | FPU_put_user((instruction_address.offset & 0xf0000) >> 4, | ||
1316 | (unsigned short __user *) (d+8)); | ||
1317 | FPU_put_user((operand_address.offset & 0xf0000) >> 4, | ||
1318 | (unsigned short __user *) (d+0x0c)); | ||
1319 | } | ||
1320 | else | ||
1321 | { | ||
1322 | FPU_put_user(instruction_address.selector, (unsigned short __user *) (d+8)); | ||
1323 | FPU_put_user(operand_address.selector, (unsigned short __user *) (d+0x0c)); | ||
1324 | } | ||
1325 | RE_ENTRANT_CHECK_ON; | ||
1326 | d += 0x0e; | ||
1327 | } | ||
1328 | else | ||
1329 | { | ||
1330 | RE_ENTRANT_CHECK_OFF; | ||
1331 | FPU_access_ok(VERIFY_WRITE, d, 7*4); | ||
1332 | #ifdef PECULIAR_486 | ||
1333 | control_word &= ~0xe080; | ||
1334 | /* An 80486 sets nearly all of the reserved bits to 1. */ | ||
1335 | control_word |= 0xffff0040; | ||
1336 | partial_status = status_word() | 0xffff0000; | ||
1337 | fpu_tag_word |= 0xffff0000; | ||
1338 | I387.soft.fcs &= ~0xf8000000; | ||
1339 | I387.soft.fos |= 0xffff0000; | ||
1340 | #endif /* PECULIAR_486 */ | ||
1341 | if (__copy_to_user(d, &control_word, 7*4)) | ||
1342 | FPU_abort; | ||
1343 | RE_ENTRANT_CHECK_ON; | ||
1344 | d += 0x1c; | ||
1345 | } | ||
1346 | |||
1347 | control_word |= CW_Exceptions; | ||
1348 | partial_status &= ~(SW_Summary | SW_Backward); | ||
1349 | |||
1350 | return d; | ||
1351 | } | ||
1352 | |||
1353 | |||
1354 | void fsave(fpu_addr_modes addr_modes, u_char __user *data_address) | ||
1355 | { | ||
1356 | u_char __user *d; | ||
1357 | int offset = (top & 7) * 10, other = 80 - offset; | ||
1358 | |||
1359 | d = fstenv(addr_modes, data_address); | ||
1360 | |||
1361 | RE_ENTRANT_CHECK_OFF; | ||
1362 | FPU_access_ok(VERIFY_WRITE,d,80); | ||
1363 | |||
1364 | /* Copy all registers in stack order. */ | ||
1365 | if (__copy_to_user(d, register_base+offset, other)) | ||
1366 | FPU_abort; | ||
1367 | if ( offset ) | ||
1368 | if (__copy_to_user(d+other, register_base, offset)) | ||
1369 | FPU_abort; | ||
1370 | RE_ENTRANT_CHECK_ON; | ||
1371 | |||
1372 | finit(); | ||
1373 | } | ||
1374 | |||
1375 | /*===========================================================================*/ | ||
diff --git a/arch/x86/math-emu/reg_mul.c b/arch/x86/math-emu/reg_mul.c new file mode 100644 index 000000000000..40f50b61bc67 --- /dev/null +++ b/arch/x86/math-emu/reg_mul.c | |||
@@ -0,0 +1,132 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | reg_mul.c | | ||
3 | | | | ||
4 | | Multiply one FPU_REG by another, put the result in a destination FPU_REG. | | ||
5 | | | | ||
6 | | Copyright (C) 1992,1993,1997 | | ||
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | ||
8 | | E-mail billm@suburbia.net | | ||
9 | | | | ||
10 | | Returns the tag of the result if no exceptions or errors occurred. | | ||
11 | | | | ||
12 | +---------------------------------------------------------------------------*/ | ||
13 | |||
14 | /*---------------------------------------------------------------------------+ | ||
15 | | The destination may be any FPU_REG, including one of the source FPU_REGs. | | ||
16 | +---------------------------------------------------------------------------*/ | ||
17 | |||
18 | #include "fpu_emu.h" | ||
19 | #include "exception.h" | ||
20 | #include "reg_constant.h" | ||
21 | #include "fpu_system.h" | ||
22 | |||
23 | |||
24 | /* | ||
25 | Multiply two registers to give a register result. | ||
26 | The sources are st(deststnr) and (b,tagb,signb). | ||
27 | The destination is st(deststnr). | ||
28 | */ | ||
29 | /* This routine must be called with non-empty source registers */ | ||
30 | int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w) | ||
31 | { | ||
32 | FPU_REG *a = &st(deststnr); | ||
33 | FPU_REG *dest = a; | ||
34 | u_char taga = FPU_gettagi(deststnr); | ||
35 | u_char saved_sign = getsign(dest); | ||
36 | u_char sign = (getsign(a) ^ getsign(b)); | ||
37 | int tag; | ||
38 | |||
39 | |||
40 | if ( !(taga | tagb) ) | ||
41 | { | ||
42 | /* Both regs Valid, this should be the most common case. */ | ||
43 | |||
44 | tag = FPU_u_mul(a, b, dest, control_w, sign, exponent(a) + exponent(b)); | ||
45 | if ( tag < 0 ) | ||
46 | { | ||
47 | setsign(dest, saved_sign); | ||
48 | return tag; | ||
49 | } | ||
50 | FPU_settagi(deststnr, tag); | ||
51 | return tag; | ||
52 | } | ||
53 | |||
54 | if ( taga == TAG_Special ) | ||
55 | taga = FPU_Special(a); | ||
56 | if ( tagb == TAG_Special ) | ||
57 | tagb = FPU_Special(b); | ||
58 | |||
59 | if ( ((taga == TAG_Valid) && (tagb == TW_Denormal)) | ||
60 | || ((taga == TW_Denormal) && (tagb == TAG_Valid)) | ||
61 | || ((taga == TW_Denormal) && (tagb == TW_Denormal)) ) | ||
62 | { | ||
63 | FPU_REG x, y; | ||
64 | if ( denormal_operand() < 0 ) | ||
65 | return FPU_Exception; | ||
66 | |||
67 | FPU_to_exp16(a, &x); | ||
68 | FPU_to_exp16(b, &y); | ||
69 | tag = FPU_u_mul(&x, &y, dest, control_w, sign, | ||
70 | exponent16(&x) + exponent16(&y)); | ||
71 | if ( tag < 0 ) | ||
72 | { | ||
73 | setsign(dest, saved_sign); | ||
74 | return tag; | ||
75 | } | ||
76 | FPU_settagi(deststnr, tag); | ||
77 | return tag; | ||
78 | } | ||
79 | else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) ) | ||
80 | { | ||
81 | if ( ((tagb == TW_Denormal) || (taga == TW_Denormal)) | ||
82 | && (denormal_operand() < 0) ) | ||
83 | return FPU_Exception; | ||
84 | |||
85 | /* Must have either both arguments == zero, or | ||
86 | one valid and the other zero. | ||
87 | The result is therefore zero. */ | ||
88 | FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); | ||
89 | /* The 80486 book says that the answer is +0, but a real | ||
90 | 80486 behaves this way. | ||
91 | IEEE-754 apparently says it should be this way. */ | ||
92 | setsign(dest, sign); | ||
93 | return TAG_Zero; | ||
94 | } | ||
95 | /* Must have infinities, NaNs, etc */ | ||
96 | else if ( (taga == TW_NaN) || (tagb == TW_NaN) ) | ||
97 | { | ||
98 | return real_2op_NaN(b, tagb, deststnr, &st(0)); | ||
99 | } | ||
100 | else if ( ((taga == TW_Infinity) && (tagb == TAG_Zero)) | ||
101 | || ((tagb == TW_Infinity) && (taga == TAG_Zero)) ) | ||
102 | { | ||
103 | return arith_invalid(deststnr); /* Zero*Infinity is invalid */ | ||
104 | } | ||
105 | else if ( ((taga == TW_Denormal) || (tagb == TW_Denormal)) | ||
106 | && (denormal_operand() < 0) ) | ||
107 | { | ||
108 | return FPU_Exception; | ||
109 | } | ||
110 | else if (taga == TW_Infinity) | ||
111 | { | ||
112 | FPU_copy_to_regi(a, TAG_Special, deststnr); | ||
113 | setsign(dest, sign); | ||
114 | return TAG_Special; | ||
115 | } | ||
116 | else if (tagb == TW_Infinity) | ||
117 | { | ||
118 | FPU_copy_to_regi(b, TAG_Special, deststnr); | ||
119 | setsign(dest, sign); | ||
120 | return TAG_Special; | ||
121 | } | ||
122 | |||
123 | #ifdef PARANOID | ||
124 | else | ||
125 | { | ||
126 | EXCEPTION(EX_INTERNAL|0x102); | ||
127 | return FPU_Exception; | ||
128 | } | ||
129 | #endif /* PARANOID */ | ||
130 | |||
131 | return 0; | ||
132 | } | ||
diff --git a/arch/x86/math-emu/reg_norm.S b/arch/x86/math-emu/reg_norm.S new file mode 100644 index 000000000000..8b6352efceef --- /dev/null +++ b/arch/x86/math-emu/reg_norm.S | |||
@@ -0,0 +1,147 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | reg_norm.S | | ||
3 | | | | ||
4 | | Copyright (C) 1992,1993,1994,1995,1997 | | ||
5 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
6 | | Australia. E-mail billm@suburbia.net | | ||
7 | | | | ||
8 | | Normalize the value in a FPU_REG. | | ||
9 | | | | ||
10 | | Call from C as: | | ||
11 | | int FPU_normalize(FPU_REG *n) | | ||
12 | | | | ||
13 | | int FPU_normalize_nuo(FPU_REG *n) | | ||
14 | | | | ||
15 | | Return value is the tag of the answer, or-ed with FPU_Exception if | | ||
16 | | one was raised, or -1 on internal error. | | ||
17 | | | | ||
18 | +---------------------------------------------------------------------------*/ | ||
19 | |||
20 | #include "fpu_emu.h" | ||
21 | |||
22 | |||
23 | .text | ||
/*
 * int FPU_normalize(FPU_REG *n)
 *
 * Shift the 64 bit significand of *n left until its top bit is set,
 * reducing the exponent by the number of places shifted, then check the
 * exponent against EXP_OVER / EXP_UNDER.
 *
 * Register use:  %ebx = n,  %edx:%eax = significand (high:low),
 *                %ecx = shift count.  The result is returned in %eax.
 */
ENTRY(FPU_normalize)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%ebx

	movl	PARAM1,%ebx

	movl	SIGH(%ebx),%edx
	movl	SIGL(%ebx),%eax

	testl	%edx,%edx		/* ms bits */
	js	L_done			/* Already normalized */
	jnz	L_shift_1		/* Shift left 1 - 31 bits */

	/* The upper word is empty. */
	testl	%eax,%eax
	jz	L_zero			/* The contents are zero */

	/* Move the lower word up by a whole 32 bits first. */
	movl	%eax,%edx
	xorl	%eax,%eax
	subw	$32,EXP(%ebx)		/* This can cause an underflow */

/* We need to shift left by 1 - 31 bits */
L_shift_1:
	bsrl	%edx,%ecx		/* index of the highest set bit */
	subl	$31,%ecx
	negl	%ecx			/* %ecx = 31 - index = required shift */
	shldl	%cl,%eax,%edx
	shll	%cl,%eax
	subw	%cx,EXP(%ebx)		/* This can cause an underflow */

	movl	%edx,SIGH(%ebx)
	movl	%eax,SIGL(%ebx)

L_done:
	cmpw	EXP_OVER,EXP(%ebx)
	jge	L_overflow

	cmpw	EXP_UNDER,EXP(%ebx)
	jle	L_underflow

L_exit_valid:
	movl	TAG_Valid,%eax

	/* Convert the exponent to 80x87 form. */
	addw	EXTENDED_Ebias,EXP(%ebx)
	andw	$0x7fff,EXP(%ebx)

L_exit:
	popl	%ebx
	leave
	ret


L_zero:
	movw	$0,EXP(%ebx)
	movl	TAG_Zero,%eax
	jmp	L_exit

L_underflow:
	/* Convert the exponent to 80x87 form. */
	addw	EXTENDED_Ebias,EXP(%ebx)
	pushl	%ebx			/* argument: the register */
	call	arith_underflow		/* its result in %eax is returned */
	popl	%ebx
	jmp	L_exit

L_overflow:
	/* Convert the exponent to 80x87 form. */
	addw	EXTENDED_Ebias,EXP(%ebx)
	pushl	%ebx			/* argument: the register */
	call	arith_overflow		/* its result in %eax is returned */
	popl	%ebx
	jmp	L_exit
97 | |||
98 | |||
99 | |||
/*
 * int FPU_normalize_nuo(FPU_REG *n)
 *
 * Normalise without reporting underflow or overflow.
 *
 * The significand of *n is shifted left until its top bit is set and the
 * exponent is reduced by the number of places shifted.  The exponent is
 * neither range checked nor re-biased here.  A zero significand gets the
 * exponent EXP_UNDER.
 *
 * Register use:  %ebx = n,  %edx:%eax = significand (high:low),
 *                %ecx = shift count.
 * Returns TAG_Valid or TAG_Zero in %eax.
 */
ENTRY(FPU_normalize_nuo)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%ebx

	movl	PARAM1,%ebx

	movl	SIGH(%ebx),%edx
	movl	SIGL(%ebx),%eax

	testl	%edx,%edx		/* ms bits */
	js	L_exit_nuo_valid	/* Already normalized */
	jnz	L_nuo_shift_1		/* Shift left 1 - 31 bits */

	/* The upper word is empty. */
	testl	%eax,%eax
	jz	L_exit_nuo_zero		/* The contents are zero */

	/* Move the lower word up by a whole 32 bits first. */
	movl	%eax,%edx
	xorl	%eax,%eax
	subw	$32,EXP(%ebx)		/* This can cause an underflow */

/* We need to shift left by 1 - 31 bits */
L_nuo_shift_1:
	bsrl	%edx,%ecx		/* index of the highest set bit */
	subl	$31,%ecx
	negl	%ecx			/* %ecx = 31 - index = required shift */
	shldl	%cl,%eax,%edx
	shll	%cl,%eax
	subw	%cx,EXP(%ebx)		/* This can cause an underflow */

	movl	%edx,SIGH(%ebx)
	movl	%eax,SIGL(%ebx)

L_exit_nuo_valid:
	movl	TAG_Valid,%eax

	popl	%ebx
	leave
	ret

L_exit_nuo_zero:
	movl	TAG_Zero,%eax
	movw	EXP_UNDER,EXP(%ebx)

	popl	%ebx
	leave
	ret
diff --git a/arch/x86/math-emu/reg_round.S b/arch/x86/math-emu/reg_round.S new file mode 100644 index 000000000000..d1d4e48b4f67 --- /dev/null +++ b/arch/x86/math-emu/reg_round.S | |||
@@ -0,0 +1,708 @@ | |||
1 | .file "reg_round.S" | ||
2 | /*---------------------------------------------------------------------------+ | ||
3 | | reg_round.S | | ||
4 | | | | ||
5 | | Rounding/truncation/etc for FPU basic arithmetic functions. | | ||
6 | | | | ||
7 | | Copyright (C) 1993,1995,1997 | | ||
8 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
9 | | Australia. E-mail billm@suburbia.net | | ||
10 | | | | ||
11 | | This code has four possible entry points. | | ||
12 | | The following must be entered by a jmp instruction: | | ||
13 | | fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. | | ||
14 | | | | ||
15 | | The FPU_round entry point is intended to be used by C code. | | ||
16 | | From C, call as: | | ||
17 | | int FPU_round(FPU_REG *arg, unsigned int extent, int dummy, unsigned int control_w, u_char sign) | | ||
18 | | | | ||
19 | | Return value is the tag of the answer, or-ed with FPU_Exception if | | ||
20 | | one was raised, or -1 on internal error. | | ||
21 | | | | ||
22 | | For correct "up" and "down" rounding, the argument must have the correct | | ||
23 | | sign. | | ||
24 | | | | ||
25 | +---------------------------------------------------------------------------*/ | ||
26 | |||
27 | /*---------------------------------------------------------------------------+ | ||
28 | | Four entry points. | | ||
29 | | | | ||
30 | | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: | | ||
31 | | %eax:%ebx 64 bit significand | | ||
32 | | %edx 32 bit extension of the significand | | ||
33 | | %edi pointer to an FPU_REG for the result to be stored | | ||
34 | | stack calling function must have set up a C stack frame and | | ||
35 | | pushed %esi, %edi, and %ebx | | ||
36 | | | | ||
37 | | Needed just for the fpu_reg_round_sqrt entry point: | | ||
38 | | %cx A control word in the same format as the FPU control word. | | ||
39 | | Otherwise, PARAM4 must give such a value. | | ||
40 | | | | ||
41 | | | | ||
42 | | The significand and its extension are assumed to be exact in the | | ||
43 | | following sense: | | ||
44 | | If the significand by itself is the exact result then the significand | | ||
45 | | extension (%edx) must contain 0, otherwise the significand extension | | ||
46 | | must be non-zero. | | ||
47 | | If the significand extension is non-zero then the significand is | | ||
48 | | smaller than the magnitude of the correct exact result by an amount | | ||
49 | | greater than zero and less than one ls bit of the significand. | | ||
50 | | The significand extension is only required to have three possible | | ||
51 | | non-zero values: | | ||
52 | | less than 0x80000000 <=> the significand is less than 1/2 an ls | | ||
53 | | bit smaller than the magnitude of the | | ||
54 | | true exact result. | | ||
55 | | exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit | | ||
56 | | smaller than the magnitude of the true | | ||
57 | | exact result. | | ||
58 | | greater than 0x80000000 <=> the significand is more than 1/2 an ls | | ||
59 | | bit smaller than the magnitude of the | | ||
60 | | true exact result. | | ||
61 | | | | ||
62 | +---------------------------------------------------------------------------*/ | ||
63 | |||
64 | /*---------------------------------------------------------------------------+ | ||
65 | | The code in this module has become quite complex, but it should handle | | ||
66 | | all of the FPU flags which are set at this stage of the basic arithmetic | | ||
67 | | computations. | | ||
68 | | There are a few rare cases where the results are not set identically to | | ||
69 | | a real FPU. These require a bit more thought because at this stage the | | ||
70 | | results of the code here appear to be more consistent... | | ||
71 | | This may be changed in a future version. | | ||
72 | +---------------------------------------------------------------------------*/ | ||
73 | |||
74 | |||
75 | #include "fpu_emu.h" | ||
76 | #include "exception.h" | ||
77 | #include "control_w.h" | ||
78 | |||
79 | /* Flags for FPU_bits_lost: which way the rounding step moved the result (0 = no bits lost) */ | ||
80 | #define LOST_DOWN $1 | ||
81 | #define LOST_UP $2 | ||
82 | |||
83 | /* Flags for FPU_denormal (0 = not a de-normal) */ | ||
84 | #define DENORMAL $1 | ||
85 | #define UNMASKED_UNDERFLOW $2 | ||
86 | |||
87 | |||
88 | #ifndef NON_REENTRANT_FPU | ||
89 | /* Make the code re-entrant by putting | ||
90 | local storage on the stack: two bytes inside the dword which fpu_reg_round pushes on entry and pops again at fpu_reg_round_special_exit */ | ||
91 | #define FPU_bits_lost (%esp) | ||
92 | #define FPU_denormal 1(%esp) | ||
93 | |||
94 | #else | ||
95 | /* Not re-entrant, so we can gain speed by putting | ||
96 | local storage in a static area: */ | ||
97 | .data | ||
98 | .align 4,0 | ||
99 | FPU_bits_lost: | ||
100 | .byte 0 | ||
101 | FPU_denormal: | ||
102 | .byte 0 | ||
103 | #endif /* NON_REENTRANT_FPU */ | ||
104 | |||
105 | |||
106 | .text | ||
107 | .globl fpu_reg_round | ||
108 | .globl fpu_Arith_exit | ||
109 | |||
110 | /* Entry point when called from C: builds the stack frame and loads the registers which fpu_reg_round expects (%edi = result pointer, %eax:%ebx = significand, %edx = extension), then falls through into it */ | ||
111 | ENTRY(FPU_round) | ||
112 | pushl %ebp | ||
113 | movl %esp,%ebp | ||
114 | pushl %esi | ||
115 | pushl %edi | ||
116 | pushl %ebx | ||
117 | |||
118 | movl PARAM1,%edi | ||
119 | movl SIGH(%edi),%eax | ||
120 | movl SIGL(%edi),%ebx | ||
121 | movl PARAM2,%edx | ||
122 | |||
123 | fpu_reg_round: /* Normal entry point; entered by jmp with the frame and registers already set up */ | ||
124 | movl PARAM4,%ecx /* control word */ | ||
125 | |||
126 | #ifndef NON_REENTRANT_FPU | ||
127 | pushl %ebx /* adjust the stack pointer: reserves the dword holding FPU_bits_lost and FPU_denormal */ | ||
128 | #endif /* NON_REENTRANT_FPU */ | ||
129 | |||
130 | #ifdef PARANOID | ||
131 | /* Cannot use this here yet */ | ||
132 | /* orl %eax,%eax */ | ||
133 | /* jns L_entry_bugged */ | ||
134 | #endif /* PARANOID */ | ||
135 | |||
136 | cmpw EXP_UNDER,EXP(%edi) | ||
137 | jle L_Make_denorm /* The number is a de-normal */ | ||
138 | |||
139 | movb $0,FPU_denormal /* 0 -> not a de-normal */ | ||
140 | |||
141 | Denorm_done: | ||
142 | movb $0,FPU_bits_lost /* No bits yet lost in rounding */ | ||
143 | |||
144 | movl %ecx,%esi /* keep the whole control word in %esi for the rounding-mode tests */ | ||
145 | andl CW_PC,%ecx /* isolate the precision control field */ | ||
146 | cmpl PR_64_BITS,%ecx | ||
147 | je LRound_To_64 | ||
148 | |||
149 | cmpl PR_53_BITS,%ecx | ||
150 | je LRound_To_53 | ||
151 | |||
152 | cmpl PR_24_BITS,%ecx | ||
153 | je LRound_To_24 | ||
154 | |||
155 | #ifdef PECULIAR_486 | ||
156 | /* With the precision control bits set to 01 "(reserved)", a real 80486 | ||
157 | behaves as if the precision control bits were set to 11 "64 bits" */ | ||
158 | cmpl PR_RESERVED_BITS,%ecx | ||
159 | je LRound_To_64 | ||
160 | #ifdef PARANOID | ||
161 | jmp L_bugged_denorm_486 | ||
162 | #endif /* PARANOID */ | ||
163 | #else | ||
164 | #ifdef PARANOID | ||
165 | jmp L_bugged_denorm /* There is no bug, just a bad control word */ | ||
166 | #endif /* PARANOID */ | ||
167 | #endif /* PECULIAR_486 */ | ||
168 | |||
169 | |||
170 | /* Round etc to 24 bit precision: only the top 24 bits of %eax are kept; the low 8 bits of %eax, all of %ebx and the extension %edx are the bits to be rounded away */ | ||
171 | LRound_To_24: | ||
172 | movl %esi,%ecx /* %esi holds the whole control word */ | ||
173 | andl CW_RC,%ecx /* isolate the rounding control field */ | ||
174 | cmpl RC_RND,%ecx | ||
175 | je LRound_nearest_24 | ||
176 | |||
177 | cmpl RC_CHOP,%ecx | ||
178 | je LCheck_truncate_24 | ||
179 | |||
180 | cmpl RC_UP,%ecx /* Towards +infinity */ | ||
181 | je LUp_24 | ||
182 | |||
183 | cmpl RC_DOWN,%ecx /* Towards -infinity */ | ||
184 | je LDown_24 | ||
185 | |||
186 | #ifdef PARANOID | ||
187 | jmp L_bugged_round24 | ||
188 | #endif /* PARANOID */ | ||
189 | |||
190 | LUp_24: | ||
191 | cmpb SIGN_POS,PARAM5 /* PARAM5 is the sign of the result */ | ||
192 | jne LCheck_truncate_24 /* If negative then up==truncate */ | ||
193 | |||
194 | jmp LCheck_24_round_up | ||
195 | |||
196 | LDown_24: | ||
197 | cmpb SIGN_POS,PARAM5 | ||
198 | je LCheck_truncate_24 /* If positive then down==truncate */ | ||
199 | |||
200 | LCheck_24_round_up: | ||
201 | movl %eax,%ecx | ||
202 | andl $0x000000ff,%ecx | ||
203 | orl %ebx,%ecx | ||
204 | orl %edx,%ecx /* non-zero if any bit below the 24th bit is set */ | ||
205 | jnz LDo_24_round_up | ||
206 | jmp L_Re_normalise /* exact: nothing to round */ | ||
207 | |||
208 | LRound_nearest_24: | ||
209 | /* Do rounding of the 24th bit if needed (nearest or even) */ | ||
210 | movl %eax,%ecx | ||
211 | andl $0x000000ff,%ecx | ||
212 | cmpl $0x00000080,%ecx | ||
213 | jc LCheck_truncate_24 /* less than half, no increment needed */ | ||
214 | |||
215 | jne LGreater_Half_24 /* greater than half, increment needed */ | ||
216 | |||
217 | /* Possibly half, we need to check the ls bits */ | ||
218 | orl %ebx,%ebx | ||
219 | jnz LGreater_Half_24 /* greater than half, increment needed */ | ||
220 | |||
221 | orl %edx,%edx | ||
222 | jnz LGreater_Half_24 /* greater than half, increment needed */ | ||
223 | |||
224 | /* Exactly half, increment only if 24th bit is 1 (round to even) */ | ||
225 | testl $0x00000100,%eax | ||
226 | jz LDo_truncate_24 | ||
227 | |||
228 | LGreater_Half_24: /* Rounding: increment at the 24th bit */ | ||
229 | LDo_24_round_up: | ||
230 | andl $0xffffff00,%eax /* Truncate to 24 bits */ | ||
231 | xorl %ebx,%ebx | ||
232 | movb LOST_UP,FPU_bits_lost | ||
233 | addl $0x00000100,%eax /* the carry from this add is tested at LCheck_Round_Overflow */ | ||
234 | jmp LCheck_Round_Overflow | ||
235 | |||
236 | LCheck_truncate_24: | ||
237 | movl %eax,%ecx | ||
238 | andl $0x000000ff,%ecx | ||
239 | orl %ebx,%ecx | ||
240 | orl %edx,%ecx | ||
241 | jz L_Re_normalise /* No truncation needed */ | ||
242 | |||
243 | LDo_truncate_24: | ||
244 | andl $0xffffff00,%eax /* Truncate to 24 bits */ | ||
245 | xorl %ebx,%ebx | ||
246 | movb LOST_DOWN,FPU_bits_lost | ||
247 | jmp L_Re_normalise | ||
248 | |||
249 | |||
250 | /* Round etc to 53 bit precision: all of %eax and the top 21 bits of %ebx are kept; the low 11 bits of %ebx and the extension %edx are the bits to be rounded away */ | ||
251 | LRound_To_53: | ||
252 | movl %esi,%ecx /* %esi holds the whole control word */ | ||
253 | andl CW_RC,%ecx /* isolate the rounding control field */ | ||
254 | cmpl RC_RND,%ecx | ||
255 | je LRound_nearest_53 | ||
256 | |||
257 | cmpl RC_CHOP,%ecx | ||
258 | je LCheck_truncate_53 | ||
259 | |||
260 | cmpl RC_UP,%ecx /* Towards +infinity */ | ||
261 | je LUp_53 | ||
262 | |||
263 | cmpl RC_DOWN,%ecx /* Towards -infinity */ | ||
264 | je LDown_53 | ||
265 | |||
266 | #ifdef PARANOID | ||
267 | jmp L_bugged_round53 | ||
268 | #endif /* PARANOID */ | ||
269 | |||
270 | LUp_53: | ||
271 | cmpb SIGN_POS,PARAM5 /* PARAM5 is the sign of the result */ | ||
272 | jne LCheck_truncate_53 /* If negative then up==truncate */ | ||
273 | |||
274 | jmp LCheck_53_round_up | ||
275 | |||
276 | LDown_53: | ||
277 | cmpb SIGN_POS,PARAM5 | ||
278 | je LCheck_truncate_53 /* If positive then down==truncate */ | ||
279 | |||
280 | LCheck_53_round_up: | ||
281 | movl %ebx,%ecx | ||
282 | andl $0x000007ff,%ecx | ||
283 | orl %edx,%ecx /* non-zero if any bit below the 53rd bit is set */ | ||
284 | jnz LDo_53_round_up | ||
285 | jmp L_Re_normalise /* exact: nothing to round */ | ||
286 | |||
287 | LRound_nearest_53: | ||
288 | /* Do rounding of the 53rd bit if needed (nearest or even) */ | ||
289 | movl %ebx,%ecx | ||
290 | andl $0x000007ff,%ecx | ||
291 | cmpl $0x00000400,%ecx | ||
292 | jc LCheck_truncate_53 /* less than half, no increment needed */ | ||
293 | |||
294 | jnz LGreater_Half_53 /* greater than half, increment needed */ | ||
295 | |||
296 | /* Possibly half, we need to check the ls bits */ | ||
297 | orl %edx,%edx | ||
298 | jnz LGreater_Half_53 /* greater than half, increment needed */ | ||
299 | |||
300 | /* Exactly half, increment only if 53rd bit is 1 (round to even) */ | ||
301 | testl $0x00000800,%ebx | ||
302 | jz LTruncate_53 | ||
303 | |||
304 | LGreater_Half_53: /* Rounding: increment at the 53rd bit */ | ||
305 | LDo_53_round_up: | ||
306 | movb LOST_UP,FPU_bits_lost | ||
307 | andl $0xfffff800,%ebx /* Truncate to 53 bits */ | ||
308 | addl $0x00000800,%ebx | ||
309 | adcl $0,%eax /* the carry from this add is tested at LCheck_Round_Overflow */ | ||
310 | jmp LCheck_Round_Overflow | ||
311 | |||
312 | LCheck_truncate_53: | ||
313 | movl %ebx,%ecx | ||
314 | andl $0x000007ff,%ecx | ||
315 | orl %edx,%ecx | ||
316 | jz L_Re_normalise /* No truncation needed */ | ||
317 | |||
318 | LTruncate_53: | ||
319 | movb LOST_DOWN,FPU_bits_lost | ||
320 | andl $0xfffff800,%ebx /* Truncate to 53 bits */ | ||
321 | jmp L_Re_normalise | ||
322 | |||
323 | |||
324 | /* Round etc to 64 bit precision: all of %eax:%ebx is kept and only the extension %edx is rounded away. LCheck_Round_Overflow below is also the common target of the 24 and 53 bit round-up paths. */ | ||
325 | LRound_To_64: | ||
326 | movl %esi,%ecx /* %esi holds the whole control word */ | ||
327 | andl CW_RC,%ecx /* isolate the rounding control field */ | ||
328 | cmpl RC_RND,%ecx | ||
329 | je LRound_nearest_64 | ||
330 | |||
331 | cmpl RC_CHOP,%ecx | ||
332 | je LCheck_truncate_64 | ||
333 | |||
334 | cmpl RC_UP,%ecx /* Towards +infinity */ | ||
335 | je LUp_64 | ||
336 | |||
337 | cmpl RC_DOWN,%ecx /* Towards -infinity */ | ||
338 | je LDown_64 | ||
339 | |||
340 | #ifdef PARANOID | ||
341 | jmp L_bugged_round64 | ||
342 | #endif /* PARANOID */ | ||
343 | |||
344 | LUp_64: | ||
345 | cmpb SIGN_POS,PARAM5 /* PARAM5 is the sign of the result */ | ||
346 | jne LCheck_truncate_64 /* If negative then up==truncate */ | ||
347 | |||
348 | orl %edx,%edx /* any extension bits set? */ | ||
349 | jnz LDo_64_round_up | ||
350 | jmp L_Re_normalise /* exact: nothing to round */ | ||
351 | |||
352 | LDown_64: | ||
353 | cmpb SIGN_POS,PARAM5 | ||
354 | je LCheck_truncate_64 /* If positive then down==truncate */ | ||
355 | |||
356 | orl %edx,%edx /* any extension bits set? */ | ||
357 | jnz LDo_64_round_up | ||
358 | jmp L_Re_normalise /* exact: nothing to round */ | ||
359 | |||
360 | LRound_nearest_64: | ||
361 | cmpl $0x80000000,%edx | ||
362 | jc LCheck_truncate_64 /* less than half, no increment needed */ | ||
363 | |||
364 | jne LDo_64_round_up /* greater than half, increment needed */ | ||
365 | |||
366 | /* Now test for round-to-even */ | ||
367 | testb $1,%bl | ||
368 | jz LCheck_truncate_64 | ||
369 | |||
370 | LDo_64_round_up: | ||
371 | movb LOST_UP,FPU_bits_lost | ||
372 | addl $1,%ebx | ||
373 | adcl $0,%eax | ||
374 | |||
375 | LCheck_Round_Overflow: | ||
376 | jnc L_Re_normalise /* tests the carry left by the rounding add done just before arriving here */ | ||
377 | |||
378 | /* Overflow, adjust the result (significand to 1.0) */ | ||
379 | rcrl $1,%eax | ||
380 | rcrl $1,%ebx | ||
381 | incw EXP(%edi) | ||
382 | jmp L_Re_normalise | ||
383 | |||
384 | LCheck_truncate_64: | ||
385 | orl %edx,%edx | ||
386 | jz L_Re_normalise /* No truncation needed */ | ||
387 | |||
388 | LTruncate_64: | ||
389 | movb LOST_DOWN,FPU_bits_lost | ||
390 | |||
391 | L_Re_normalise: /* Common tail after rounding: fix up de-normals, report lost precision, store significand, exponent and sign, then return the tag in %eax */ | ||
392 | testb $0xff,FPU_denormal | ||
393 | jnz Normalise_result | ||
394 | |||
395 | L_Normalised: | ||
396 | movl TAG_Valid,%edx /* %edx carries the tag to be returned */ | ||
397 | |||
398 | L_deNormalised: | ||
399 | cmpb LOST_UP,FPU_bits_lost | ||
400 | je L_precision_lost_up | ||
401 | |||
402 | cmpb LOST_DOWN,FPU_bits_lost | ||
403 | je L_precision_lost_down | ||
404 | |||
405 | L_no_precision_loss: | ||
406 | /* store the result */ | ||
407 | |||
408 | L_Store_significand: | ||
409 | movl %eax,SIGH(%edi) | ||
410 | movl %ebx,SIGL(%edi) | ||
411 | |||
412 | cmpw EXP_OVER,EXP(%edi) | ||
413 | jge L_overflow | ||
414 | |||
415 | movl %edx,%eax /* return value = tag */ | ||
416 | |||
417 | /* Convert the exponent to 80x87 form. */ | ||
418 | addw EXTENDED_Ebias,EXP(%edi) | ||
419 | andw $0x7fff,EXP(%edi) | ||
420 | |||
421 | fpu_reg_round_signed_special_exit: | ||
422 | |||
423 | cmpb SIGN_POS,PARAM5 /* PARAM5 is the sign of the result */ | ||
424 | je fpu_reg_round_special_exit | ||
425 | |||
426 | orw $0x8000,EXP(%edi) /* Negative sign for the result. */ | ||
427 | |||
428 | fpu_reg_round_special_exit: | ||
429 | |||
430 | #ifndef NON_REENTRANT_FPU | ||
431 | popl %ebx /* adjust the stack pointer: drops the dword pushed at fpu_reg_round */ | ||
432 | #endif /* NON_REENTRANT_FPU */ | ||
433 | |||
434 | fpu_Arith_exit: | ||
435 | popl %ebx | ||
436 | popl %edi | ||
437 | popl %esi | ||
438 | leave | ||
439 | ret | ||
440 | |||
441 | |||
442 | /* | ||
443 | * Set the FPU status flags to represent precision loss due to | ||
444 | * round-up. | ||
445 | */ | ||
446 | L_precision_lost_up: | ||
447 | push %edx /* preserve the tag across the call */ | ||
448 | push %eax /* preserve the ms word of the significand across the call */ | ||
449 | call set_precision_flag_up | ||
450 | popl %eax | ||
451 | popl %edx | ||
452 | jmp L_no_precision_loss /* go on to store the result */ | ||
453 | |||
454 | /* | ||
455 | * Set the FPU status flags to represent precision loss due to | ||
456 | * truncation. | ||
457 | */ | ||
458 | L_precision_lost_down: | ||
459 | push %edx /* preserve the tag across the call */ | ||
460 | push %eax /* preserve the ms word of the significand across the call */ | ||
461 | call set_precision_flag_down | ||
462 | popl %eax | ||
463 | popl %edx | ||
464 | jmp L_no_precision_loss /* go on to store the result */ | ||
465 | |||
466 | |||
467 | /* | ||
468 | * The number is a denormal (which might get rounded up to a normal) | ||
469 | * Shift the number right the required number of bits, which will | ||
470 | * have to be undone later... | ||
471 | */ | ||
472 | L_Make_denorm: | ||
473 | /* The action to be taken depends upon whether the underflow | ||
474 | exception is masked */ | ||
475 | testb CW_Underflow,%cl /* Underflow mask. */ | ||
476 | jz Unmasked_underflow /* Do not make a denormal. */ | ||
477 | |||
478 | movb DENORMAL,FPU_denormal /* written before the push below because FPU_denormal may be addressed relative to %esp */ | ||
479 | |||
480 | pushl %ecx /* Save the control word */ | ||
481 | movw EXP_UNDER+1,%cx | ||
482 | subw EXP(%edi),%cx /* %cx = number of bits to shift right */ | ||
483 | |||
484 | cmpw $64,%cx /* 64 or more: the whole significand is shifted out */ | ||
485 | jnc Denorm_shift_more_than_63 | ||
486 | |||
487 | cmpw $32,%cx /* shrd only works for 0..31 bits */ | ||
488 | jnc Denorm_shift_more_than_32 | ||
489 | |||
490 | /* | ||
491 | * We got here without jumps by assuming that the most common requirement | ||
492 | * is for a small de-normalising shift. | ||
493 | * Shift by [1..31] bits | ||
494 | */ | ||
495 | addw %cx,EXP(%edi) | ||
496 | orl %edx,%edx /* extension */ | ||
497 | setne %ch /* Save whether %edx is non-zero */ | ||
498 | xorl %edx,%edx | ||
499 | shrd %cl,%ebx,%edx | ||
500 | shrd %cl,%eax,%ebx | ||
501 | shr %cl,%eax | ||
502 | orb %ch,%dl /* fold the old extension into the new one as a sticky bit */ | ||
503 | popl %ecx /* restore the control word */ | ||
504 | jmp Denorm_done | ||
505 | |||
506 | /* Shift by [32..63] bits */ | ||
507 | Denorm_shift_more_than_32: | ||
508 | addw %cx,EXP(%edi) | ||
509 | subb $32,%cl /* shift by the remainder; the other 32 bits are done by the register moves below */ | ||
510 | orl %edx,%edx | ||
511 | setne %ch /* Save whether the old extension is non-zero */ | ||
512 | orb %ch,%bl | ||
513 | xorl %edx,%edx | ||
514 | shrd %cl,%ebx,%edx | ||
515 | shrd %cl,%eax,%ebx | ||
516 | shr %cl,%eax | ||
517 | orl %edx,%edx /* test these 32 bits */ | ||
518 | setne %cl | ||
519 | orb %ch,%bl | ||
520 | orb %cl,%bl /* sticky bit for everything shifted out */ | ||
521 | movl %ebx,%edx /* move everything down one word */ | ||
522 | movl %eax,%ebx | ||
523 | xorl %eax,%eax | ||
524 | popl %ecx /* restore the control word */ | ||
525 | jmp Denorm_done | ||
526 | |||
527 | /* Shift by [64..) bits */ | ||
528 | Denorm_shift_more_than_63: | ||
529 | cmpw $64,%cx | ||
530 | jne Denorm_shift_more_than_64 | ||
531 | |||
532 | /* Exactly 64 bit shift */ | ||
533 | addw %cx,EXP(%edi) | ||
534 | xorl %ecx,%ecx | ||
535 | orl %edx,%edx | ||
536 | setne %cl | ||
537 | orl %ebx,%ebx | ||
538 | setne %ch | ||
539 | orb %ch,%cl | ||
540 | orb %cl,%al /* sticky bit for the old extension and ls word */ | ||
541 | movl %eax,%edx /* the old ms word becomes the extension */ | ||
542 | xorl %eax,%eax | ||
543 | xorl %ebx,%ebx | ||
544 | popl %ecx /* restore the control word */ | ||
545 | jmp Denorm_done | ||
546 | |||
547 | Denorm_shift_more_than_64: | ||
548 | movw EXP_UNDER+1,EXP(%edi) | ||
549 | /* This is easy, %eax must be non-zero, so.. */ | ||
550 | movl $1,%edx | ||
551 | xorl %eax,%eax | ||
552 | xorl %ebx,%ebx | ||
553 | popl %ecx /* restore the control word */ | ||
554 | jmp Denorm_done | ||
555 | |||
556 | |||
557 | Unmasked_underflow: | ||
558 | movb UNMASKED_UNDERFLOW,FPU_denormal /* the significand is left unshifted in this case */ | ||
559 | jmp Denorm_done | ||
560 | |||
561 | |||
562 | /* Undo the de-normalisation. Reached from L_Re_normalise when FPU_denormal is non-zero. */ | ||
563 | Normalise_result: | ||
564 | cmpb UNMASKED_UNDERFLOW,FPU_denormal | ||
565 | je Signal_underflow | ||
566 | |||
567 | /* The number must be a denormal if we got here. */ | ||
568 | #ifdef PARANOID | ||
569 | /* But check it... just in case. */ | ||
570 | cmpw EXP_UNDER+1,EXP(%edi) | ||
571 | jne L_norm_bugged | ||
572 | #endif /* PARANOID */ | ||
573 | |||
574 | #ifdef PECULIAR_486 | ||
575 | /* | ||
576 | * This implements a special feature of 80486 behaviour. | ||
577 | * Underflow will be signalled even if the number is | ||
578 | * not a denormal after rounding. | ||
579 | * This difference occurs only for masked underflow, and not | ||
580 | * in the unmasked case. | ||
581 | * Actual 80486 behaviour differs from this in some circumstances. | ||
582 | */ | ||
583 | orl %eax,%eax /* ms bits */ | ||
584 | js LPseudoDenormal /* Will be masked underflow */ | ||
585 | #else | ||
586 | orl %eax,%eax /* ms bits */ | ||
587 | js L_Normalised /* No longer a denormal */ | ||
588 | #endif /* PECULIAR_486 */ | ||
589 | |||
590 | jnz LDenormal_adj_exponent | ||
591 | |||
592 | orl %ebx,%ebx | ||
593 | jz L_underflow_to_zero /* The contents are zero */ | ||
594 | |||
595 | LDenormal_adj_exponent: | ||
596 | decw EXP(%edi) | ||
597 | |||
598 | LPseudoDenormal: | ||
599 | testb $0xff,FPU_bits_lost /* bits lost == underflow */ | ||
600 | movl TAG_Special,%edx /* movl leaves the flags from the testb intact */ | ||
601 | jz L_deNormalised | ||
602 | |||
603 | /* There must be a masked underflow */ | ||
604 | push %eax /* preserve the ms word of the significand across the call */ | ||
605 | pushl EX_Underflow | ||
606 | call EXCEPTION | ||
607 | popl %eax /* discard the argument */ | ||
608 | popl %eax /* restore the ms word of the significand */ | ||
609 | movl TAG_Special,%edx /* set the tag again after the call */ | ||
610 | jmp L_deNormalised | ||
611 | |||
612 | |||
613 | /* | ||
614 | * The operations resulted in a number too small to represent. | ||
615 | * Masked response. | ||
616 | */ | ||
617 | L_underflow_to_zero: | ||
618 | push %eax /* preserve %eax across the call */ | ||
619 | call set_precision_flag_down | ||
620 | popl %eax | ||
621 | |||
622 | push %eax /* preserve %eax across the call */ | ||
623 | pushl EX_Underflow | ||
624 | call EXCEPTION | ||
625 | popl %eax /* discard the argument */ | ||
626 | popl %eax | ||
627 | |||
628 | /* Reduce the exponent to EXP_UNDER */ | ||
629 | movw EXP_UNDER,EXP(%edi) | ||
630 | movl TAG_Zero,%edx | ||
631 | jmp L_Store_significand /* %eax:%ebx are both zero on this path */ | ||
632 | |||
633 | |||
634 | /* The operations resulted in a number too large to represent. */ | ||
635 | L_overflow: | ||
636 | addw EXTENDED_Ebias,EXP(%edi) /* Set for unmasked response. */ | ||
637 | push %edi /* argument: pointer to the result */ | ||
638 | call arith_overflow | ||
639 | pop %edi | ||
640 | jmp fpu_reg_round_signed_special_exit /* %eax is whatever arith_overflow left in it */ | ||
641 | |||
642 | |||
643 | Signal_underflow: | ||
644 | /* The number may have been changed to a non-denormal */ | ||
645 | /* by the rounding operations. */ | ||
646 | cmpw EXP_UNDER,EXP(%edi) | ||
647 | jle Do_unmasked_underflow | ||
648 | |||
649 | jmp L_Normalised | ||
650 | |||
651 | Do_unmasked_underflow: | ||
652 | /* Increase the exponent by the magic number */ | ||
653 | addw $(3*(1<<13)),EXP(%edi) | ||
654 | push %eax /* preserve the ms word of the significand across the call */ | ||
655 | pushl EX_Underflow | ||
656 | call EXCEPTION | ||
657 | popl %eax /* discard the argument */ | ||
658 | popl %eax /* restore the ms word of the significand */ | ||
659 | jmp L_Normalised | ||
660 | |||
661 | |||
662 | #ifdef PARANOID /* internal-error exits: each raises EX_INTERNAL with its own code, then returns -1 */ | ||
663 | #ifdef PECULIAR_486 | ||
664 | L_bugged_denorm_486: | ||
665 | pushl EX_INTERNAL|0x236 | ||
666 | call EXCEPTION | ||
667 | popl %ebx /* discard the argument */ | ||
668 | jmp L_exception_exit | ||
669 | #else | ||
670 | L_bugged_denorm: | ||
671 | pushl EX_INTERNAL|0x230 | ||
672 | call EXCEPTION | ||
673 | popl %ebx /* discard the argument */ | ||
674 | jmp L_exception_exit | ||
675 | #endif /* PECULIAR_486 */ | ||
676 | |||
677 | L_bugged_round24: | ||
678 | pushl EX_INTERNAL|0x231 | ||
679 | call EXCEPTION | ||
680 | popl %ebx /* discard the argument */ | ||
681 | jmp L_exception_exit | ||
682 | |||
683 | L_bugged_round53: | ||
684 | pushl EX_INTERNAL|0x232 | ||
685 | call EXCEPTION | ||
686 | popl %ebx /* discard the argument */ | ||
687 | jmp L_exception_exit | ||
688 | |||
689 | L_bugged_round64: | ||
690 | pushl EX_INTERNAL|0x233 | ||
691 | call EXCEPTION | ||
692 | popl %ebx /* discard the argument */ | ||
693 | jmp L_exception_exit | ||
694 | |||
695 | L_norm_bugged: | ||
696 | pushl EX_INTERNAL|0x234 | ||
697 | call EXCEPTION | ||
698 | popl %ebx /* discard the argument */ | ||
699 | jmp L_exception_exit | ||
700 | |||
701 | L_entry_bugged: | ||
702 | pushl EX_INTERNAL|0x235 | ||
703 | call EXCEPTION | ||
704 | popl %ebx /* discard the argument */ | ||
705 | L_exception_exit: | ||
706 | mov $-1,%eax /* return value for an internal error */ | ||
707 | jmp fpu_reg_round_special_exit /* leaves without storing a result or sign */ | ||
708 | #endif /* PARANOID */ | ||
diff --git a/arch/x86/math-emu/reg_u_add.S b/arch/x86/math-emu/reg_u_add.S new file mode 100644 index 000000000000..47c4c2434d85 --- /dev/null +++ b/arch/x86/math-emu/reg_u_add.S | |||
@@ -0,0 +1,167 @@ | |||
1 | .file "reg_u_add.S" | ||
2 | /*---------------------------------------------------------------------------+ | ||
3 | | reg_u_add.S | | ||
4 | | | | ||
5 | | Add two valid (TAG_Valid) FPU_REG numbers, of the same sign, and put the | | ||
6 | | result in a destination FPU_REG. | | ||
7 | | | | ||
8 | | Copyright (C) 1992,1993,1995,1997 | | ||
9 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | ||
10 | | E-mail billm@suburbia.net | | ||
11 | | | | ||
12 | | Call from C as: | | ||
13 | | int FPU_u_add(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, | | ||
14 | | int control_w, u_char sign, int expa, int expb) | | ||
15 | | Return value is the tag of the answer, or-ed with FPU_Exception if | | ||
16 | | one was raised, or -1 on internal error. | | ||
17 | | | | ||
18 | +---------------------------------------------------------------------------*/ | ||
19 | |||
20 | /* | ||
21 | | Kernel addition routine FPU_u_add(reg *arg1, reg *arg2, reg *answ). | ||
22 | | Takes two valid reg f.p. numbers (TAG_Valid), which are | ||
23 | | treated as unsigned numbers, | ||
24 | | and returns their sum as a TAG_Valid or TAG_Special f.p. number. | ||
25 | | The returned number is normalized. | ||
26 | | Basic checks are performed if PARANOID is defined. | ||
27 | */ | ||
28 | |||
29 | #include "exception.h" | ||
30 | #include "fpu_emu.h" | ||
31 | #include "control_w.h" | ||
32 | |||
33 | .text | ||
34 | ENTRY(FPU_u_add) /* Aligns the significand of the operand with the smaller exponent to the other one, adds the two significands, then jumps to fpu_reg_round to round and store the result */ | ||
35 | pushl %ebp | ||
36 | movl %esp,%ebp | ||
37 | pushl %esi | ||
38 | pushl %edi | ||
39 | pushl %ebx | ||
40 | |||
41 | movl PARAM1,%esi /* source 1 */ | ||
42 | movl PARAM2,%edi /* source 2 */ | ||
43 | |||
44 | movl PARAM6,%ecx /* exp1 */ | ||
45 | movl %ecx,%edx /* %edx = exponent to give the result */ | ||
46 | subl PARAM7,%ecx /* exp1 - exp2 */ | ||
47 | jge L_arg1_larger | ||
48 | |||
49 | /* num1 is smaller */ | ||
50 | movl SIGL(%esi),%ebx /* load num1: it is the one to be shifted right */ | ||
51 | movl SIGH(%esi),%eax | ||
52 | |||
53 | movl %edi,%esi /* %esi -> the number with the larger exponent */ | ||
54 | movl PARAM7,%edx /* use exp2 for the result */ | ||
55 | negw %cx /* %cx = exp2 - exp1, the shift count */ | ||
56 | jmp L_accum_loaded | ||
57 | |||
58 | L_arg1_larger: | ||
59 | /* num1 has larger or equal exponent */ | ||
60 | movl SIGL(%edi),%ebx /* load num2: it is the one to be shifted right */ | ||
61 | movl SIGH(%edi),%eax | ||
62 | |||
63 | L_accum_loaded: | ||
64 | movl PARAM3,%edi /* destination */ | ||
65 | movw %dx,EXP(%edi) /* Copy exponent to destination */ | ||
66 | |||
67 | xorl %edx,%edx /* clear the extension */ | ||
68 | |||
69 | #ifdef PARANOID | ||
70 | testl $0x80000000,%eax /* the ms bit of each significand must be set */ | ||
71 | je L_bugged | ||
72 | |||
73 | testl $0x80000000,SIGH(%esi) | ||
74 | je L_bugged | ||
75 | #endif /* PARANOID */ | ||
76 | |||
77 | /* The number to be shifted is in %eax:%ebx:%edx */ | ||
78 | cmpw $32,%cx /* shrd only works for 0..31 bits */ | ||
79 | jnc L_more_than_31 | ||
80 | |||
81 | /* less than 32 bits */ | ||
82 | shrd %cl,%ebx,%edx | ||
83 | shrd %cl,%eax,%ebx | ||
84 | shr %cl,%eax | ||
85 | jmp L_shift_done | ||
86 | |||
87 | L_more_than_31: | ||
88 | cmpw $64,%cx | ||
89 | jnc L_more_than_63 | ||
90 | |||
91 | subb $32,%cl /* shift by the remainder; the other 32 bits are done by the register moves below */ | ||
92 | jz L_exactly_32 | ||
93 | |||
94 | shrd %cl,%eax,%edx | ||
95 | shr %cl,%eax | ||
96 | orl %ebx,%ebx | ||
97 | jz L_more_31_no_low /* none of the lowest bits is set */ | ||
98 | |||
99 | orl $1,%edx /* record the fact in the extension */ | ||
100 | |||
101 | L_more_31_no_low: | ||
102 | movl %eax,%ebx | ||
103 | xorl %eax,%eax | ||
104 | jmp L_shift_done | ||
105 | |||
106 | L_exactly_32: | ||
107 | movl %ebx,%edx /* move everything down one word */ | ||
108 | movl %eax,%ebx | ||
109 | xorl %eax,%eax | ||
110 | jmp L_shift_done | ||
111 | |||
112 | L_more_than_63: | ||
113 | cmpw $65,%cx | ||
114 | jnc L_more_than_64 | ||
115 | |||
116 | movl %eax,%edx /* exactly 64: the ms word becomes the extension */ | ||
117 | orl %ebx,%ebx | ||
118 | jz L_more_63_no_low | ||
119 | |||
120 | orl $1,%edx /* record the lost ls word in the extension */ | ||
121 | jmp L_more_63_no_low | ||
122 | |||
123 | L_more_than_64: | ||
124 | movl $1,%edx /* The shifted nr always at least one '1' */ | ||
125 | |||
126 | L_more_63_no_low: | ||
127 | xorl %ebx,%ebx | ||
128 | xorl %eax,%eax | ||
129 | |||
130 | L_shift_done: | ||
131 | /* Now do the addition */ | ||
132 | addl SIGL(%esi),%ebx | ||
133 | adcl SIGH(%esi),%eax | ||
134 | jnc L_round_the_result | ||
135 | |||
136 | /* Overflow, adjust the result */ | ||
137 | rcrl $1,%eax /* shift the carry back in as the new ms bit */ | ||
138 | rcrl $1,%ebx | ||
139 | rcrl $1,%edx | ||
140 | jnc L_no_bit_lost | ||
141 | |||
142 | orl $1,%edx /* keep the bit shifted out of the extension as a sticky bit */ | ||
143 | |||
144 | L_no_bit_lost: | ||
145 | incw EXP(%edi) | ||
146 | |||
147 | L_round_the_result: | ||
148 | jmp fpu_reg_round /* Round the result; it returns to our caller through the frame set up above */ | ||
149 | |||
150 | |||
151 | |||
152 | #ifdef PARANOID | ||
153 | /* If we ever get here then we have problems! */ | ||
154 | L_bugged: | ||
155 | pushl EX_INTERNAL|0x201 | ||
156 | call EXCEPTION | ||
157 | pop %ebx /* discard the argument */ | ||
158 | movl $-1,%eax /* return value for an internal error */ | ||
159 | jmp L_exit | ||
160 | |||
161 | L_exit: | ||
162 | popl %ebx | ||
163 | popl %edi | ||
164 | popl %esi | ||
165 | leave | ||
166 | ret | ||
167 | #endif /* PARANOID */ | ||
diff --git a/arch/x86/math-emu/reg_u_div.S b/arch/x86/math-emu/reg_u_div.S new file mode 100644 index 000000000000..cc00654b6f9a --- /dev/null +++ b/arch/x86/math-emu/reg_u_div.S | |||
@@ -0,0 +1,471 @@ | |||
1 | .file "reg_u_div.S" | ||
2 | /*---------------------------------------------------------------------------+ | ||
3 | | reg_u_div.S | | ||
4 | | | | ||
5 | | Divide one FPU_REG by another and put the result in a destination FPU_REG.| | ||
6 | | | | ||
7 | | Copyright (C) 1992,1993,1995,1997 | | ||
8 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | ||
9 | | E-mail billm@suburbia.net | | ||
10 | | | | ||
11 | | | | ||
12 | +---------------------------------------------------------------------------*/ | ||
13 | |||
14 | /*---------------------------------------------------------------------------+ | ||
15 | | Call from C as: | | ||
16 | | int FPU_u_div(FPU_REG *a, FPU_REG *b, FPU_REG *dest, | | ||
17 | | unsigned int control_word, char *sign) | | ||
18 | | | | ||
19 | | Does not compute the destination exponent, but does adjust it. | | ||
20 | | | | ||
21 | | Return value is the tag of the answer, or-ed with FPU_Exception if | | ||
22 | | one was raised, or -1 on internal error. | | ||
23 | +---------------------------------------------------------------------------*/ | ||
24 | |||
25 | #include "exception.h" | ||
26 | #include "fpu_emu.h" | ||
27 | #include "control_w.h" | ||
28 | |||
29 | |||
30 | /* #define dSIGL(x) (x) */ | ||
31 | /* #define dSIGH(x) 4(x) */ | ||
32 | |||
33 | |||
34 | #ifndef NON_REENTRANT_FPU | ||
35 | /* | ||
36 | Local storage on the stack: | ||
37 | Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 | ||
38 | Overflow flag: ovfl_flag | ||
39 | */ | ||
40 | #define FPU_accum_3 -4(%ebp) | ||
41 | #define FPU_accum_2 -8(%ebp) | ||
42 | #define FPU_accum_1 -12(%ebp) | ||
43 | #define FPU_accum_0 -16(%ebp) | ||
44 | #define FPU_result_1 -20(%ebp) | ||
45 | #define FPU_result_2 -24(%ebp) | ||
46 | #define FPU_ovfl_flag -28(%ebp) | ||
47 | |||
48 | #else | ||
49 | .data | ||
50 | /* | ||
51 | Local storage in a static area: | ||
52 | Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 | ||
53 | Overflow flag: ovfl_flag | ||
54 | */ | ||
55 | .align 4,0 | ||
56 | FPU_accum_3: | ||
57 | .long 0 | ||
58 | FPU_accum_2: | ||
59 | .long 0 | ||
60 | FPU_accum_1: | ||
61 | .long 0 | ||
62 | FPU_accum_0: | ||
63 | .long 0 | ||
64 | FPU_result_1: | ||
65 | .long 0 | ||
66 | FPU_result_2: | ||
67 | .long 0 | ||
68 | FPU_ovfl_flag: | ||
69 | .byte 0 | ||
70 | #endif /* NON_REENTRANT_FPU */ | ||
71 | |||
72 | #define REGA PARAM1 | ||
73 | #define REGB PARAM2 | ||
74 | #define DEST PARAM3 | ||
75 | |||
76 | .text | ||
77 | ENTRY(FPU_u_div) | ||
78 | pushl %ebp | ||
79 | movl %esp,%ebp | ||
80 | #ifndef NON_REENTRANT_FPU | ||
81 | subl $28,%esp /* Seven 4-byte local slots, -4(%ebp) down to -28(%ebp) */ | ||
82 | #endif /* NON_REENTRANT_FPU */ | ||
83 | |||
84 | pushl %esi | ||
85 | pushl %edi | ||
86 | pushl %ebx | ||
87 | |||
88 | movl REGA,%esi /* %esi -> a, the dividend */ | ||
89 | movl REGB,%ebx /* %ebx -> b, the divisor */ | ||
90 | movl DEST,%edi /* %edi -> dest */ | ||
91 | |||
92 | movswl EXP(%esi),%edx | ||
93 | movswl EXP(%ebx),%eax | ||
94 | subl %eax,%edx | ||
95 | addl EXP_BIAS,%edx /* %edx = exp(a) - exp(b) + EXP_BIAS */ | ||
96 | |||
97 | /* A denormal and a large number can cause an exponent underflow */ | ||
98 | cmpl EXP_WAY_UNDER,%edx | ||
99 | jg xExp_not_underflow | ||
100 | |||
101 | /* Set to a really low value to allow correct handling */ | ||
102 | movl EXP_WAY_UNDER,%edx | ||
103 | |||
104 | xExp_not_underflow: | ||
105 | |||
106 | movw %dx,EXP(%edi) /* Only the low 16 bits are stored */ | ||
107 | |||
108 | #ifdef PARANOID | ||
109 | /* testl $0x80000000, SIGH(%esi) // Dividend */ | ||
110 | /* je L_bugged */ | ||
111 | testl $0x80000000, SIGH(%ebx) /* Divisor */ | ||
112 | je L_bugged | ||
113 | #endif /* PARANOID */ | ||
114 | |||
115 | /* Check if the divisor can be treated as having just 32 bits */ | ||
116 | cmpl $0,SIGL(%ebx) | ||
117 | jnz L_Full_Division /* Can't do a quick divide */ | ||
118 | |||
119 | /* We should be able to zip through the division here */ | ||
120 | movl SIGH(%ebx),%ecx /* The divisor */ | ||
121 | movl SIGH(%esi),%edx /* Dividend */ | ||
122 | movl SIGL(%esi),%eax /* Dividend */ | ||
123 | |||
124 | cmpl %ecx,%edx | ||
125 | setaeb FPU_ovfl_flag /* Keep a record */ | ||
126 | jb L_no_adjust | ||
127 | |||
128 | subl %ecx,%edx /* Prevent the overflow */ | ||
129 | |||
130 | L_no_adjust: | ||
131 | /* Divide the 64 bit number by the 32 bit denominator */ | ||
132 | divl %ecx /* %eax = quotient, %edx = remainder */ | ||
133 | movl %eax,FPU_result_2 | ||
134 | |||
135 | /* Work on the remainder of the first division */ | ||
136 | xorl %eax,%eax | ||
137 | divl %ecx /* Divides remainder:0 to get the next 32 quotient bits */ | ||
138 | movl %eax,FPU_result_1 | ||
139 | |||
140 | /* Work on the remainder of the 64 bit division */ | ||
141 | xorl %eax,%eax | ||
142 | divl %ecx /* %eax = a third quotient dword, used as the extension */ | ||
143 | |||
144 | testb $255,FPU_ovfl_flag /* was the num > denom ? */ | ||
145 | je L_no_overflow | ||
146 | |||
147 | /* Do the shifting here */ | ||
148 | /* increase the exponent */ | ||
149 | incw EXP(%edi) | ||
150 | |||
151 | /* shift the mantissa right one bit */ | ||
152 | stc /* To set the ms bit */ | ||
153 | rcrl FPU_result_2 | ||
154 | rcrl FPU_result_1 | ||
155 | rcrl %eax | ||
156 | |||
157 | L_no_overflow: | ||
158 | jmp LRound_precision /* Do the rounding as required */ | ||
159 | |||
160 | |||
161 | /*---------------------------------------------------------------------------+ | ||
162 | | Divide: Return arg1/arg2 to arg3. | | ||
163 | | | | ||
164 | | This routine does not use the exponents of arg1 and arg2, but does | | ||
165 | | adjust the exponent of arg3. | | ||
166 | | | | ||
167 | | The maximum returned value is (ignoring exponents) | | ||
168 | | .ffffffff ffffffff | | ||
169 | | ------------------ = 1.ffffffff fffffffe | | ||
170 | | .80000000 00000000 | | ||
171 | | and the minimum is | | ||
172 | | .80000000 00000000 | | ||
173 | | ------------------ = .80000000 00000001 (rounded) | | ||
174 | | .ffffffff ffffffff | | ||
175 | | | | ||
176 | +---------------------------------------------------------------------------*/ | ||
177 | |||
178 | |||
179 | L_Full_Division: | ||
180 | /* Save extended dividend in local register */ | ||
181 | movl SIGL(%esi),%eax | ||
182 | movl %eax,FPU_accum_2 | ||
183 | movl SIGH(%esi),%eax | ||
184 | movl %eax,FPU_accum_3 | ||
185 | xorl %eax,%eax | ||
186 | movl %eax,FPU_accum_1 /* zero the extension */ | ||
187 | movl %eax,FPU_accum_0 /* zero the extension */ | ||
188 | |||
189 | movl SIGL(%esi),%eax /* Get the current num */ | ||
190 | movl SIGH(%esi),%edx | ||
191 | |||
192 | /*----------------------------------------------------------------------*/ | ||
193 | /* Initialization done. | ||
194 | Do the first 32 bits. */ | ||
195 | |||
196 | movb $0,FPU_ovfl_flag | ||
197 | cmpl SIGH(%ebx),%edx /* Test for imminent overflow */ | ||
198 | jb LLess_than_1 | ||
199 | ja LGreater_than_1 | ||
200 | |||
201 | cmpl SIGL(%ebx),%eax | ||
202 | jb LLess_than_1 | ||
203 | |||
204 | LGreater_than_1: | ||
205 | /* The dividend is greater or equal, would cause overflow */ | ||
206 | setaeb FPU_ovfl_flag /* Keep a record */ | ||
207 | |||
208 | subl SIGL(%ebx),%eax | ||
209 | sbbl SIGH(%ebx),%edx /* Prevent the overflow */ | ||
210 | movl %eax,FPU_accum_2 | ||
211 | movl %edx,FPU_accum_3 | ||
212 | |||
213 | LLess_than_1: | ||
214 | /* At this point, we have a dividend < divisor, with a record of | ||
215 | adjustment in FPU_ovfl_flag */ | ||
216 | |||
217 | /* We will divide by a number which is too large */ | ||
218 | movl SIGH(%ebx),%ecx | ||
219 | addl $1,%ecx /* addl rather than incl: the carry is tested next */ | ||
220 | jnc LFirst_div_not_1 | ||
221 | |||
222 | /* here we need to divide by 100000000h, | ||
223 | i.e., no division at all.. */ | ||
224 | mov %edx,%eax | ||
225 | jmp LFirst_div_done | ||
226 | |||
227 | LFirst_div_not_1: | ||
228 | divl %ecx /* Divide the numerator by the augmented | ||
229 | denom ms dw */ | ||
230 | |||
231 | LFirst_div_done: | ||
232 | movl %eax,FPU_result_2 /* Put the result in the answer */ | ||
233 | |||
234 | mull SIGH(%ebx) /* mul by the ms dw of the denom */ | ||
235 | |||
236 | subl %eax,FPU_accum_2 /* Subtract from the num local reg */ | ||
237 | sbbl %edx,FPU_accum_3 | ||
238 | |||
239 | movl FPU_result_2,%eax /* Get the result back */ | ||
240 | mull SIGL(%ebx) /* now mul the ls dw of the denom */ | ||
241 | |||
242 | subl %eax,FPU_accum_1 /* Subtract from the num local reg */ | ||
243 | sbbl %edx,FPU_accum_2 | ||
244 | sbbl $0,FPU_accum_3 | ||
245 | je LDo_2nd_32_bits /* Must check for non-zero result here */ | ||
246 | |||
247 | #ifdef PARANOID | ||
248 | jb L_bugged_1 | ||
249 | #endif /* PARANOID */ | ||
250 | |||
251 | /* need to subtract the denom once more */ | ||
252 | incl FPU_result_2 /* Correct the answer */ | ||
253 | |||
254 | movl SIGL(%ebx),%eax | ||
255 | movl SIGH(%ebx),%edx | ||
256 | subl %eax,FPU_accum_1 /* Subtract from the num local reg */ | ||
257 | sbbl %edx,FPU_accum_2 | ||
258 | |||
259 | #ifdef PARANOID | ||
260 | sbbl $0,FPU_accum_3 | ||
261 | jne L_bugged_1 /* Must check for non-zero result here */ | ||
262 | #endif /* PARANOID */ | ||
263 | |||
264 | /*----------------------------------------------------------------------*/ | ||
265 | /* Half of the main problem is done, there is just a reduced numerator | ||
266 | to handle now. | ||
267 | Work with the second 32 bits, FPU_accum_0 not used from now on */ | ||
268 | LDo_2nd_32_bits: | ||
269 | movl FPU_accum_2,%edx /* get the reduced num */ | ||
270 | movl FPU_accum_1,%eax | ||
271 | |||
272 | /* need to check for possible subsequent overflow */ | ||
273 | cmpl SIGH(%ebx),%edx | ||
274 | jb LDo_2nd_div | ||
275 | ja LPrevent_2nd_overflow | ||
276 | |||
277 | cmpl SIGL(%ebx),%eax | ||
278 | jb LDo_2nd_div | ||
279 | |||
280 | LPrevent_2nd_overflow: | ||
281 | /* The numerator is greater or equal, would cause overflow */ | ||
282 | /* prevent overflow */ | ||
283 | subl SIGL(%ebx),%eax | ||
284 | sbbl SIGH(%ebx),%edx | ||
285 | movl %edx,FPU_accum_2 | ||
286 | movl %eax,FPU_accum_1 | ||
287 | |||
288 | incl FPU_result_2 /* Reflect the subtraction in the answer */ | ||
289 | |||
290 | #ifdef PARANOID | ||
291 | je L_bugged_2 /* Can't bump the result to 1.0 */ | ||
292 | #endif /* PARANOID */ | ||
293 | |||
294 | LDo_2nd_div: | ||
295 | cmpl $0,%ecx /* augmented denom msw */ | ||
296 | jnz LSecond_div_not_1 | ||
297 | |||
298 | /* %ecx == 0, we are dividing by 1.0 */ | ||
299 | mov %edx,%eax | ||
300 | jmp LSecond_div_done | ||
301 | |||
302 | LSecond_div_not_1: | ||
303 | divl %ecx /* Divide the numerator by the denom ms dw */ | ||
304 | |||
305 | LSecond_div_done: | ||
306 | movl %eax,FPU_result_1 /* Put the result in the answer */ | ||
307 | |||
308 | mull SIGH(%ebx) /* mul by the ms dw of the denom */ | ||
309 | |||
310 | subl %eax,FPU_accum_1 /* Subtract from the num local reg */ | ||
311 | sbbl %edx,FPU_accum_2 | ||
312 | |||
313 | #ifdef PARANOID | ||
314 | jc L_bugged_2 | ||
315 | #endif /* PARANOID */ | ||
316 | |||
317 | movl FPU_result_1,%eax /* Get the result back */ | ||
318 | mull SIGL(%ebx) /* now mul the ls dw of the denom */ | ||
319 | |||
320 | subl %eax,FPU_accum_0 /* Subtract from the num local reg */ | ||
321 | sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */ | ||
322 | sbbl $0,FPU_accum_2 | ||
323 | |||
324 | #ifdef PARANOID | ||
325 | jc L_bugged_2 | ||
326 | #endif /* PARANOID */ | ||
327 | |||
328 | jz LDo_3rd_32_bits | ||
329 | |||
330 | #ifdef PARANOID | ||
331 | cmpl $1,FPU_accum_2 | ||
332 | jne L_bugged_2 | ||
333 | #endif /* PARANOID */ | ||
334 | |||
335 | /* need to subtract the denom once more */ | ||
336 | movl SIGL(%ebx),%eax | ||
337 | movl SIGH(%ebx),%edx | ||
338 | subl %eax,FPU_accum_0 /* Subtract from the num local reg */ | ||
339 | sbbl %edx,FPU_accum_1 | ||
340 | sbbl $0,FPU_accum_2 | ||
341 | |||
342 | #ifdef PARANOID | ||
343 | jc L_bugged_2 | ||
344 | jne L_bugged_2 | ||
345 | #endif /* PARANOID */ | ||
346 | |||
347 | addl $1,FPU_result_1 /* Correct the answer */ | ||
348 | adcl $0,FPU_result_2 | ||
349 | |||
350 | #ifdef PARANOID | ||
351 | jc L_bugged_2 /* Must check for non-zero result here */ | ||
352 | #endif /* PARANOID */ | ||
353 | |||
354 | /*----------------------------------------------------------------------*/ | ||
355 | /* The division is essentially finished here, we just need to perform | ||
356 | tidying operations. | ||
357 | Deal with the 3rd 32 bits */ | ||
358 | LDo_3rd_32_bits: | ||
359 | movl FPU_accum_1,%edx /* get the reduced num */ | ||
360 | movl FPU_accum_0,%eax | ||
361 | |||
362 | /* need to check for possible subsequent overflow */ | ||
363 | cmpl SIGH(%ebx),%edx /* denom */ | ||
364 | jb LRound_prep | ||
365 | ja LPrevent_3rd_overflow | ||
366 | |||
367 | cmpl SIGL(%ebx),%eax /* denom */ | ||
368 | jb LRound_prep | ||
369 | |||
370 | LPrevent_3rd_overflow: | ||
371 | /* prevent overflow */ | ||
372 | subl SIGL(%ebx),%eax | ||
373 | sbbl SIGH(%ebx),%edx | ||
374 | movl %edx,FPU_accum_1 | ||
375 | movl %eax,FPU_accum_0 | ||
376 | |||
377 | addl $1,FPU_result_1 /* Reflect the subtraction in the answer */ | ||
378 | adcl $0,FPU_result_2 | ||
379 | jne LRound_prep /* FPU_result_2 is non-zero: no overflow */ | ||
380 | jnc LRound_prep /* Zero without a carry: no overflow either */ | ||
381 | |||
382 | /* This is a tricky spot, there is an overflow of the answer */ | ||
383 | movb $255,FPU_ovfl_flag /* Overflow -> 1.000 */ | ||
384 | |||
385 | LRound_prep: | ||
386 | /* | ||
387 | * Prepare for rounding. | ||
388 | * To test for rounding, we just need to compare 2*accum with the | ||
389 | * denom. | ||
390 | */ | ||
391 | movl FPU_accum_0,%ecx | ||
392 | movl FPU_accum_1,%edx | ||
393 | movl %ecx,%eax | ||
394 | orl %edx,%eax | ||
395 | jz LRound_ovfl /* The accumulator contains zero. */ | ||
396 | |||
397 | /* Multiply by 2 */ | ||
398 | clc | ||
399 | rcll $1,%ecx | ||
400 | rcll $1,%edx | ||
401 | jc LRound_large /* No need to compare, denom smaller */ | ||
402 | |||
403 | subl SIGL(%ebx),%ecx | ||
404 | sbbl SIGH(%ebx),%edx | ||
405 | jnc LRound_not_small | ||
406 | |||
407 | movl $0x70000000,%eax /* Denom was larger */ | ||
408 | jmp LRound_ovfl | ||
409 | |||
410 | LRound_not_small: | ||
411 | jnz LRound_large | ||
412 | |||
413 | movl $0x80000000,%eax /* Remainder was exactly 1/2 denom */ | ||
414 | jmp LRound_ovfl | ||
415 | |||
416 | LRound_large: | ||
417 | movl $0xff000000,%eax /* Denom was smaller */ | ||
418 | |||
419 | LRound_ovfl: | ||
420 | /* We are now ready to deal with rounding, but first we must get | ||
421 | the bits properly aligned */ | ||
422 | testb $255,FPU_ovfl_flag /* was the num > denom ? */ | ||
423 | je LRound_precision | ||
424 | |||
425 | incw EXP(%edi) | ||
426 | |||
427 | /* shift the mantissa right one bit */ | ||
428 | stc /* Will set the ms bit */ | ||
429 | rcrl FPU_result_2 | ||
430 | rcrl FPU_result_1 | ||
431 | rcrl %eax | ||
432 | |||
433 | /* Round the result as required */ | ||
434 | LRound_precision: | ||
435 | decw EXP(%edi) /* binary point between 1st & 2nd bits */ | ||
436 | |||
437 | movl %eax,%edx /* extension bits -> %edx */ | ||
438 | movl FPU_result_1,%ebx /* ls dword of the quotient -> %ebx */ | ||
439 | movl FPU_result_2,%eax /* ms dword of the quotient -> %eax */ | ||
440 | jmp fpu_reg_round /* Not defined in this file; presumably unwinds this frame and returns - confirm */ | ||
441 | |||
442 | |||
443 | #ifdef PARANOID | ||
444 | /* The logic is wrong if we got here */ | ||
445 | L_bugged: | ||
446 | pushl EX_INTERNAL|0x202 /* Argument for EXCEPTION */ | ||
447 | call EXCEPTION | ||
448 | pop %ebx /* Discard the argument; %ebx is reloaded in L_exit */ | ||
449 | jmp L_exit | ||
450 | |||
451 | L_bugged_1: | ||
452 | pushl EX_INTERNAL|0x203 | ||
453 | call EXCEPTION | ||
454 | pop %ebx | ||
455 | jmp L_exit | ||
456 | |||
457 | L_bugged_2: | ||
458 | pushl EX_INTERNAL|0x204 | ||
459 | call EXCEPTION | ||
460 | pop %ebx | ||
461 | jmp L_exit | ||
462 | |||
463 | L_exit: | ||
464 | movl $-1,%eax /* Return -1: internal error */ | ||
465 | popl %ebx | ||
466 | popl %edi | ||
467 | popl %esi | ||
468 | |||
469 | leave | ||
470 | ret | ||
471 | #endif /* PARANOID */ | ||
diff --git a/arch/x86/math-emu/reg_u_mul.S b/arch/x86/math-emu/reg_u_mul.S new file mode 100644 index 000000000000..973f12af97df --- /dev/null +++ b/arch/x86/math-emu/reg_u_mul.S | |||
@@ -0,0 +1,148 @@ | |||
1 | .file "reg_u_mul.S" | ||
2 | /*---------------------------------------------------------------------------+ | ||
3 | | reg_u_mul.S | | ||
4 | | | | ||
5 | | Core multiplication routine | | ||
6 | | | | ||
7 | | Copyright (C) 1992,1993,1995,1997 | | ||
8 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | ||
9 | | E-mail billm@suburbia.net | | ||
10 | | | | ||
11 | | | | ||
12 | +---------------------------------------------------------------------------*/ | ||
13 | |||
14 | /*---------------------------------------------------------------------------+ | ||
15 | | Basic multiplication routine. | | ||
16 | | Does not check the resulting exponent for overflow/underflow | | ||
17 | | | | ||
18 | | FPU_u_mul(FPU_REG *a, FPU_REG *b, FPU_REG *c, unsigned int cw); | | ||
19 | | | | ||
20 | | Internal working is at approx 128 bits. | | ||
21 | | Result is rounded to nearest 53 or 64 bits, using "nearest or even". | | ||
22 | +---------------------------------------------------------------------------*/ | ||
23 | |||
24 | #include "exception.h" | ||
25 | #include "fpu_emu.h" | ||
26 | #include "control_w.h" | ||
27 | |||
28 | |||
29 | |||
30 | #ifndef NON_REENTRANT_FPU | ||
31 | /* Local storage on the stack: */ | ||
32 | #define FPU_accum_0 -4(%ebp) /* ms word */ | ||
33 | #define FPU_accum_1 -8(%ebp) | ||
34 | |||
35 | #else | ||
36 | /* Local storage in a static area: */ | ||
37 | .data | ||
38 | .align 4,0 | ||
39 | FPU_accum_0: | ||
40 | .long 0 | ||
41 | FPU_accum_1: | ||
42 | .long 0 | ||
43 | #endif /* NON_REENTRANT_FPU */ | ||
44 | |||
45 | |||
46 | .text | ||
47 | ENTRY(FPU_u_mul) | ||
48 | pushl %ebp | ||
49 | movl %esp,%ebp | ||
50 | #ifndef NON_REENTRANT_FPU | ||
51 | subl $8,%esp /* Room for FPU_accum_0 and FPU_accum_1 */ | ||
52 | #endif /* NON_REENTRANT_FPU */ | ||
53 | |||
54 | pushl %esi | ||
55 | pushl %edi | ||
56 | pushl %ebx | ||
57 | |||
58 | movl PARAM1,%esi /* %esi -> a */ | ||
59 | movl PARAM2,%edi /* %edi -> b */ | ||
60 | |||
61 | #ifdef PARANOID | ||
62 | testl $0x80000000,SIGH(%esi) /* Both significands must have the ms bit set */ | ||
63 | jz L_bugged | ||
64 | testl $0x80000000,SIGH(%edi) | ||
65 | jz L_bugged | ||
66 | #endif /* PARANOID */ | ||
67 | /* The 128 bit product is built in %ecx:%ebx:FPU_accum_1:FPU_accum_0 */ | ||
68 | xorl %ecx,%ecx | ||
69 | xorl %ebx,%ebx | ||
70 | |||
71 | movl SIGL(%esi),%eax | ||
72 | mull SIGL(%edi) /* ls(a) * ls(b) */ | ||
73 | movl %eax,FPU_accum_0 /* bits 0 - 31 of the product */ | ||
74 | movl %edx,FPU_accum_1 | ||
75 | |||
76 | movl SIGL(%esi),%eax | ||
77 | mull SIGH(%edi) /* ls(a) * ms(b) */ | ||
78 | addl %eax,FPU_accum_1 | ||
79 | adcl %edx,%ebx | ||
80 | /* adcl $0,%ecx // overflow here is not possible */ | ||
81 | |||
82 | movl SIGH(%esi),%eax | ||
83 | mull SIGL(%edi) /* ms(a) * ls(b) */ | ||
84 | addl %eax,FPU_accum_1 | ||
85 | adcl %edx,%ebx | ||
86 | adcl $0,%ecx | ||
87 | |||
88 | movl SIGH(%esi),%eax | ||
89 | mull SIGH(%edi) /* ms(a) * ms(b) */ | ||
90 | addl %eax,%ebx | ||
91 | adcl %edx,%ecx | ||
92 | |||
93 | /* Get the sum of the exponents. */ | ||
94 | movl PARAM6,%eax /* NOTE(review): the prototype in the file header lists only four args - confirm */ | ||
95 | subl EXP_BIAS-1,%eax | ||
96 | |||
97 | /* Two denormals can cause an exponent underflow */ | ||
98 | cmpl EXP_WAY_UNDER,%eax | ||
99 | jg Exp_not_underflow | ||
100 | |||
101 | /* Set to a really low value to allow correct handling */ | ||
102 | movl EXP_WAY_UNDER,%eax | ||
103 | |||
104 | Exp_not_underflow: | ||
105 | |||
106 | /* Have now finished with the sources */ | ||
107 | movl PARAM3,%edi /* Point to the destination */ | ||
108 | movw %ax,EXP(%edi) | ||
109 | |||
110 | /* Now make sure that the result is normalized */ | ||
111 | testl $0x80000000,%ecx | ||
112 | jnz LResult_Normalised | ||
113 | |||
114 | /* Normalize by shifting left one bit */ | ||
115 | shll $1,FPU_accum_0 | ||
116 | rcll $1,FPU_accum_1 | ||
117 | rcll $1,%ebx | ||
118 | rcll $1,%ecx | ||
119 | decw EXP(%edi) | ||
120 | |||
121 | LResult_Normalised: | ||
122 | movl FPU_accum_0,%eax | ||
123 | movl FPU_accum_1,%edx /* %edx = bits 32 - 63, used as the extension */ | ||
124 | orl %eax,%eax | ||
125 | jz L_extent_zero | ||
126 | |||
127 | orl $1,%edx /* Bits 0 - 31 were non-zero: record that in the extension */ | ||
128 | |||
129 | L_extent_zero: | ||
130 | movl %ecx,%eax /* %eax:%ebx = upper 64 bits of the product */ | ||
131 | jmp fpu_reg_round /* Not defined in this file; presumably unwinds this frame and returns - confirm */ | ||
132 | |||
133 | |||
134 | #ifdef PARANOID | ||
135 | L_bugged: | ||
136 | pushl EX_INTERNAL|0x205 /* Argument for EXCEPTION */ | ||
137 | call EXCEPTION | ||
138 | pop %ebx /* Discard the argument; %ebx is reloaded in L_exit */ | ||
139 | movl $-1,%eax /* Return -1 on internal error, like the other FPU_u_* routines */ | ||
140 | jmp L_exit | ||
141 | L_exit: | ||
142 | popl %ebx | ||
143 | popl %edi | ||
144 | popl %esi | ||
145 | leave | ||
146 | ret | ||
147 | #endif /* PARANOID */ | ||
148 | |||
diff --git a/arch/x86/math-emu/reg_u_sub.S b/arch/x86/math-emu/reg_u_sub.S new file mode 100644 index 000000000000..1b6c24801d22 --- /dev/null +++ b/arch/x86/math-emu/reg_u_sub.S | |||
@@ -0,0 +1,272 @@ | |||
1 | .file "reg_u_sub.S" | ||
2 | /*---------------------------------------------------------------------------+ | ||
3 | | reg_u_sub.S | | ||
4 | | | | ||
5 | | Core floating point subtraction routine. | | ||
6 | | | | ||
7 | | Copyright (C) 1992,1993,1995,1997 | | ||
8 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | ||
9 | | E-mail billm@suburbia.net | | ||
10 | | | | ||
11 | | Call from C as: | | ||
12 | | int FPU_u_sub(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, | | ||
13 | | int control_w) | | ||
14 | | Return value is the tag of the answer, or-ed with FPU_Exception if | | ||
15 | | one was raised, or -1 on internal error. | | ||
16 | | | | ||
17 | +---------------------------------------------------------------------------*/ | ||
18 | |||
19 | /* | ||
20 | | Kernel subtraction routine FPU_u_sub(reg *arg1, reg *arg2, reg *answ). | ||
21 | | Takes two valid reg f.p. numbers (TAG_Valid), which are | ||
22 | | treated as unsigned numbers, | ||
23 | | and returns their difference as a TAG_Valid or TAG_Zero f.p. | ||
24 | | number. | ||
25 | | The first number (arg1) must be the larger. | ||
26 | | The returned number is normalized. | ||
27 | | Basic checks are performed if PARANOID is defined. | ||
28 | */ | ||
29 | |||
30 | #include "exception.h" | ||
31 | #include "fpu_emu.h" | ||
32 | #include "control_w.h" | ||
33 | |||
34 | .text | ||
35 | ENTRY(FPU_u_sub) | ||
36 | pushl %ebp | ||
37 | movl %esp,%ebp | ||
38 | pushl %esi | ||
39 | pushl %edi | ||
40 | pushl %ebx | ||
41 | |||
42 | movl PARAM1,%esi /* source 1 */ | ||
43 | movl PARAM2,%edi /* source 2 */ | ||
44 | |||
45 | movl PARAM6,%ecx /* NOTE(review): PARAM6/PARAM7 are read although the header lists four args - confirm */ | ||
46 | subl PARAM7,%ecx /* exp1 - exp2 */ | ||
47 | |||
48 | #ifdef PARANOID | ||
49 | /* source 2 is always smaller than source 1 */ | ||
50 | js L_bugged_1 | ||
51 | |||
52 | testl $0x80000000,SIGH(%edi) /* The args are assumed to be normalized */ | ||
53 | je L_bugged_2 | ||
54 | |||
55 | testl $0x80000000,SIGH(%esi) | ||
56 | je L_bugged_2 | ||
57 | #endif /* PARANOID */ | ||
58 | |||
59 | /*--------------------------------------+ | ||
60 | | Form a register holding the | | ||
61 | | smaller number | | ||
62 | +--------------------------------------*/ | ||
63 | movl SIGH(%edi),%eax /* register ms word */ | ||
64 | movl SIGL(%edi),%ebx /* register ls word */ | ||
65 | |||
66 | movl PARAM3,%edi /* destination */ | ||
67 | movl PARAM6,%edx | ||
68 | movw %dx,EXP(%edi) /* Copy exponent to destination */ | ||
69 | |||
70 | xorl %edx,%edx /* register extension */ | ||
71 | |||
72 | /*--------------------------------------+ | ||
73 | | Shift the temporary register | | ||
74 | | right the required number of | | ||
75 | | places. | | ||
76 | +--------------------------------------*/ | ||
77 | |||
78 | cmpw $32,%cx /* shrd only works for 0..31 bits */ | ||
79 | jnc L_more_than_31 | ||
80 | |||
81 | /* less than 32 bits */ | ||
82 | shrd %cl,%ebx,%edx | ||
83 | shrd %cl,%eax,%ebx | ||
84 | shr %cl,%eax | ||
85 | jmp L_shift_done | ||
86 | |||
87 | L_more_than_31: | ||
88 | cmpw $64,%cx | ||
89 | jnc L_more_than_63 | ||
90 | |||
91 | subb $32,%cl | ||
92 | jz L_exactly_32 | ||
93 | |||
94 | shrd %cl,%eax,%edx | ||
95 | shr %cl,%eax | ||
96 | orl %ebx,%ebx | ||
97 | jz L_more_31_no_low /* none of the lowest bits is set */ | ||
98 | |||
99 | orl $1,%edx /* record the fact in the extension */ | ||
100 | |||
101 | L_more_31_no_low: | ||
102 | movl %eax,%ebx | ||
103 | xorl %eax,%eax | ||
104 | jmp L_shift_done | ||
105 | |||
106 | L_exactly_32: | ||
107 | movl %ebx,%edx | ||
108 | movl %eax,%ebx | ||
109 | xorl %eax,%eax | ||
110 | jmp L_shift_done | ||
111 | |||
112 | L_more_than_63: | ||
113 | cmpw $65,%cx | ||
114 | jnc L_more_than_64 | ||
115 | |||
116 | /* Shift right by 64 bits */ | ||
117 | movl %eax,%edx | ||
118 | orl %ebx,%ebx | ||
119 | jz L_more_63_no_low | ||
120 | |||
121 | orl $1,%edx | ||
122 | jmp L_more_63_no_low | ||
123 | |||
124 | L_more_than_64: | ||
125 | jne L_more_than_65 /* Still testing the flags of the cmpw $65 above */ | ||
126 | |||
127 | /* Shift right by 65 bits */ | ||
128 | /* Carry is clear if we get here */ | ||
129 | movl %eax,%edx | ||
130 | rcrl %edx | ||
131 | jnc L_shift_65_nc | ||
132 | |||
133 | orl $1,%edx | ||
134 | jmp L_more_63_no_low | ||
135 | |||
136 | L_shift_65_nc: | ||
137 | orl %ebx,%ebx | ||
138 | jz L_more_63_no_low | ||
139 | |||
140 | orl $1,%edx | ||
141 | jmp L_more_63_no_low | ||
142 | |||
143 | L_more_than_65: | ||
144 | movl $1,%edx /* The shifted number always has at least one '1' bit */ | ||
145 | |||
146 | L_more_63_no_low: | ||
147 | xorl %ebx,%ebx | ||
148 | xorl %eax,%eax | ||
149 | |||
150 | L_shift_done: | ||
151 | L_subtr: | ||
152 | /*------------------------------+ | ||
153 | | Do the subtraction | | ||
154 | +------------------------------*/ | ||
155 | xorl %ecx,%ecx | ||
156 | subl %edx,%ecx /* 0 - extension; the borrow feeds the sbbl below */ | ||
157 | movl %ecx,%edx | ||
158 | movl SIGL(%esi),%ecx | ||
159 | sbbl %ebx,%ecx | ||
160 | movl %ecx,%ebx | ||
161 | movl SIGH(%esi),%ecx | ||
162 | sbbl %eax,%ecx | ||
163 | movl %ecx,%eax /* Difference is now in %eax:%ebx:%edx */ | ||
164 | |||
165 | #ifdef PARANOID | ||
166 | /* We can never get a borrow */ | ||
167 | jc L_bugged | ||
168 | #endif /* PARANOID */ | ||
169 | |||
170 | /*--------------------------------------+ | ||
171 | | Normalize the result | | ||
172 | +--------------------------------------*/ | ||
173 | testl $0x80000000,%eax | ||
174 | jnz L_round /* no shifting needed */ | ||
175 | |||
176 | orl %eax,%eax | ||
177 | jnz L_shift_1 /* shift left 1 - 31 bits */ | ||
178 | |||
179 | orl %ebx,%ebx | ||
180 | jnz L_shift_32 /* shift left 32 - 63 bits */ | ||
181 | |||
182 | /* | ||
183 | * A rare case, the only one which is non-zero if we got here | ||
184 | * is: 1000000 .... 0000 | ||
185 | * -0111111 .... 1111 1 | ||
186 | * -------------------- | ||
187 | * 0000000 .... 0000 1 | ||
188 | */ | ||
189 | |||
190 | cmpl $0x80000000,%edx | ||
191 | jnz L_must_be_zero | ||
192 | |||
193 | /* Shift left 64 bits */ | ||
194 | subw $64,EXP(%edi) | ||
195 | xchg %edx,%eax /* %eax = 0x80000000, %edx = 0; %ebx is already 0 */ | ||
196 | jmp fpu_reg_round | ||
197 | |||
198 | L_must_be_zero: | ||
199 | #ifdef PARANOID | ||
200 | orl %edx,%edx | ||
201 | jnz L_bugged_3 | ||
202 | #endif /* PARANOID */ | ||
203 | |||
204 | /* The result is zero */ | ||
205 | movw $0,EXP(%edi) /* exponent */ | ||
206 | movl $0,SIGL(%edi) | ||
207 | movl $0,SIGH(%edi) | ||
208 | movl TAG_Zero,%eax /* Return value: the tag of the answer */ | ||
209 | jmp L_exit | ||
210 | |||
211 | L_shift_32: | ||
212 | movl %ebx,%eax | ||
213 | movl %edx,%ebx | ||
214 | movl $0,%edx | ||
215 | subw $32,EXP(%edi) /* Can get underflow here */ | ||
216 | |||
217 | /* We need to shift left by 1 - 31 bits */ | ||
218 | L_shift_1: | ||
219 | bsrl %eax,%ecx /* get the required shift in %ecx */ | ||
220 | subl $31,%ecx | ||
221 | negl %ecx /* %ecx = 31 - index of the highest set bit */ | ||
222 | shld %cl,%ebx,%eax | ||
223 | shld %cl,%edx,%ebx | ||
224 | shl %cl,%edx | ||
225 | subw %cx,EXP(%edi) /* Can get underflow here */ | ||
226 | |||
227 | L_round: | ||
228 | jmp fpu_reg_round /* Round the result */ | ||
229 | |||
230 | |||
231 | #ifdef PARANOID | ||
232 | L_bugged_1: | ||
233 | pushl EX_INTERNAL|0x206 /* Argument for EXCEPTION */ | ||
234 | call EXCEPTION | ||
235 | pop %ebx /* Discard the argument; %ebx is reloaded in L_exit */ | ||
236 | jmp L_error_exit | ||
237 | |||
238 | L_bugged_2: | ||
239 | pushl EX_INTERNAL|0x209 | ||
240 | call EXCEPTION | ||
241 | pop %ebx | ||
242 | jmp L_error_exit | ||
243 | |||
244 | L_bugged_3: | ||
245 | pushl EX_INTERNAL|0x210 | ||
246 | call EXCEPTION | ||
247 | pop %ebx | ||
248 | jmp L_error_exit | ||
249 | |||
250 | L_bugged_4: | ||
251 | pushl EX_INTERNAL|0x211 /* No jump to L_bugged_4 exists in this routine */ | ||
252 | call EXCEPTION | ||
253 | pop %ebx | ||
254 | jmp L_error_exit | ||
255 | |||
256 | L_bugged: | ||
257 | pushl EX_INTERNAL|0x212 | ||
258 | call EXCEPTION | ||
259 | pop %ebx | ||
260 | jmp L_error_exit | ||
261 | |||
262 | L_error_exit: | ||
263 | movl $-1,%eax /* Return -1: internal error; falls through into L_exit */ | ||
264 | |||
265 | #endif /* PARANOID */ | ||
266 | |||
267 | L_exit: | ||
268 | popl %ebx | ||
269 | popl %edi | ||
270 | popl %esi | ||
271 | leave | ||
272 | ret | ||
diff --git a/arch/x86/math-emu/round_Xsig.S b/arch/x86/math-emu/round_Xsig.S new file mode 100644 index 000000000000..bbe0e87718e4 --- /dev/null +++ b/arch/x86/math-emu/round_Xsig.S | |||
@@ -0,0 +1,141 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | round_Xsig.S | | ||
3 | | | | ||
4 | | Copyright (C) 1992,1993,1994,1995 | | ||
5 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
6 | | Australia. E-mail billm@jacobi.maths.monash.edu.au | | ||
7 | | | | ||
8 | | Normalize and round a 12 byte quantity. | | ||
9 | | Call from C as: | | ||
10 | | int round_Xsig(Xsig *n) | | ||
11 | | | | ||
12 | | Normalize a 12 byte quantity. | | ||
13 | | Call from C as: | | ||
14 | | int norm_Xsig(Xsig *n) | | ||
15 | | | | ||
16 | | Each function returns the size of the shift (nr of bits). | | ||
17 | | | | ||
18 | +---------------------------------------------------------------------------*/ | ||
19 | .file "round_Xsig.S" | ||
20 | |||
21 | #include "fpu_emu.h" | ||
22 | |||
23 | |||
24 | .text | ||
25 | ENTRY(round_Xsig) | ||
26 | pushl %ebp | ||
27 | movl %esp,%ebp | ||
28 | pushl $0 /* -4(%ebp): shift count, starts at zero */ | ||
29 | pushl %ebx | ||
30 | pushl %esi | ||
31 | |||
32 | movl PARAM1,%esi /* %esi -> the 12 byte quantity */ | ||
33 | |||
34 | movl (%esi),%eax /* ls dword */ | ||
35 | movl 4(%esi),%ebx /* middle dword */ | ||
36 | movl 8(%esi),%edx /* ms dword */ | ||
37 | |||
38 | testl %edx,%edx /* Examine the ms dword */ | ||
39 | js L_rx_round /* Top bit already set: nothing to shift */ | ||
40 | jnz L_rx_shift /* Non-zero: shift left 1 - 31 bits */ | ||
41 | |||
42 | /* The ms dword is empty: move everything up by one dword */ | ||
43 | movl $-32,-4(%ebp) | ||
44 | movl %ebx,%edx | ||
45 | movl %eax,%ebx | ||
46 | xorl %eax,%eax | ||
47 | |||
48 | /* NOTE(review): bsrl is undefined if %edx is still 0 here - confirm callers */ | ||
49 | L_rx_shift: | ||
50 | bsrl %edx,%ecx /* index of the highest set bit */ | ||
51 | negl %ecx | ||
52 | addl $31,%ecx /* %ecx = 31 - index = left shift required */ | ||
53 | subl %ecx,-4(%ebp) /* left shifts are counted as negative */ | ||
54 | shld %cl,%ebx,%edx | ||
55 | shld %cl,%eax,%ebx | ||
56 | shl %cl,%eax | ||
57 | |||
58 | L_rx_round: | ||
59 | testl %eax,%eax /* Is the top bit of the ls dword set? */ | ||
60 | jns L_rx_store /* No: the upper 64 bits stay as they are */ | ||
61 | |||
62 | addl $1,%ebx /* Yes: add one to the upper 64 bits */ | ||
63 | adcl $0,%edx | ||
64 | jnz L_rx_store | ||
65 | |||
66 | /* The ms dword became zero: set its top bit and adjust the count */ | ||
67 | movl $0x80000000,%edx | ||
68 | addl $1,-4(%ebp) | ||
69 | |||
70 | L_rx_store: | ||
71 | movl %eax,(%esi) | ||
72 | movl %ebx,4(%esi) | ||
73 | movl %edx,8(%esi) | ||
74 | |||
75 | movl -4(%ebp),%eax /* Return the accumulated shift count */ | ||
76 | |||
77 | popl %esi | ||
78 | popl %ebx | ||
79 | leave | ||
80 | ret | ||
81 | |||
82 | |||
83 | |||
84 | |||
85 | ENTRY(norm_Xsig) | ||
86 | pushl %ebp | ||
87 | movl %esp,%ebp | ||
88 | pushl $0 /* -4(%ebp): shift count, starts at zero */ | ||
89 | pushl %ebx | ||
90 | pushl %esi | ||
91 | |||
92 | movl PARAM1,%esi /* %esi -> the 12 byte quantity */ | ||
93 | |||
94 | movl (%esi),%eax /* ls dword */ | ||
95 | movl 4(%esi),%ebx /* middle dword */ | ||
96 | movl 8(%esi),%edx /* ms dword */ | ||
97 | |||
98 | testl %edx,%edx /* Examine the ms dword */ | ||
99 | js L_nx_store /* Top bit already set: nothing to do */ | ||
100 | jnz L_nx_shift /* Non-zero: shift left 1 - 31 bits */ | ||
101 | |||
102 | /* The ms dword is empty: move up by one dword and look again */ | ||
103 | movl $-32,-4(%ebp) | ||
104 | movl %ebx,%edx | ||
105 | movl %eax,%ebx | ||
106 | xorl %eax,%eax | ||
107 | |||
108 | testl %edx,%edx /* Examine the new ms dword */ | ||
109 | js L_nx_store /* Normalized now */ | ||
110 | jnz L_nx_shift /* Shift left 1 - 31 bits */ | ||
111 | |||
112 | /* Still empty: move up by a second dword, then stop. */ | ||
113 | /* The value might not be normalized, but shift no more. */ | ||
114 | subl $32,-4(%ebp) | ||
115 | movl %ebx,%edx | ||
116 | movl %eax,%ebx | ||
117 | xorl %eax,%eax | ||
118 | jmp L_nx_store | ||
119 | |||
120 | /* Reached only with a non-zero %edx whose top bit is clear */ | ||
121 | L_nx_shift: | ||
122 | bsrl %edx,%ecx /* index of the highest set bit */ | ||
123 | negl %ecx | ||
124 | addl $31,%ecx /* %ecx = 31 - index = left shift required */ | ||
125 | subl %ecx,-4(%ebp) /* left shifts are counted as negative */ | ||
126 | shld %cl,%ebx,%edx | ||
127 | shld %cl,%eax,%ebx | ||
128 | shl %cl,%eax | ||
129 | |||
130 | L_nx_store: | ||
131 | movl %eax,(%esi) | ||
132 | movl %ebx,4(%esi) | ||
133 | movl %edx,8(%esi) | ||
134 | |||
135 | movl -4(%ebp),%eax /* Return the accumulated shift count */ | ||
136 | |||
137 | popl %esi | ||
138 | popl %ebx | ||
139 | leave | ||
140 | ret | ||
141 | |||
diff --git a/arch/x86/math-emu/shr_Xsig.S b/arch/x86/math-emu/shr_Xsig.S new file mode 100644 index 000000000000..31cdd118e918 --- /dev/null +++ b/arch/x86/math-emu/shr_Xsig.S | |||
@@ -0,0 +1,87 @@ | |||
1 | .file "shr_Xsig.S" | ||
2 | /*---------------------------------------------------------------------------+ | ||
3 | | shr_Xsig.S | | ||
4 | | | | ||
5 | | 12 byte right shift function | | ||
6 | | | | ||
7 | | Copyright (C) 1992,1994,1995 | | ||
8 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
9 | | Australia. E-mail billm@jacobi.maths.monash.edu.au | | ||
10 | | | | ||
11 | | Call from C as: | | ||
12 | | void shr_Xsig(Xsig *arg, unsigned nr) | | ||
13 | | | | ||
14 | | Extended shift right function. | | ||
15 | | Fastest for small shifts. | | ||
16 | | Shifts the 12 byte quantity pointed to by the first arg (arg) | | ||
17 | | right by the number of bits specified by the second arg (nr). | | ||
18 | | | | ||
19 | +---------------------------------------------------------------------------*/ | ||
20 | |||
21 | #include "fpu_emu.h" | ||
22 | |||
23 | .text | ||
24 | ENTRY(shr_Xsig) /* void shr_Xsig(Xsig *arg, unsigned nr): shift the three words at arg right by nr bits, in place */ | ||
25 | push %ebp | ||
26 | movl %esp,%ebp | ||
27 | pushl %esi | ||
28 | movl PARAM2,%ecx /* %ecx = nr, the shift count */ | ||
29 | movl PARAM1,%esi /* %esi = arg */ | ||
30 | cmpl $32,%ecx /* shrd only works for 0..31 bits */ | ||
31 | jnc L_more_than_31 /* taken when nr >= 32 (unsigned compare) */ | ||
32 | |||
33 | /* less than 32 bits */ | ||
34 | pushl %ebx /* %ebx is only needed, and so only saved, on this path */ | ||
35 | movl (%esi),%eax /* lsl */ | ||
36 | movl 4(%esi),%ebx /* midl */ | ||
37 | movl 8(%esi),%edx /* msl */ | ||
38 | shrd %cl,%ebx,%eax /* each word takes its new top bits from the next higher word */ | ||
39 | shrd %cl,%edx,%ebx | ||
40 | shr %cl,%edx /* zeros enter at the top */ | ||
41 | movl %eax,(%esi) | ||
42 | movl %ebx,4(%esi) | ||
43 | movl %edx,8(%esi) | ||
44 | popl %ebx | ||
45 | popl %esi | ||
46 | leave | ||
47 | ret | ||
48 | |||
49 | L_more_than_31: | ||
50 | cmpl $64,%ecx | ||
51 | jnc L_more_than_63 /* taken when nr >= 64 */ | ||
52 | |||
53 | subb $32,%cl /* nr is 32..63: move down one whole word, then shift by nr - 32 */ | ||
54 | movl 4(%esi),%eax /* midl */ | ||
55 | movl 8(%esi),%edx /* msl */ | ||
56 | shrd %cl,%edx,%eax | ||
57 | shr %cl,%edx | ||
58 | movl %eax,(%esi) /* old midl:msl, shifted, become the new lsl:midl */ | ||
59 | movl %edx,4(%esi) | ||
60 | movl $0,8(%esi) /* new msl is zero */ | ||
61 | popl %esi | ||
62 | leave | ||
63 | ret | ||
64 | |||
65 | L_more_than_63: | ||
66 | cmpl $96,%ecx | ||
67 | jnc L_more_than_95 /* taken when nr >= 96 */ | ||
68 | |||
69 | subb $64,%cl /* nr is 64..95: move down two whole words, then shift by nr - 64 */ | ||
70 | movl 8(%esi),%eax /* msl */ | ||
71 | shr %cl,%eax | ||
72 | xorl %edx,%edx | ||
73 | movl %eax,(%esi) /* only the shifted msl survives, as the new lsl */ | ||
74 | movl %edx,4(%esi) | ||
75 | movl %edx,8(%esi) | ||
76 | popl %esi | ||
77 | leave | ||
78 | ret | ||
79 | |||
80 | L_more_than_95: | ||
81 | xorl %eax,%eax /* nr >= 96: every bit is shifted out, the result is zero */ | ||
82 | movl %eax,(%esi) | ||
83 | movl %eax,4(%esi) | ||
84 | movl %eax,8(%esi) | ||
85 | popl %esi | ||
86 | leave | ||
87 | ret | ||
diff --git a/arch/x86/math-emu/status_w.h b/arch/x86/math-emu/status_w.h new file mode 100644 index 000000000000..59e73302aa60 --- /dev/null +++ b/arch/x86/math-emu/status_w.h | |||
@@ -0,0 +1,67 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | status_w.h | | ||
3 | | | | ||
4 | | Copyright (C) 1992,1993 | | ||
5 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
6 | | Australia. E-mail billm@vaxc.cc.monash.edu.au | | ||
7 | | | | ||
8 | +---------------------------------------------------------------------------*/ | ||
9 | |||
10 | #ifndef _STATUS_H_ | ||
11 | #define _STATUS_H_ | ||
12 | |||
13 | #include "fpu_emu.h" /* for definition of PECULIAR_486 */ | ||
14 | |||
15 | #ifdef __ASSEMBLY__ | ||
16 | #define Const__(x) $##x | ||
17 | #else | ||
18 | #define Const__(x) x | ||
19 | #endif | ||
20 | |||
21 | #define SW_Backward Const__(0x8000) /* backward compatibility */ | ||
22 | #define SW_C3 Const__(0x4000) /* condition bit 3 */ | ||
23 | #define SW_Top Const__(0x3800) /* top of stack */ | ||
24 | #define SW_Top_Shift Const__(11) /* shift for top of stack bits */ | ||
25 | #define SW_C2 Const__(0x0400) /* condition bit 2 */ | ||
26 | #define SW_C1 Const__(0x0200) /* condition bit 1 */ | ||
27 | #define SW_C0 Const__(0x0100) /* condition bit 0 */ | ||
28 | #define SW_Summary Const__(0x0080) /* exception summary */ | ||
29 | #define SW_Stack_Fault Const__(0x0040) /* stack fault */ | ||
30 | #define SW_Precision Const__(0x0020) /* loss of precision */ | ||
31 | #define SW_Underflow Const__(0x0010) /* underflow */ | ||
32 | #define SW_Overflow Const__(0x0008) /* overflow */ | ||
33 | #define SW_Zero_Div Const__(0x0004) /* divide by zero */ | ||
34 | #define SW_Denorm_Op Const__(0x0002) /* denormalized operand */ | ||
35 | #define SW_Invalid Const__(0x0001) /* invalid operation */ | ||
36 | |||
37 | #define SW_Exc_Mask Const__(0x27f) /* Status word exception bit mask */ | ||
38 | |||
39 | #ifndef __ASSEMBLY__ | ||
40 | |||
41 | #define COMP_A_gt_B 1 | ||
42 | #define COMP_A_eq_B 2 | ||
43 | #define COMP_A_lt_B 3 | ||
44 | #define COMP_No_Comp 4 | ||
45 | #define COMP_Denormal 0x20 | ||
46 | #define COMP_NaN 0x40 | ||
47 | #define COMP_SNaN 0x80 | ||
48 | |||
49 | #define status_word() \ | ||
50 | ((partial_status & ~SW_Top & 0xffff) | ((top << SW_Top_Shift) & SW_Top)) /* low 16 bits of partial_status with the SW_Top field replaced by the current value of top */ | ||
51 | static inline void setcc(int cc) /* replace the C0..C3 condition bits of partial_status with those of cc; other bits of cc are ignored */ | ||
52 | { | ||
53 | partial_status &= ~(SW_C0|SW_C1|SW_C2|SW_C3); /* clear all four condition bits */ | ||
54 | partial_status |= (cc) & (SW_C0|SW_C1|SW_C2|SW_C3); /* then set the ones requested */ | ||
55 | } | ||
56 | |||
57 | #ifdef PECULIAR_486 | ||
58 | /* Default, this conveys no information, but an 80486 does it. */ | ||
59 | /* Clear the SW_C1 bit, "other bits undefined". */ | ||
60 | # define clear_C1() { partial_status &= ~SW_C1; } | ||
61 | # else | ||
62 | # define clear_C1() /* expands to nothing when PECULIAR_486 is not defined */ | ||
63 | #endif /* PECULIAR_486 */ | ||
64 | |||
65 | #endif /* __ASSEMBLY__ */ | ||
66 | |||
67 | #endif /* _STATUS_H_ */ | ||
diff --git a/arch/x86/math-emu/version.h b/arch/x86/math-emu/version.h new file mode 100644 index 000000000000..a0d73a1d2b67 --- /dev/null +++ b/arch/x86/math-emu/version.h | |||
@@ -0,0 +1,12 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | version.h | | ||
3 | | | | ||
4 | | | | ||
5 | | Copyright (C) 1992,1993,1994,1996,1997,1999 | | ||
6 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | ||
7 | | E-mail billm@melbpc.org.au | | ||
8 | | | | ||
9 | | | | ||
10 | +---------------------------------------------------------------------------*/ | ||
11 | |||
12 | #define FPU_VERSION "wm-FPU-emu version 2.01" | ||
diff --git a/arch/x86/math-emu/wm_shrx.S b/arch/x86/math-emu/wm_shrx.S new file mode 100644 index 000000000000..518428317985 --- /dev/null +++ b/arch/x86/math-emu/wm_shrx.S | |||
@@ -0,0 +1,204 @@ | |||
1 | .file "wm_shrx.S" | ||
2 | /*---------------------------------------------------------------------------+ | ||
3 | | wm_shrx.S | | ||
4 | | | | ||
5 | | 64 bit right shift functions | | ||
6 | | | | ||
7 | | Copyright (C) 1992,1995 | | ||
8 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
9 | | Australia. E-mail billm@jacobi.maths.monash.edu.au | | ||
10 | | | | ||
11 | | Call from C as: | | ||
12 | | unsigned FPU_shrx(void *arg1, unsigned arg2) | | ||
13 | | and | | ||
14 | | unsigned FPU_shrxs(void *arg1, unsigned arg2) | | ||
15 | | | | ||
16 | +---------------------------------------------------------------------------*/ | ||
17 | |||
18 | #include "fpu_emu.h" | ||
19 | |||
20 | .text | ||
21 | /*---------------------------------------------------------------------------+ | ||
22 | | unsigned FPU_shrx(void *arg1, unsigned arg2) | | ||
23 | | | | ||
24 | | Extended shift right function. | | ||
25 | | Fastest for small shifts. | | ||
26 | | Shifts the 64 bit quantity pointed to by the first arg (arg1) | | ||
27 | | right by the number of bits specified by the second arg (arg2). | | ||
28 | | Forms a 96 bit quantity from the 64 bit arg and eax: | | ||
29 | | [ 64 bit arg ][ eax ] | | ||
30 | | shift right ---------> | | ||
31 | | The eax register is initialized to 0 before the shifting. | | ||
32 | | Results returned in the 64 bit arg and eax. | | ||
33 | +---------------------------------------------------------------------------*/ | ||
34 | |||
35 | ENTRY(FPU_shrx) /* unsigned FPU_shrx(void *arg1, unsigned arg2): shift the 64 bits at arg1 right by arg2; %eax returns the 32 bit extension */ | ||
36 | push %ebp | ||
37 | movl %esp,%ebp | ||
38 | pushl %esi | ||
39 | movl PARAM2,%ecx /* %ecx = arg2, the shift count */ | ||
40 | movl PARAM1,%esi /* %esi = arg1 */ | ||
41 | cmpl $32,%ecx /* shrd only works for 0..31 bits */ | ||
42 | jnc L_more_than_31 /* taken when the count is >= 32 (unsigned compare) */ | ||
43 | |||
44 | /* less than 32 bits */ | ||
45 | pushl %ebx /* %ebx is only needed, and so only saved, on this path */ | ||
46 | movl (%esi),%ebx /* lsl */ | ||
47 | movl 4(%esi),%edx /* msl */ | ||
48 | xorl %eax,%eax /* extension */ | ||
49 | shrd %cl,%ebx,%eax /* bits leaving lsl enter the top of the extension */ | ||
50 | shrd %cl,%edx,%ebx | ||
51 | shr %cl,%edx | ||
52 | movl %ebx,(%esi) | ||
53 | movl %edx,4(%esi) | ||
54 | popl %ebx | ||
55 | popl %esi | ||
56 | leave | ||
57 | ret | ||
58 | |||
59 | L_more_than_31: | ||
60 | cmpl $64,%ecx | ||
61 | jnc L_more_than_63 /* taken when the count is >= 64 */ | ||
62 | |||
63 | subb $32,%cl /* count is 32..63: move down one whole word, then shift by count - 32 */ | ||
64 | movl (%esi),%eax /* lsl */ | ||
65 | movl 4(%esi),%edx /* msl */ | ||
66 | shrd %cl,%edx,%eax /* old lsl, refilled from msl, becomes the returned extension */ | ||
67 | shr %cl,%edx | ||
68 | movl %edx,(%esi) /* shifted msl becomes the new lsl */ | ||
69 | movl $0,4(%esi) /* new msl is zero */ | ||
70 | popl %esi | ||
71 | leave | ||
72 | ret | ||
73 | |||
74 | L_more_than_63: | ||
75 | cmpl $96,%ecx | ||
76 | jnc L_more_than_95 /* taken when the count is >= 96 */ | ||
77 | |||
78 | subb $64,%cl /* count is 64..95: only part of msl survives, in the extension */ | ||
79 | movl 4(%esi),%eax /* msl */ | ||
80 | shr %cl,%eax | ||
81 | xorl %edx,%edx | ||
82 | movl %edx,(%esi) /* the 64 bit arg itself becomes zero */ | ||
83 | movl %edx,4(%esi) | ||
84 | popl %esi | ||
85 | leave | ||
86 | ret | ||
87 | |||
88 | L_more_than_95: | ||
89 | xorl %eax,%eax /* count >= 96: the arg and the returned extension are all zero */ | ||
90 | movl %eax,(%esi) | ||
91 | movl %eax,4(%esi) | ||
92 | popl %esi | ||
93 | leave | ||
94 | ret | ||
95 | |||
96 | |||
97 | /*---------------------------------------------------------------------------+ | ||
98 | | unsigned FPU_shrxs(void *arg1, unsigned arg2) | | ||
99 | | | | ||
100 | | Extended shift right function (optimized for small floating point | | ||
101 | | integers). | | ||
102 | | Shifts the 64 bit quantity pointed to by the first arg (arg1) | | ||
103 | | right by the number of bits specified by the second arg (arg2). | | ||
104 | | Forms a 96 bit quantity from the 64 bit arg and eax: | | ||
105 | | [ 64 bit arg ][ eax ] | | ||
106 | | shift right ---------> | | ||
107 | | The eax register is initialized to 0 before the shifting. | | ||
108 | | The lower 8 bits of eax are lost and replaced by a flag which is | | ||
109 | | set (to 0x01) if any bit, apart from the first one, is set in the | | ||
110 | | part which has been shifted out of the arg. | | ||
111 | | Results returned in the 64 bit arg and eax. | | ||
112 | +---------------------------------------------------------------------------*/ | ||
113 | ENTRY(FPU_shrxs) /* unsigned FPU_shrxs(void *arg1, unsigned arg2): as FPU_shrx, but %al returns a 0/1 flag for the lost bits */ | ||
114 | push %ebp | ||
115 | movl %esp,%ebp | ||
116 | pushl %esi | ||
117 | pushl %ebx | ||
118 | movl PARAM2,%ecx /* %ecx = arg2, the shift count */ | ||
119 | movl PARAM1,%esi /* %esi = arg1 */ | ||
120 | cmpl $64,%ecx /* shifts of 64 bits or more are handled separately */ | ||
121 | jnc Ls_more_than_63 | ||
122 | |||
123 | cmpl $32,%ecx /* shrd only works for 0..31 bits */ | ||
124 | jc Ls_less_than_32 | ||
125 | |||
126 | /* We got here without jumps by assuming that the most common requirement | ||
127 | is for small integers */ | ||
128 | /* Shift by [32..63] bits */ | ||
129 | subb $32,%cl /* move down one whole word, then shift by count - 32 */ | ||
130 | movl (%esi),%eax /* lsl */ | ||
131 | movl 4(%esi),%edx /* msl */ | ||
132 | xorl %ebx,%ebx /* %ebx collects the bits shifted out below %eax */ | ||
133 | shrd %cl,%eax,%ebx | ||
134 | shrd %cl,%edx,%eax | ||
135 | shr %cl,%edx | ||
136 | orl %ebx,%ebx /* test these 32 bits */ | ||
137 | setne %bl | ||
138 | test $0x7fffffff,%eax /* and 31 bits here */ | ||
139 | setne %bh | ||
140 | orw %bx,%bx /* Any of the 63 bit set ? */ | ||
141 | setne %al /* low byte of %eax becomes the 0/1 flag; the upper bytes keep the shifted bits */ | ||
142 | movl %edx,(%esi) /* shifted msl becomes the new lsl */ | ||
143 | movl $0,4(%esi) /* new msl is zero */ | ||
144 | popl %ebx | ||
145 | popl %esi | ||
146 | leave | ||
147 | ret | ||
148 | |||
149 | /* Shift by [0..31] bits */ | ||
150 | Ls_less_than_32: | ||
151 | movl (%esi),%ebx /* lsl */ | ||
152 | movl 4(%esi),%edx /* msl */ | ||
153 | xorl %eax,%eax /* extension */ | ||
154 | shrd %cl,%ebx,%eax | ||
155 | shrd %cl,%edx,%ebx | ||
156 | shr %cl,%edx | ||
157 | test $0x7fffffff,%eax /* only need to look at eax here */ | ||
158 | setne %al /* low byte of %eax becomes the 0/1 flag */ | ||
159 | movl %ebx,(%esi) | ||
160 | movl %edx,4(%esi) | ||
161 | popl %ebx | ||
162 | popl %esi | ||
163 | leave | ||
164 | ret | ||
165 | |||
166 | /* Shift by [64..95] bits */ | ||
167 | Ls_more_than_63: | ||
168 | cmpl $96,%ecx | ||
169 | jnc Ls_more_than_95 | ||
170 | |||
171 | subb $64,%cl /* move down two whole words, then shift by count - 64 */ | ||
172 | movl (%esi),%ebx /* lsl */ | ||
173 | movl 4(%esi),%eax /* msl */ | ||
174 | xorl %edx,%edx /* extension */ | ||
175 | shrd %cl,%ebx,%edx | ||
176 | shrd %cl,%eax,%ebx | ||
177 | shr %cl,%eax | ||
178 | orl %ebx,%edx /* are any bits set in the two words below %eax ? */ | ||
179 | setne %bl | ||
180 | test $0x7fffffff,%eax /* plus the low 31 bits of %eax */ | ||
181 | setne %bh | ||
182 | orw %bx,%bx | ||
183 | setne %al /* low byte of %eax becomes the 0/1 flag */ | ||
184 | xorl %edx,%edx | ||
185 | movl %edx,(%esi) /* set to zero */ | ||
186 | movl %edx,4(%esi) /* set to zero */ | ||
187 | popl %ebx | ||
188 | popl %esi | ||
189 | leave | ||
190 | ret | ||
191 | |||
192 | Ls_more_than_95: | ||
193 | /* Shift by [96..inf) bits */ | ||
194 | xorl %eax,%eax | ||
195 | movl (%esi),%ebx | ||
196 | orl 4(%esi),%ebx /* was any bit of the original 64 bit arg set ? */ | ||
197 | setne %al /* %eax = 1 if so, otherwise 0 */ | ||
198 | xorl %ebx,%ebx | ||
199 | movl %ebx,(%esi) /* the arg itself becomes zero */ | ||
200 | movl %ebx,4(%esi) | ||
201 | popl %ebx | ||
202 | popl %esi | ||
203 | leave | ||
204 | ret | ||
diff --git a/arch/x86/math-emu/wm_sqrt.S b/arch/x86/math-emu/wm_sqrt.S new file mode 100644 index 000000000000..d258f59564e1 --- /dev/null +++ b/arch/x86/math-emu/wm_sqrt.S | |||
@@ -0,0 +1,470 @@ | |||
1 | .file "wm_sqrt.S" | ||
2 | /*---------------------------------------------------------------------------+ | ||
3 | | wm_sqrt.S | | ||
4 | | | | ||
5 | | Fixed point arithmetic square root evaluation. | | ||
6 | | | | ||
7 | | Copyright (C) 1992,1993,1995,1997 | | ||
8 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
9 | | Australia. E-mail billm@suburbia.net | | ||
10 | | | | ||
11 | | Call from C as: | | ||
12 | | int wm_sqrt(FPU_REG *n, unsigned int control_word) | | ||
13 | | | | ||
14 | +---------------------------------------------------------------------------*/ | ||
15 | |||
16 | /*---------------------------------------------------------------------------+ | ||
17 | | wm_sqrt(FPU_REG *n, unsigned int control_word) | | ||
18 | | returns the square root of n in n. | | ||
19 | | | | ||
20 | | Use Newton's method to compute the square root of a number, which must | | ||
21 | | be in the range [1.0 .. 4.0), to 64 bits accuracy. | | ||
22 | | Does not check the sign or tag of the argument. | | ||
23 | | Sets the exponent, but not the sign or tag of the result. | | ||
24 | | | | ||
25 | | The guess is kept in %esi:%edi | | ||
26 | +---------------------------------------------------------------------------*/ | ||
27 | |||
28 | #include "exception.h" | ||
29 | #include "fpu_emu.h" | ||
30 | |||
31 | |||
32 | #ifndef NON_REENTRANT_FPU | ||
33 | /* Local storage on the stack: */ | ||
34 | #define FPU_accum_3 -4(%ebp) /* ms word */ | ||
35 | #define FPU_accum_2 -8(%ebp) | ||
36 | #define FPU_accum_1 -12(%ebp) | ||
37 | #define FPU_accum_0 -16(%ebp) | ||
38 | |||
39 | /* | ||
40 | * The de-normalised argument: | ||
41 | * sq_2 sq_1 sq_0 | ||
42 | * b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 | ||
43 | * ^ binary point here | ||
44 | */ | ||
45 | #define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */ | ||
46 | #define FPU_fsqrt_arg_1 -24(%ebp) | ||
47 | #define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */ | ||
48 | |||
49 | #else | ||
50 | /* Local storage in a static area: */ | ||
51 | .data | ||
52 | .align 4,0 | ||
53 | FPU_accum_3: | ||
54 | .long 0 /* ms word */ | ||
55 | FPU_accum_2: | ||
56 | .long 0 | ||
57 | FPU_accum_1: | ||
58 | .long 0 | ||
59 | FPU_accum_0: | ||
60 | .long 0 | ||
61 | |||
62 | /* The de-normalised argument: | ||
63 | sq_2 sq_1 sq_0 | ||
64 | b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 | ||
65 | ^ binary point here | ||
66 | */ | ||
67 | FPU_fsqrt_arg_2: | ||
68 | .long 0 /* ms word */ | ||
69 | FPU_fsqrt_arg_1: | ||
70 | .long 0 | ||
71 | FPU_fsqrt_arg_0: | ||
72 | .long 0 /* ls word, at most the ms bit is set */ | ||
73 | #endif /* NON_REENTRANT_FPU */ | ||
74 | |||
75 | |||
76 | .text | ||
77 | ENTRY(wm_sqrt) /* int wm_sqrt(FPU_REG *n, unsigned int control_word) */ | ||
78 | pushl %ebp | ||
79 | movl %esp,%ebp | ||
80 | #ifndef NON_REENTRANT_FPU | ||
81 | subl $28,%esp /* room for the FPU_accum_* and FPU_fsqrt_arg_* locals at -4..-28(%ebp) */ | ||
82 | #endif /* NON_REENTRANT_FPU */ | ||
83 | pushl %esi | ||
84 | pushl %edi | ||
85 | pushl %ebx | ||
86 | |||
87 | movl PARAM1,%esi /* %esi = n */ | ||
88 | |||
89 | movl SIGH(%esi),%eax /* %eax:%ecx:%edx will be stored as FPU_fsqrt_arg_2:1:0 */ | ||
90 | movl SIGL(%esi),%ecx | ||
91 | xorl %edx,%edx | ||
92 | |||
93 | /* We use a rough linear estimate for the first guess.. */ | ||
94 | |||
95 | cmpw EXP_BIAS,EXP(%esi) /* exponent equal to EXP_BIAS: fall through and shift right one bit */ | ||
96 | jnz sqrt_arg_ge_2 | ||
97 | |||
98 | shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */ | ||
99 | rcrl $1,%ecx | ||
100 | rcrl $1,%edx /* the bit shifted out of %ecx lands in the ms bit of %edx */ | ||
101 | |||
102 | sqrt_arg_ge_2: | ||
103 | /* From here on, n is never accessed directly again until it is | ||
104 | replaced by the answer. */ | ||
105 | |||
106 | movl %eax,FPU_fsqrt_arg_2 /* ms word of n */ | ||
107 | movl %ecx,FPU_fsqrt_arg_1 | ||
108 | movl %edx,FPU_fsqrt_arg_0 | ||
109 | |||
110 | /* Make a linear first estimate */ | ||
111 | shrl $1,%eax | ||
112 | addl $0x40000000,%eax | ||
113 | movl $0xaaaaaaaa,%ecx | ||
114 | mull %ecx /* only the high word of the product, in %edx, is used below */ | ||
115 | shll %edx /* max result was 7fff... */ | ||
116 | testl $0x80000000,%edx /* but min was 3fff... */ | ||
117 | jnz sqrt_prelim_no_adjust | ||
118 | |||
119 | movl $0x80000000,%edx /* round up */ | ||
120 | |||
121 | sqrt_prelim_no_adjust: | ||
122 | movl %edx,%esi /* Our first guess */ | ||
123 | |||
124 | /* We have now computed (approx) (2 + x) / 3, which forms the basis | ||
125 | for a few iterations of Newton's method */ | ||
126 | |||
127 | movl FPU_fsqrt_arg_2,%ecx /* ms word */ | ||
128 | |||
129 | /* | ||
130 | * From our initial estimate, three iterations are enough to get us | ||
131 | * to 30 bits or so. This will then allow two iterations at better | ||
132 | * precision to complete the process. | ||
133 | */ | ||
134 | |||
135 | /* Compute (g + n/g)/2 at each iteration (g is the guess). */ | ||
136 | shrl %ecx /* Doing this first will prevent a divide */ | ||
137 | /* overflow later. */ | ||
138 | |||
139 | movl %ecx,%edx /* msw of the arg / 2 */ | ||
140 | divl %esi /* current estimate */ | ||
141 | shrl %esi /* divide by 2 */ | ||
142 | addl %eax,%esi /* the new estimate */ | ||
143 | |||
144 | movl %ecx,%edx /* second iteration, same steps */ | ||
145 | divl %esi | ||
146 | shrl %esi | ||
147 | addl %eax,%esi | ||
148 | |||
149 | movl %ecx,%edx /* third iteration, same steps */ | ||
150 | divl %esi | ||
151 | shrl %esi | ||
152 | addl %eax,%esi | ||
153 | |||
154 | /* | ||
155 | * Now that an estimate accurate to about 30 bits has been obtained (in %esi), | ||
156 | * we improve it to 60 bits or so. | ||
157 | * | ||
158 | * The strategy from now on is to compute new estimates from | ||
159 | * guess := guess + (n - guess^2) / (2 * guess) | ||
160 | */ | ||
161 | |||
162 | /* First, find the square of the guess */ | ||
163 | movl %esi,%eax | ||
164 | mull %esi | ||
165 | /* guess^2 now in %edx:%eax */ | ||
166 | |||
167 | movl FPU_fsqrt_arg_1,%ecx | ||
168 | subl %ecx,%eax | ||
169 | movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */ | ||
170 | sbbl %ecx,%edx /* %edx:%eax = guess^2 - n, with a borrow if n is the larger */ | ||
171 | jnc sqrt_stage_2_positive | ||
172 | |||
173 | /* Subtraction gives a negative result, | ||
174 | negate the result before division. */ | ||
175 | notl %edx | ||
176 | notl %eax | ||
177 | addl $1,%eax | ||
178 | adcl $0,%edx | ||
179 | |||
180 | divl %esi /* first quotient word of the correction */ | ||
181 | movl %eax,%ecx | ||
182 | |||
183 | movl %edx,%eax /* the remainder supplies the second quotient word */ | ||
184 | divl %esi | ||
185 | jmp sqrt_stage_2_finish | ||
186 | |||
187 | sqrt_stage_2_positive: | ||
188 | divl %esi /* first quotient word of the correction */ | ||
189 | movl %eax,%ecx | ||
190 | |||
191 | movl %edx,%eax /* the remainder supplies the second quotient word */ | ||
192 | divl %esi | ||
193 | |||
194 | notl %ecx /* guess^2 was not below n: negate the correction %ecx:%eax */ | ||
195 | notl %eax | ||
196 | addl $1,%eax | ||
197 | adcl $0,%ecx | ||
198 | |||
199 | sqrt_stage_2_finish: | ||
200 | sarl $1,%ecx /* divide by 2 */ | ||
201 | rcrl $1,%eax | ||
202 | |||
203 | /* Form the new estimate in %esi:%edi */ | ||
204 | movl %eax,%edi | ||
205 | addl %ecx,%esi | ||
206 | |||
207 | jnz sqrt_stage_2_done /* result should be [1..2) */ | ||
208 | |||
209 | #ifdef PARANOID | ||
210 | /* It should be possible to get here only if the arg is ffff....ffff */ | ||
211 | cmp $0xffffffff,FPU_fsqrt_arg_1 | ||
212 | jnz sqrt_stage_2_error | ||
213 | #endif /* PARANOID */ | ||
214 | |||
215 | /* The best rounded result. */ | ||
216 | xorl %eax,%eax | ||
217 | decl %eax /* %eax = 0xffffffff */ | ||
218 | movl %eax,%edi | ||
219 | movl %eax,%esi | ||
220 | movl $0x7fffffff,%eax /* the extension word handed to sqrt_round_result */ | ||
221 | jmp sqrt_round_result | ||
222 | |||
223 | #ifdef PARANOID | ||
224 | sqrt_stage_2_error: | ||
225 | pushl EX_INTERNAL|0x213 /* NOTE(review): this argument is never popped before execution falls through - confirm this is harmless */ | ||
226 | call EXCEPTION | ||
227 | #endif /* PARANOID */ | ||
228 | |||
229 | sqrt_stage_2_done: | ||
230 | |||
231 | /* Now the square root has been computed to better than 60 bits. */ | ||
232 | |||
233 | /* Find the square of the guess. */ | ||
234 | movl %edi,%eax /* ls word of guess */ | ||
235 | mull %edi | ||
236 | movl %edx,FPU_accum_1 /* only the high word of this partial product is kept */ | ||
237 | |||
238 | movl %esi,%eax | ||
239 | mull %esi | ||
240 | movl %edx,FPU_accum_3 | ||
241 | movl %eax,FPU_accum_2 | ||
242 | |||
243 | movl %edi,%eax | ||
244 | mull %esi /* the cross product, which is needed twice */ | ||
245 | addl %eax,FPU_accum_1 | ||
246 | adcl %edx,FPU_accum_2 | ||
247 | adcl $0,FPU_accum_3 | ||
248 | |||
249 | /* movl %esi,%eax */ | ||
250 | /* mull %edi */ | ||
251 | addl %eax,FPU_accum_1 /* second copy, reusing %edx:%eax from the mull above */ | ||
252 | adcl %edx,FPU_accum_2 | ||
253 | adcl $0,FPU_accum_3 | ||
254 | |||
255 | /* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */ | ||
256 | |||
257 | movl FPU_fsqrt_arg_0,%eax /* get normalized n */ | ||
258 | subl %eax,FPU_accum_1 | ||
259 | movl FPU_fsqrt_arg_1,%eax | ||
260 | sbbl %eax,FPU_accum_2 | ||
261 | movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */ | ||
262 | sbbl %eax,FPU_accum_3 /* accumulator = guess^2 - n, with a borrow if n is the larger */ | ||
263 | jnc sqrt_stage_3_positive | ||
264 | |||
265 | /* Subtraction gives a negative result, | ||
266 | negate the result before division */ | ||
267 | notl FPU_accum_1 | ||
268 | notl FPU_accum_2 | ||
269 | notl FPU_accum_3 | ||
270 | addl $1,FPU_accum_1 | ||
271 | adcl $0,FPU_accum_2 | ||
272 | |||
273 | #ifdef PARANOID | ||
274 | adcl $0,FPU_accum_3 /* This must be zero */ | ||
275 | jz sqrt_stage_3_no_error | ||
276 | |||
277 | sqrt_stage_3_error: | ||
278 | pushl EX_INTERNAL|0x207 /* NOTE(review): this argument is never popped before execution falls through - confirm this is harmless */ | ||
279 | call EXCEPTION | ||
280 | |||
281 | sqrt_stage_3_no_error: | ||
282 | #endif /* PARANOID */ | ||
283 | |||
284 | movl FPU_accum_2,%edx | ||
285 | movl FPU_accum_1,%eax | ||
286 | divl %esi /* first quotient word of the correction */ | ||
287 | movl %eax,%ecx | ||
288 | |||
289 | movl %edx,%eax /* the remainder supplies the second quotient word */ | ||
290 | divl %esi | ||
291 | |||
292 | sarl $1,%ecx /* divide by 2 */ | ||
293 | rcrl $1,%eax | ||
294 | |||
295 | /* prepare to round the result */ | ||
296 | |||
297 | addl %ecx,%edi /* the guess was too small: add the correction */ | ||
298 | adcl $0,%esi | ||
299 | |||
300 | jmp sqrt_stage_3_finished | ||
301 | |||
302 | sqrt_stage_3_positive: | ||
303 | movl FPU_accum_2,%edx | ||
304 | movl FPU_accum_1,%eax | ||
305 | divl %esi /* first quotient word of the correction */ | ||
306 | movl %eax,%ecx | ||
307 | |||
308 | movl %edx,%eax /* the remainder supplies the second quotient word */ | ||
309 | divl %esi | ||
310 | |||
311 | sarl $1,%ecx /* divide by 2 */ | ||
312 | rcrl $1,%eax | ||
313 | |||
314 | /* prepare to round the result */ | ||
315 | |||
316 | notl %eax /* Negate the correction term */ | ||
317 | notl %ecx | ||
318 | addl $1,%eax | ||
319 | adcl $0,%ecx /* carry here ==> correction == 0 */ | ||
320 | adcl $0xffffffff,%esi /* subtracts 1 from %esi unless that carry was set */ | ||
321 | |||
322 | addl %ecx,%edi | ||
323 | adcl $0,%esi | ||
324 | |||
325 | sqrt_stage_3_finished: | ||
326 | |||
327 | /* | ||
328 | * The result in %esi:%edi:%eax should be good to about 90 bits here, | ||
329 | * and the rounding information here does not have sufficient accuracy | ||
330 | * in a few rare cases. | ||
331 | */ | ||
332 | cmpl $0xffffffe0,%eax | ||
333 | ja sqrt_near_exact_x /* extension just below a whole ls bit */ | ||
334 | |||
335 | cmpl $0x00000020,%eax | ||
336 | jb sqrt_near_exact /* extension just above zero */ | ||
337 | |||
338 | cmpl $0x7fffffe0,%eax | ||
339 | jb sqrt_round_result /* clearly below the half-way point */ | ||
340 | |||
341 | cmpl $0x80000020,%eax | ||
342 | jb sqrt_get_more_precision /* too close to the half-way point to decide */ | ||
343 | |||
344 | sqrt_round_result: | ||
345 | /* Set up for rounding operations */ | ||
346 | movl %eax,%edx /* extension word */ | ||
347 | movl %esi,%eax /* ms word of the result */ | ||
348 | movl %edi,%ebx /* ls word of the result */ | ||
349 | movl PARAM1,%edi /* %edi = n, where the exponent is stored next */ | ||
350 | movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */ | ||
351 | jmp fpu_reg_round /* control does not return here */ | ||
352 | |||
353 | |||
354 | sqrt_near_exact_x: | ||
355 | /* First, the estimate must be rounded up. */ | ||
356 | addl $1,%edi | ||
357 | adcl $0,%esi | ||
358 | |||
359 | sqrt_near_exact: | ||
360 | /* | ||
361 | * This is an easy case because x^1/2 is monotonic. | ||
362 | * We need just find the square of our estimate, compare it | ||
363 | * with the argument, and deduce whether our estimate is | ||
364 | * above, below, or exact. We use the fact that the estimate | ||
365 | * is known to be accurate to about 90 bits. | ||
366 | */ | ||
367 | movl %edi,%eax /* ls word of guess */ | ||
368 | mull %edi | ||
369 | movl %edx,%ebx /* 2nd ls word of square */ | ||
370 | movl %eax,%ecx /* ls word of square */ | ||
371 | |||
372 | movl %edi,%eax | ||
373 | mull %esi /* cross product; this overwrites %edx */ | ||
374 | addl %eax,%ebx /* its low word is added twice into the 2nd ls word */ | ||
375 | addl %eax,%ebx | ||
376 | |||
377 | #ifdef PARANOID | ||
378 | cmp $0xffffffb0,%ebx | ||
379 | jb sqrt_near_exact_ok | ||
380 | |||
381 | cmp $0x00000050,%ebx | ||
382 | ja sqrt_near_exact_ok | ||
383 | |||
384 | pushl EX_INTERNAL|0x214 /* NOTE(review): this argument is never popped before execution falls through - confirm this is harmless */ | ||
385 | call EXCEPTION | ||
386 | |||
387 | sqrt_near_exact_ok: | ||
388 | #endif /* PARANOID */ | ||
389 | |||
390 | or %ebx,%ebx /* set the flags from the 2nd ls word of the square */ | ||
391 | js sqrt_near_exact_small | ||
392 | |||
393 | jnz sqrt_near_exact_large | ||
394 | |||
395 | or %ebx,%edx /* NOTE(review): %edx is the high word of the second mull here; the matching test in sqrt_get_more_precision uses %ecx - confirm which is intended */ | ||
396 | jnz sqrt_near_exact_large | ||
397 | |||
398 | /* Our estimate is exactly the right answer */ | ||
399 | xorl %eax,%eax /* extension word handed to sqrt_round_result */ | ||
400 | jmp sqrt_round_result | ||
401 | |||
402 | sqrt_near_exact_small: | ||
403 | /* Our estimate is too small */ | ||
404 | movl $0x000000ff,%eax /* small non-zero extension */ | ||
405 | jmp sqrt_round_result | ||
406 | |||
407 | sqrt_near_exact_large: | ||
408 | /* Our estimate is too large, we need to decrement it */ | ||
409 | subl $1,%edi | ||
410 | sbbl $0,%esi | ||
411 | movl $0xffffff00,%eax /* extension just below a whole ls bit */ | ||
412 | jmp sqrt_round_result | ||
413 | |||
414 | |||
415 | sqrt_get_more_precision: | ||
416 | /* This case is almost the same as the above, except we start | ||
417 | with an extra bit of precision in the estimate. */ | ||
418 | stc /* The extra bit. */ | ||
419 | rcll $1,%edi /* Shift the estimate left one bit */ | ||
420 | rcll $1,%esi | ||
421 | |||
422 | movl %edi,%eax /* ls word of guess */ | ||
423 | mull %edi | ||
424 | movl %edx,%ebx /* 2nd ls word of square */ | ||
425 | movl %eax,%ecx /* ls word of square */ | ||
426 | |||
427 | movl %edi,%eax | ||
428 | mull %esi /* cross product */ | ||
429 | addl %eax,%ebx /* its low word is added twice into the 2nd ls word */ | ||
430 | addl %eax,%ebx | ||
431 | |||
432 | /* Put our estimate back to its original value */ | ||
433 | stc /* The ms bit. */ | ||
434 | rcrl $1,%esi /* Shift the estimate right one bit */ | ||
435 | rcrl $1,%edi | ||
436 | |||
437 | #ifdef PARANOID | ||
438 | cmp $0xffffff60,%ebx | ||
439 | jb sqrt_more_prec_ok | ||
440 | |||
441 | cmp $0x000000a0,%ebx | ||
442 | ja sqrt_more_prec_ok | ||
443 | |||
444 | pushl EX_INTERNAL|0x215 /* NOTE(review): this argument is never popped before execution falls through - confirm this is harmless */ | ||
445 | call EXCEPTION | ||
446 | |||
447 | sqrt_more_prec_ok: | ||
448 | #endif /* PARANOID */ | ||
449 | |||
450 | or %ebx,%ebx /* set the flags from the 2nd ls word of the square */ | ||
451 | js sqrt_more_prec_small | ||
452 | |||
453 | jnz sqrt_more_prec_large | ||
454 | |||
455 | or %ebx,%ecx /* %ebx is zero here, so this tests the ls word of the square */ | ||
456 | jnz sqrt_more_prec_large | ||
457 | |||
458 | /* Our estimate is exactly the right answer */ | ||
459 | movl $0x80000000,%eax /* extension of exactly one half */ | ||
460 | jmp sqrt_round_result | ||
461 | |||
462 | sqrt_more_prec_small: | ||
463 | /* Our estimate is too small */ | ||
464 | movl $0x800000ff,%eax /* extension just above one half */ | ||
465 | jmp sqrt_round_result | ||
466 | |||
467 | sqrt_more_prec_large: | ||
468 | /* Our estimate is too large */ | ||
469 | movl $0x7fffff00,%eax /* extension just below one half */ | ||
470 | jmp sqrt_round_result | ||