diff options
Diffstat (limited to 'arch/x86/math-emu/wm_sqrt.S')
-rw-r--r-- | arch/x86/math-emu/wm_sqrt.S | 470 |
1 files changed, 470 insertions, 0 deletions
diff --git a/arch/x86/math-emu/wm_sqrt.S b/arch/x86/math-emu/wm_sqrt.S new file mode 100644 index 000000000000..d258f59564e1 --- /dev/null +++ b/arch/x86/math-emu/wm_sqrt.S | |||
@@ -0,0 +1,470 @@ | |||
1 | .file "wm_sqrt.S" | ||
2 | /*---------------------------------------------------------------------------+ | ||
3 | | wm_sqrt.S | | ||
4 | | | | ||
5 | | Fixed point arithmetic square root evaluation. | | ||
6 | | | | ||
7 | | Copyright (C) 1992,1993,1995,1997 | | ||
8 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
9 | | Australia. E-mail billm@suburbia.net | | ||
10 | | | | ||
11 | | Call from C as: | | ||
12 | | int wm_sqrt(FPU_REG *n, unsigned int control_word) | | ||
13 | | | | ||
14 | +---------------------------------------------------------------------------*/ | ||
15 | |||
16 | /*---------------------------------------------------------------------------+ | ||
17 | | wm_sqrt(FPU_REG *n, unsigned int control_word) | | ||
18 | | returns the square root of n in n. | | ||
19 | | | | ||
20 | | Use Newton's method to compute the square root of a number, which must | | ||
21 | | be in the range [1.0 .. 4.0), to 64 bits accuracy. | | ||
22 | | Does not check the sign or tag of the argument. | | ||
23 | | Sets the exponent, but not the sign or tag of the result. | | ||
24 | | | | ||
25 | | The guess is kept in %esi:%edi | | ||
26 | +---------------------------------------------------------------------------*/ | ||
27 | |||
28 | #include "exception.h" | ||
29 | #include "fpu_emu.h" | ||
30 | |||
31 | |||
32 | #ifndef NON_REENTRANT_FPU | ||
33 | /* Local storage on the stack: seven 32-bit scratch words addressed relative to %ebp; they fit in the 28 bytes that wm_sqrt reserves with "subl $28,%esp". */ | ||
34 | #define FPU_accum_3 -4(%ebp) /* ms word of the accumulator that receives guess^2 */ | ||
35 | #define FPU_accum_2 -8(%ebp) /* middle accumulator word */ | ||
36 | #define FPU_accum_1 -12(%ebp) /* least significant accumulator word that the code actually uses */ | ||
37 | #define FPU_accum_0 -16(%ebp) /* defined, but never referenced by the code in this file */ | ||
38 | |||
39 | /* | ||
40 | * The de-normalised argument (sq_N in the sketch presumably denotes FPU_fsqrt_arg_N): | ||
41 | * sq_2 sq_1 sq_0 | ||
42 | * b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 | ||
43 | * ^ binary point here | ||
44 | */ | ||
45 | #define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */ | ||
46 | #define FPU_fsqrt_arg_1 -24(%ebp) /* middle word */ | ||
47 | #define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */ | ||
48 | |||
49 | #else | ||
50 | /* Local storage in a static area: the same seven words as single .data objects, so one copy is shared by every call. */ | ||
51 | .data | ||
52 | .align 4,0 | ||
53 | FPU_accum_3: | ||
54 | .long 0 /* ms word */ | ||
55 | FPU_accum_2: | ||
56 | .long 0 /* middle accumulator word */ | ||
57 | FPU_accum_1: | ||
58 | .long 0 /* least significant accumulator word that the code actually uses */ | ||
59 | FPU_accum_0: | ||
60 | .long 0 /* never referenced by the code in this file */ | ||
61 | |||
62 | /* The de-normalised argument (sq_N in the sketch presumably denotes FPU_fsqrt_arg_N): | ||
63 | sq_2 sq_1 sq_0 | ||
64 | b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 | ||
65 | ^ binary point here | ||
66 | */ | ||
67 | FPU_fsqrt_arg_2: | ||
68 | .long 0 /* ms word */ | ||
69 | FPU_fsqrt_arg_1: | ||
70 | .long 0 /* middle word */ | ||
71 | FPU_fsqrt_arg_0: | ||
72 | .long 0 /* ls word, at most the ms bit is set */ | ||
73 | #endif /* NON_REENTRANT_FPU */ | ||
74 | |||
75 | |||
76 | .text /* wm_sqrt entry: load the significand of *n, de-normalise it into FPU_fsqrt_arg_2:1:0 and form a linear first guess in %edx. */ | ||
77 | ENTRY(wm_sqrt) | ||
78 | pushl %ebp /* set up a frame; the stack locals defined above are addressed via %ebp */ | ||
79 | movl %esp,%ebp | ||
80 | #ifndef NON_REENTRANT_FPU | ||
81 | subl $28,%esp /* room for the seven stack words FPU_accum_3 .. FPU_fsqrt_arg_0 */ | ||
82 | #endif /* NON_REENTRANT_FPU */ | ||
83 | pushl %esi /* %esi, %edi and %ebx are all overwritten further down */ | ||
84 | pushl %edi /* NOTE(review): this file has no matching pops or ret; every path leaves via "jmp fpu_reg_round", which presumably unwinds the frame - confirm */ | ||
85 | pushl %ebx | ||
86 | |||
87 | movl PARAM1,%esi /* %esi = n (PARAM1 is defined outside this file; presumably the first C argument) */ | ||
88 | |||
89 | movl SIGH(%esi),%eax /* ms word of the significand (stored below as "ms word of n") */ | ||
90 | movl SIGL(%esi),%ecx /* presumably the ls word of the significand - SIGL is defined outside this file */ | ||
91 | xorl %edx,%edx /* third word: receives the bit shifted out below, otherwise stays 0 */ | ||
92 | |||
93 | /* De-normalise the argument, then use a rough linear estimate for the first guess. */ | ||
94 | |||
95 | cmpw EXP_BIAS,EXP(%esi) /* is the 16-bit exponent field equal to EXP_BIAS? */ | ||
96 | jnz sqrt_arg_ge_2 /* no: keep the three words unshifted */ | ||
97 | |||
98 | shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */ | ||
99 | rcrl $1,%ecx /* carry the one-bit right shift on through the middle word ... */ | ||
100 | rcrl $1,%edx /* ... so the bit shifted out of %ecx lands in the ms bit of %edx */ | ||
101 | |||
102 | sqrt_arg_ge_2: | ||
103 | /* From here on, n is never accessed directly again until it is | ||
104 | replaced by the answer. */ | ||
105 | |||
106 | movl %eax,FPU_fsqrt_arg_2 /* ms word of n */ | ||
107 | movl %ecx,FPU_fsqrt_arg_1 /* middle word of n */ | ||
108 | movl %edx,FPU_fsqrt_arg_0 /* ls word: zero, or only its ms bit set */ | ||
109 | |||
110 | /* Make a linear first estimate */ | ||
111 | shrl $1,%eax /* halve the ms word of the argument */ | ||
112 | addl $0x40000000,%eax /* add the constant term */ | ||
113 | movl $0xaaaaaaaa,%ecx /* 0xaaaaaaaa / 2^32 is about 2/3 */ | ||
114 | mull %ecx /* 64-bit product in %edx:%eax; only the high word is kept */ | ||
115 | shll %edx /* max result was 7fff... */ | ||
116 | testl $0x80000000,%edx /* but min was 3fff... */ | ||
117 | jnz sqrt_prelim_no_adjust /* top bit set: use the estimate as it stands */ | ||
118 | |||
119 | movl $0x80000000,%edx /* round up */ | ||
120 | |||
121 | sqrt_prelim_no_adjust: /* Stage 1: three 32-bit Newton steps refine the guess held in %esi. */ | ||
122 | movl %edx,%esi /* Our first guess */ | ||
123 | |||
124 | /* We have now computed (approx) (2 + x) / 3, which forms the basis | ||
125 | for a few iterations of Newton's method */ | ||
126 | |||
127 | movl FPU_fsqrt_arg_2,%ecx /* ms word */ | ||
128 | |||
129 | /* | ||
130 | * From our initial estimate, three iterations are enough to get us | ||
131 | * to 30 bits or so. This will then allow two iterations at better | ||
132 | * precision to complete the process. | ||
133 | */ | ||
134 | |||
135 | /* Compute (g + n/g)/2 at each iteration (g is the guess). */ | ||
136 | shrl %ecx /* Doing this first will prevent a divide */ | ||
137 | /* overflow later. */ | ||
138 | |||
139 | movl %ecx,%edx /* msw of the arg / 2 */ | ||
140 | divl %esi /* current estimate; %eax is not reloaded, so the low dividend word is whatever the preceding code left there */ | ||
141 | shrl %esi /* divide by 2 */ | ||
142 | addl %eax,%esi /* the new estimate */ | ||
143 | |||
144 | movl %ecx,%edx /* second iteration: same high dividend word */ | ||
145 | divl %esi /* quotient in %eax */ | ||
146 | shrl %esi /* g / 2 */ | ||
147 | addl %eax,%esi /* g / 2 + quotient */ | ||
148 | |||
149 | movl %ecx,%edx /* third iteration */ | ||
150 | divl %esi /* quotient in %eax */ | ||
151 | shrl %esi /* g / 2 */ | ||
152 | addl %eax,%esi /* %esi = the 32-bit estimate handed to the next stage */ | ||
153 | |||
154 | /* | ||
155 | * Now that an estimate accurate to about 30 bits has been obtained (in %esi), | ||
156 | * we improve it to 60 bits or so. | ||
157 | * | ||
158 | * The strategy from now on is to compute new estimates from | ||
159 | * guess := guess + (n - guess^2) / (2 * guess) | ||
160 | * | ||
161 | * This part forms guess^2 - n on the two upper argument words and handles | ||
162 | * the case where that difference is negative. | ||
163 | */ | ||
161 | |||
162 | /* First, find the square of the guess */ | ||
163 | movl %esi,%eax | ||
164 | mull %esi | ||
165 | /* guess^2 now in %edx:%eax */ | ||
166 | |||
167 | movl FPU_fsqrt_arg_1,%ecx /* middle word of n */ | ||
168 | subl %ecx,%eax /* low word of guess^2 - n */ | ||
169 | movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */ | ||
170 | sbbl %ecx,%edx /* high word, including the borrow */ | ||
171 | jnc sqrt_stage_2_positive /* no borrow: guess^2 >= n on these two words */ | ||
172 | |||
173 | /* Subtraction gives a negative result, | ||
174 | negate the result before division. */ | ||
175 | notl %edx /* two's complement negate of %edx:%eax ... */ | ||
176 | notl %eax | ||
177 | addl $1,%eax | ||
178 | adcl $0,%edx /* ... %edx:%eax now holds n - guess^2 as a magnitude */ | ||
179 | |||
180 | divl %esi /* upper 32 bits of (n - guess^2) / guess */ | ||
181 | movl %eax,%ecx /* keep them in %ecx */ | ||
182 | |||
183 | movl %edx,%eax /* %edx still holds the remainder, so the next dividend is remainder:remainder */ | ||
184 | divl %esi /* lower 32 bits of the quotient, in %eax */ | ||
185 | jmp sqrt_stage_2_finish /* the correction %ecx:%eax is added to the guess as it is */ | ||
186 | |||
187 | sqrt_stage_2_positive: /* guess^2 - n did not borrow: divide it by the guess, then negate the quotient so that adding it lowers the guess. */ | ||
188 | divl %esi /* upper 32 bits of (guess^2 - n) / guess */ | ||
189 | movl %eax,%ecx /* keep them in %ecx */ | ||
190 | |||
191 | movl %edx,%eax /* %edx still holds the remainder, so the next dividend is remainder:remainder */ | ||
192 | divl %esi /* lower 32 bits of the quotient, in %eax */ | ||
193 | |||
194 | notl %ecx /* two's complement negate of %ecx:%eax ... */ | ||
195 | notl %eax | ||
196 | addl $1,%eax | ||
197 | adcl $0,%ecx /* ... giving a negative correction term */ | ||
198 | |||
199 | sqrt_stage_2_finish: /* Halve the signed correction %ecx:%eax and add it to the guess, giving a 64-bit estimate in %esi:%edi. */ | ||
200 | sarl $1,%ecx /* divide by 2 */ | ||
201 | rcrl $1,%eax /* the bit shifted out of %ecx enters the top of %eax */ | ||
202 | |||
203 | /* Form the new estimate in %esi:%edi */ | ||
204 | movl %eax,%edi /* low word of the correction becomes the low word of the estimate */ | ||
205 | addl %ecx,%esi /* high word; the jnz below tests the zero flag set here */ | ||
206 | |||
207 | jnz sqrt_stage_2_done /* result should be [1..2) */ | ||
208 | |||
209 | #ifdef PARANOID | ||
210 | /* It should be possible to get here only if the arg is ffff....ffff */ | ||
211 | cmp $0xffffffff,FPU_fsqrt_arg_1 | ||
212 | jnz sqrt_stage_2_error | ||
213 | #endif /* PARANOID */ | ||
214 | |||
215 | /* The best rounded result: the high word came out as exactly zero, so use an all-ones estimate. */ | ||
216 | xorl %eax,%eax | ||
217 | decl %eax /* %eax = 0xffffffff */ | ||
218 | movl %eax,%edi | ||
219 | movl %eax,%esi /* %esi:%edi = ffffffff:ffffffff */ | ||
220 | movl $0x7fffffff,%eax /* third word passed on to sqrt_round_result */ | ||
221 | jmp sqrt_round_result | ||
222 | |||
223 | #ifdef PARANOID | ||
224 | sqrt_stage_2_error: | ||
225 | pushl EX_INTERNAL|0x213 /* NOTE(review): nothing in this file pops this word again - confirm the stack is rebalanced elsewhere */ | ||
226 | call EXCEPTION /* if EXCEPTION returns, execution falls through into sqrt_stage_2_done */ | ||
227 | #endif /* PARANOID */ | ||
228 | |||
229 | sqrt_stage_2_done: /* Stage 3: square the 64-bit guess %esi:%edi, subtract n, and (when the difference is negative) add the halved quotient to the guess. */ | ||
230 | |||
231 | /* Now the square root has been computed to better than 60 bits. */ | ||
232 | |||
233 | /* Find the square of the guess. */ | ||
234 | movl %edi,%eax /* ls word of guess */ | ||
235 | mull %edi | ||
236 | movl %edx,FPU_accum_1 /* only the high word of lo*lo is kept; its low word is dropped */ | ||
237 | |||
238 | movl %esi,%eax | ||
239 | mull %esi | ||
240 | movl %edx,FPU_accum_3 /* hi*hi fills the two upper accumulator words */ | ||
241 | movl %eax,FPU_accum_2 | ||
242 | |||
243 | movl %edi,%eax | ||
244 | mull %esi /* cross product lo*hi in %edx:%eax */ | ||
245 | addl %eax,FPU_accum_1 /* add it once ... */ | ||
246 | adcl %edx,FPU_accum_2 | ||
247 | adcl $0,FPU_accum_3 | ||
248 | |||
249 | /* movl %esi,%eax */ | ||
250 | /* mull %edi */ | ||
251 | addl %eax,FPU_accum_1 /* ... and a second time, reusing %edx:%eax instead of repeating the multiply */ | ||
252 | adcl %edx,FPU_accum_2 | ||
253 | adcl $0,FPU_accum_3 | ||
254 | |||
255 | /* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */ | ||
256 | |||
257 | movl FPU_fsqrt_arg_0,%eax /* get normalized n */ | ||
258 | subl %eax,FPU_accum_1 /* 96-bit guess^2 - n, borrow carried upwards */ | ||
259 | movl FPU_fsqrt_arg_1,%eax | ||
260 | sbbl %eax,FPU_accum_2 | ||
261 | movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */ | ||
262 | sbbl %eax,FPU_accum_3 | ||
263 | jnc sqrt_stage_3_positive /* no borrow: guess^2 >= n */ | ||
264 | |||
265 | /* Subtraction gives a negative result, | ||
266 | negate the result before division */ | ||
267 | notl FPU_accum_1 /* two's complement negate, done in memory */ | ||
268 | notl FPU_accum_2 | ||
269 | notl FPU_accum_3 | ||
270 | addl $1,FPU_accum_1 | ||
271 | adcl $0,FPU_accum_2 /* without PARANOID the carry into FPU_accum_3 is not propagated; that word is not read again */ | ||
272 | |||
273 | #ifdef PARANOID | ||
274 | adcl $0,FPU_accum_3 /* This must be zero */ | ||
275 | jz sqrt_stage_3_no_error | ||
276 | |||
277 | sqrt_stage_3_error: | ||
278 | pushl EX_INTERNAL|0x207 /* NOTE(review): nothing in this file pops this word again - confirm the stack is rebalanced elsewhere */ | ||
279 | call EXCEPTION /* if EXCEPTION returns, execution continues with the division below */ | ||
280 | |||
281 | sqrt_stage_3_no_error: | ||
282 | #endif /* PARANOID */ | ||
283 | |||
284 | movl FPU_accum_2,%edx /* dividend = the two low words of n - guess^2 */ | ||
285 | movl FPU_accum_1,%eax | ||
286 | divl %esi /* divide by the ms word of the guess */ | ||
287 | movl %eax,%ecx /* upper 32 bits of the quotient */ | ||
288 | |||
289 | movl %edx,%eax /* %edx still holds the remainder, so the next dividend is remainder:remainder */ | ||
290 | divl %esi /* lower 32 bits of the quotient, in %eax */ | ||
291 | |||
292 | sarl $1,%ecx /* divide by 2 */ | ||
293 | rcrl $1,%eax /* the bit shifted out of %ecx enters the top of %eax */ | ||
294 | |||
295 | /* prepare to round the result */ | ||
296 | |||
297 | addl %ecx,%edi /* add the correction to the low word of the guess */ | ||
298 | adcl $0,%esi /* carry into the high word; %eax is left as a third word below %edi */ | ||
299 | |||
300 | jmp sqrt_stage_3_finished | ||
301 | |||
302 | sqrt_stage_3_positive: /* guess^2 >= n: divide the difference by the guess, halve and negate it, then add it to %esi:%edi with %eax as a third word. */ | ||
303 | movl FPU_accum_2,%edx /* dividend = the two low words of guess^2 - n */ | ||
304 | movl FPU_accum_1,%eax | ||
305 | divl %esi /* divide by the ms word of the guess */ | ||
306 | movl %eax,%ecx /* upper 32 bits of the quotient */ | ||
307 | |||
308 | movl %edx,%eax /* %edx still holds the remainder, so the next dividend is remainder:remainder */ | ||
309 | divl %esi /* lower 32 bits of the quotient, in %eax */ | ||
310 | |||
311 | sarl $1,%ecx /* divide by 2 */ | ||
312 | rcrl $1,%eax /* the bit shifted out of %ecx enters the top of %eax */ | ||
313 | |||
314 | /* prepare to round the result */ | ||
315 | |||
316 | notl %eax /* Negate the correction term */ | ||
317 | notl %ecx | ||
318 | addl $1,%eax | ||
319 | adcl $0,%ecx /* carry here ==> correction == 0 */ | ||
320 | adcl $0xffffffff,%esi /* adds -1 plus that carry: the high word drops by one unless the correction was zero */ | ||
321 | |||
322 | addl %ecx,%edi /* add the negated correction to the low word of the guess */ | ||
323 | adcl $0,%esi /* and propagate the carry into the high word */ | ||
324 | |||
325 | sqrt_stage_3_finished: /* Dispatch on the third word %eax: values close to 0 or to 0x80000000 take a slower exact check, the rest go straight to rounding. */ | ||
326 | |||
327 | /* | ||
328 | * The result in %esi:%edi:%eax should be good to about 90 bits here, | ||
329 | * and the rounding information here does not have sufficient accuracy | ||
330 | * in a few rare cases. | ||
331 | */ | ||
332 | cmpl $0xffffffe0,%eax | ||
333 | ja sqrt_near_exact_x /* just below a 64-bit boundary: round the estimate up, then check it */ | ||
334 | |||
335 | cmpl $0x00000020,%eax | ||
336 | jb sqrt_near_exact /* just above a 64-bit boundary: check the estimate as it stands */ | ||
337 | |||
338 | cmpl $0x7fffffe0,%eax | ||
339 | jb sqrt_round_result /* clearly below the half-way value: round directly */ | ||
340 | |||
341 | cmpl $0x80000020,%eax | ||
342 | jb sqrt_get_more_precision /* close to the half-way value; otherwise fall through into sqrt_round_result */ | ||
343 | |||
344 | sqrt_round_result: /* Common exit: move the result into the registers used on entry to fpu_reg_round, set the exponent, and tail-jump there. */ | ||
345 | /* Set up for rounding operations */ | ||
346 | movl %eax,%edx /* third (rounding) word */ | ||
347 | movl %esi,%eax /* ms word of the result */ | ||
348 | movl %edi,%ebx /* ls word of the result */ | ||
349 | movl PARAM1,%edi /* reload n, which was last read at function entry */ | ||
350 | movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */ | ||
351 | jmp fpu_reg_round /* defined outside this file; presumably rounds %eax:%ebx:%edx into *%edi and returns to the caller - confirm */ | ||
352 | |||
353 | |||
354 | sqrt_near_exact_x: /* Entry used when the third word was just below a 64-bit boundary. */ | ||
355 | /* First, the estimate must be rounded up. */ | ||
356 | addl $1,%edi | ||
357 | adcl $0,%esi | ||
358 | |||
359 | sqrt_near_exact: | ||
360 | /* | ||
361 | * This is an easy case because x^1/2 is monotonic. | ||
362 | * We need just find the square of our estimate, compare it | ||
363 | * with the argument, and deduce whether our estimate is | ||
364 | * above, below, or exact. We use the fact that the estimate | ||
365 | * is known to be accurate to about 90 bits. | ||
366 | * | ||
367 | * In: %esi:%edi = 64-bit estimate. | ||
368 | * Out: %esi:%edi = estimate (decremented if it was too large), %eax = third word for sqrt_round_result. | ||
369 | * Uses: %ebx:%ecx = low 64 bits of the square; %edx is scratch. | ||
370 | */ | ||
367 | movl %edi,%eax /* ls word of guess */ | ||
368 | mull %edi | ||
369 | movl %edx,%ebx /* 2nd ls word of square */ | ||
370 | movl %eax,%ecx /* ls word of square */ | ||
371 | |||
372 | movl %edi,%eax | ||
373 | mull %esi /* cross product; from here on %edx holds its high word, not part of the low 64 bits */ | ||
374 | addl %eax,%ebx /* add the low word of the cross product twice ... */ | ||
375 | addl %eax,%ebx /* ... so %ebx:%ecx = low 64 bits of the square */ | ||
376 | |||
377 | #ifdef PARANOID | ||
378 | cmp $0xffffffb0,%ebx /* NOTE(review): as written, these two tests can never both fall through, so the exception below is unreachable - confirm the intended sense */ | ||
379 | jb sqrt_near_exact_ok | ||
380 | |||
381 | cmp $0x00000050,%ebx | ||
382 | ja sqrt_near_exact_ok | ||
383 | |||
384 | pushl EX_INTERNAL|0x214 /* NOTE(review): nothing in this file pops this word again - confirm the stack is rebalanced elsewhere */ | ||
385 | call EXCEPTION | ||
386 | |||
387 | sqrt_near_exact_ok: | ||
388 | #endif /* PARANOID */ | ||
389 | |||
390 | or %ebx,%ebx /* sets the sign and zero flags from %ebx */ | ||
391 | js sqrt_near_exact_small /* top bit set: treated as a square just below the argument */ | ||
392 | |||
393 | jnz sqrt_near_exact_large /* small positive excess: the estimate is too large */ | ||
394 | |||
395 | or %ebx,%ecx /* %ebx is zero here, so this tests the ls word of the square saved above (matches sqrt_get_more_precision); it used to test %edx, the unrelated high word of the cross product, which reported 0x80000000:00000001 as exact */ | ||
396 | jnz sqrt_near_exact_large | ||
397 | |||
398 | /* Our estimate is exactly the right answer */ | ||
399 | xorl %eax,%eax /* third word = 0 */ | ||
400 | jmp sqrt_round_result | ||
401 | |||
402 | sqrt_near_exact_small: | ||
403 | /* Our estimate is too small */ | ||
404 | movl $0x000000ff,%eax /* small non-zero third word; presumably marks the result as inexact, slightly above the estimate - confirm against fpu_reg_round */ | ||
405 | jmp sqrt_round_result | ||
406 | |||
407 | sqrt_near_exact_large: | ||
408 | /* Our estimate is too large, we need to decrement it */ | ||
409 | subl $1,%edi | ||
410 | sbbl $0,%esi /* borrow into the ms word */ | ||
411 | movl $0xffffff00,%eax /* third word just below the next 64-bit value */ | ||
412 | jmp sqrt_round_result | ||
413 | |||
414 | |||
415 | sqrt_get_more_precision: /* Third word is close to 0x80000000: square (2 * estimate + 1) to decide on which side of the half-way point the true root lies. */ | ||
416 | /* This case is almost the same as the above, except we start | ||
417 | with an extra bit of precision in the estimate. */ | ||
418 | stc /* The extra bit. */ | ||
419 | rcll $1,%edi /* Shift the estimate left one bit */ | ||
420 | rcll $1,%esi /* the former ms bit of %esi is shifted out into the carry and lost */ | ||
421 | |||
422 | movl %edi,%eax /* ls word of guess */ | ||
423 | mull %edi | ||
424 | movl %edx,%ebx /* 2nd ls word of square */ | ||
425 | movl %eax,%ecx /* ls word of square */ | ||
426 | |||
427 | movl %edi,%eax | ||
428 | mull %esi /* cross product; only its low word is needed */ | ||
429 | addl %eax,%ebx /* add the low word of the cross product twice ... */ | ||
430 | addl %eax,%ebx /* ... so %ebx:%ecx = low 64 bits of the square */ | ||
431 | |||
432 | /* Put our estimate back to its original value */ | ||
433 | stc /* The ms bit. */ | ||
434 | rcrl $1,%esi /* Shift the estimate right one bit; the carry set above re-enters as the ms bit */ | ||
435 | rcrl $1,%edi /* the extra low bit added earlier is shifted out */ | ||
436 | |||
437 | #ifdef PARANOID | ||
438 | cmp $0xffffff60,%ebx /* NOTE(review): as written, these two tests can never both fall through, so the exception below is unreachable - confirm the intended sense */ | ||
439 | jb sqrt_more_prec_ok | ||
440 | |||
441 | cmp $0x000000a0,%ebx | ||
442 | ja sqrt_more_prec_ok | ||
443 | |||
444 | pushl EX_INTERNAL|0x215 /* NOTE(review): nothing in this file pops this word again - confirm the stack is rebalanced elsewhere */ | ||
445 | call EXCEPTION | ||
446 | |||
447 | sqrt_more_prec_ok: | ||
448 | #endif /* PARANOID */ | ||
449 | |||
450 | or %ebx,%ebx /* sets the sign and zero flags from %ebx */ | ||
451 | js sqrt_more_prec_small /* top bit set: treated as a square just below the argument */ | ||
452 | |||
453 | jnz sqrt_more_prec_large /* small positive excess: the extended estimate is too large */ | ||
454 | |||
455 | or %ebx,%ecx /* %ebx is zero here, so this tests the ls word of the square */ | ||
456 | jnz sqrt_more_prec_large | ||
457 | |||
458 | /* Our estimate is exactly the right answer */ | ||
459 | movl $0x80000000,%eax /* third word exactly at the half-way value */ | ||
460 | jmp sqrt_round_result | ||
461 | |||
462 | sqrt_more_prec_small: | ||
463 | /* Our estimate is too small */ | ||
464 | movl $0x800000ff,%eax /* third word just above the half-way value */ | ||
465 | jmp sqrt_round_result | ||
466 | |||
467 | sqrt_more_prec_large: | ||
468 | /* Our estimate is too large */ | ||
469 | movl $0x7fffff00,%eax /* third word just below the half-way value */ | ||
470 | jmp sqrt_round_result | ||