i386: move math-emu

Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@elte.hu>
author: Thomas Gleixner <tglx@linutronix.de> 2007-10-11 05:16:31 -0400
committer: Thomas Gleixner <tglx@linutronix.de> 2007-10-11 05:16:31 -0400
commit: da957e111bb0c189a4a3bf8a00caaecb59ed94ca (patch)
tree: 6916075fdd3e28869dcd3dfa2cf160a74d1cb02e /arch/x86/math-emu/wm_sqrt.S
parent: 2ec1df4130c60d1eb49dc0fa0ed15858fede6b05 (diff)
1 files changed, 470 insertions, 0 deletions
diff --git a/arch/x86/math-emu/wm_sqrt.S b/arch/x86/math-emu/wm_sqrt.S
new file mode 100644
index 000000000000..d258f59564e1
--- /dev/null
+++ b/arch/x86/math-emu/wm_sqrt.S
@@ -0,0 +1,470 @@
+        .file   "wm_sqrt.S"
+/*---------------------------------------------------------------------------+
+ |  wm_sqrt.S                                                                |
+ |                                                                           |
+ | Fixed point arithmetic square root evaluation.                            |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1995,1997                                         |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail billm@suburbia.net               |
+ |                                                                           |
+ | Call from C as:                                                           |
+ |    int wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+/*---------------------------------------------------------------------------+
+ |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
+ |    returns the square root of n in n.                                     |
+ |                                                                           |
+ |  Use Newton's method to compute the square root of a number, which must   |
+ |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
+ |  Does not check the sign or tag of the argument.                          |
+ |  Sets the exponent, but not the sign or tag of the result.                |
+ |                                                                           |
+ |  The guess is kept in %esi:%edi                                           |
+ +---------------------------------------------------------------------------*/
+#include "exception.h"
+#include "fpu_emu.h"
+#ifndef NON_REENTRANT_FPU
+/*      Local storage on the stack: seven 32-bit slots at negative
+        offsets from %ebp (-4 .. -28), i.e. the 28 bytes that the
+        wm_sqrt prologue subtracts from %esp. */
+#define FPU_accum_3     -4(%ebp)        /* ms word */
+#define FPU_accum_2     -8(%ebp)
+#define FPU_accum_1     -12(%ebp)
+#define FPU_accum_0     -16(%ebp)       /* ls slot; not referenced by wm_sqrt */
+/*
+ * The de-normalised argument:
+ *                  sq_2                  sq_1              sq_0
+ *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
+ *           ^ binary point here
+ *
+ * (sq_2, sq_1, sq_0 presumably denote FPU_fsqrt_arg_2, _1 and _0.)
+ */
+#define FPU_fsqrt_arg_2 -20(%ebp)       /* ms word */
+#define FPU_fsqrt_arg_1 -24(%ebp)
+#define FPU_fsqrt_arg_0 -28(%ebp)       /* ls word, at most the ms bit is set */
+#else
+/*      Local storage in a static area: the same seven words, but as
+        zero-initialised .long objects in .data instead of stack slots. */
+.data
+        .align 4,0
+FPU_accum_3:
+        .long   0               /* ms word */
+FPU_accum_2:
+        .long   0
+FPU_accum_1:
+        .long   0
+FPU_accum_0:
+        .long   0               /* ls slot; not referenced by wm_sqrt */
+/* The de-normalised argument:
+                    sq_2                  sq_1              sq_0
+          b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
+             ^ binary point here
+ */
+FPU_fsqrt_arg_2:
+        .long   0               /* ms word */
+FPU_fsqrt_arg_1:
+        .long   0
+FPU_fsqrt_arg_0:
+        .long   0               /* ls word, at most the ms bit is set */
+#endif /* NON_REENTRANT_FPU */ 
+.text
+ENTRY(wm_sqrt)                  /* Entry point.  Sets up an %ebp frame, saves
+                                   %esi, %edi and %ebx, and later leaves through
+                                   a jump to fpu_reg_round; there is no ret and
+                                   no register restore in this file. */
+        pushl   %ebp
+        movl    %esp,%ebp
+#ifndef NON_REENTRANT_FPU
+        subl    $28,%esp                /* room for FPU_accum_* and FPU_fsqrt_arg_* */
+#endif /* NON_REENTRANT_FPU */
+        pushl   %esi
+        pushl   %edi
+        pushl   %ebx
+        movl    PARAM1,%esi             /* PARAM1 is defined elsewhere; presumably the pointer n */
+        movl    SIGH(%esi),%eax         /* presumably the ms significand word of n */
+        movl    SIGL(%esi),%ecx         /* presumably the ls significand word of n */
+        xorl    %edx,%edx               /* third word of the 96-bit %eax:%ecx:%edx argument */
+/* We use a rough linear estimate for the first guess.. */
+        cmpw    EXP_BIAS,EXP(%esi)
+        jnz     sqrt_arg_ge_2           /* exponent field differs from EXP_BIAS: no shift */
+        shrl    $1,%eax                 /* arg is in the range  [1.0 .. 2.0) */
+        rcrl    $1,%ecx
+        rcrl    $1,%edx                 /* the bit shifted out of %ecx becomes the ms bit of %edx */
+sqrt_arg_ge_2:
+/* From here on, n is never accessed directly again until it is
+   replaced by the answer. */
+        movl    %eax,FPU_fsqrt_arg_2            /* ms word of n */
+        movl    %ecx,FPU_fsqrt_arg_1
+        movl    %edx,FPU_fsqrt_arg_0
+/* Make a linear first estimate */
+        shrl    $1,%eax
+        addl    $0x40000000,%eax
+        movl    $0xaaaaaaaa,%ecx        /* roughly 2/3 as a 32-bit binary fraction */
+        mull    %ecx                    /* only the ms word of the product (%edx) is kept */
+        shll    %edx                    /* max result was 7fff... */
+        testl   $0x80000000,%edx        /* but min was 3fff... */
+        jnz     sqrt_prelim_no_adjust
+        movl    $0x80000000,%edx        /* round up */
+sqrt_prelim_no_adjust:
+        movl    %edx,%esi       /* Our first guess */
+/* We have now computed (approx)   (2 + x) / 3, which forms the basis
+   for a few iterations of Newton's method */
+        movl    FPU_fsqrt_arg_2,%ecx    /* ms word */
+/*
+ * From our initial estimate, three iterations are enough to get us
+ * to 30 bits or so. This will then allow two iterations at better
+ * precision to complete the process.
+ */
+/* Compute  (g + n/g)/2  at each iteration (g is the guess).
+   Each step divides %edx:%eax by the guess with %edx = (ms word of n)/2.
+   %eax is not reloaded before any divl, so the ls dividend word is
+   whatever the preceding mull or divl left there. */
+        shrl    %ecx            /* Doing this first will prevent a divide */
+                                /* overflow later. */
+        movl    %ecx,%edx       /* msw of the arg / 2 */
+        divl    %esi            /* current estimate */
+        shrl    %esi            /* divide by 2 */
+        addl    %eax,%esi       /* the new estimate */
+        movl    %ecx,%edx       /* second iteration, same pattern */
+        divl    %esi
+        shrl    %esi
+        addl    %eax,%esi
+        movl    %ecx,%edx       /* third iteration, same pattern */
+        divl    %esi
+        shrl    %esi
+        addl    %eax,%esi
+/*
+ * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
+ * we improve it to 60 bits or so.
+ *
+ * The strategy from now on is to compute new estimates from
+ *      guess := guess + (n - guess^2) / (2 * guess)
+ *
+ * In this stage the difference is formed against the two ms words of n
+ * only (FPU_fsqrt_arg_2:FPU_fsqrt_arg_1).  The correction is built in
+ * %ecx:%eax and the refined guess is left in %esi:%edi.
+ */
+/* First, find the square of the guess */
+        movl    %esi,%eax
+        mull    %esi
+/* guess^2 now in %edx:%eax */
+        movl    FPU_fsqrt_arg_1,%ecx
+        subl    %ecx,%eax
+        movl    FPU_fsqrt_arg_2,%ecx    /* ms word of normalized n */
+        sbbl    %ecx,%edx               /* %edx:%eax = guess^2 - n */
+        jnc     sqrt_stage_2_positive   /* no borrow: guess^2 >= n */
+/* Subtraction gives a negative result,
+   negate the result before division. */
+        notl    %edx
+        notl    %eax
+        addl    $1,%eax
+        adcl    $0,%edx                 /* %edx:%eax = n - guess^2 */
+        divl    %esi                    /* ms word of the quotient */
+        movl    %eax,%ecx
+        movl    %edx,%eax               /* the remainder now sits in both dividend words */
+        divl    %esi                    /* ls word of the quotient */
+        jmp     sqrt_stage_2_finish
+sqrt_stage_2_positive:
+        divl    %esi                    /* ms word of the quotient */
+        movl    %eax,%ecx
+        movl    %edx,%eax               /* the remainder now sits in both dividend words */
+        divl    %esi                    /* ls word of the quotient */
+        notl    %ecx                    /* guess^2 >= n, so negate the quotient */
+        notl    %eax
+        addl    $1,%eax
+        adcl    $0,%ecx
+sqrt_stage_2_finish:
+        sarl    $1,%ecx         /* divide by 2 */
+        rcrl    $1,%eax
+        /* Form the new estimate in %esi:%edi */
+        movl    %eax,%edi
+        addl    %ecx,%esi
+        jnz     sqrt_stage_2_done       /* result should be [1..2) */
+#ifdef PARANOID
+/* It should be possible to get here only if the arg is ffff....ffff */
+        cmp     $0xffffffff,FPU_fsqrt_arg_1
+        jnz     sqrt_stage_2_error
+#endif /* PARANOID */
+/* The best rounded result.
+   Reached when the addition above left %esi equal to zero: force the
+   guess to ffffffff:ffffffff with 7fffffff as the extension word and
+   skip stage 3. */
+        xorl    %eax,%eax
+        decl    %eax
+        movl    %eax,%edi
+        movl    %eax,%esi
+        movl    $0x7fffffff,%eax
+        jmp     sqrt_round_result
+#ifdef PARANOID
+sqrt_stage_2_error:
+        pushl   EX_INTERNAL|0x213
+        call    EXCEPTION               /* NOTE(review): nothing pops the pushed argument and
+                                           execution falls through to sqrt_stage_2_done if
+                                           EXCEPTION returns -- confirm this is intended */
+#endif /* PARANOID */ 
+sqrt_stage_2_done:
+/* Now the square root has been computed to better than 60 bits. */
+/* Find the square of the guess.
+   With the guess hi:lo in %esi:%edi, the three ms words of
+   lo*lo + 2*hi*lo*2^32 + hi*hi*2^64 are accumulated in
+   FPU_accum_3:FPU_accum_2:FPU_accum_1; the ls word of lo*lo is dropped. */
+        movl    %edi,%eax               /* ls word of guess */
+        mull    %edi
+        movl    %edx,FPU_accum_1        /* ms word of lo*lo */
+        movl    %esi,%eax
+        mull    %esi
+        movl    %edx,FPU_accum_3
+        movl    %eax,FPU_accum_2
+        movl    %edi,%eax
+        mull    %esi                    /* cross product lo*hi in %edx:%eax */
+        addl    %eax,FPU_accum_1
+        adcl    %edx,FPU_accum_2
+        adcl    $0,FPU_accum_3
+/*      movl    %esi,%eax */
+/*      mull    %edi */
+        addl    %eax,FPU_accum_1        /* add the same cross product a second time; */
+        adcl    %edx,FPU_accum_2        /* %edx:%eax still hold it, which is why the  */
+        adcl    $0,FPU_accum_3          /* two instructions above are commented out   */
+/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
+        movl    FPU_fsqrt_arg_0,%eax            /* get normalized n */
+        subl    %eax,FPU_accum_1
+        movl    FPU_fsqrt_arg_1,%eax
+        sbbl    %eax,FPU_accum_2
+        movl    FPU_fsqrt_arg_2,%eax            /* ms word of normalized n */
+        sbbl    %eax,FPU_accum_3
+        jnc     sqrt_stage_3_positive           /* no borrow: guess^2 >= n */
+/* Subtraction gives a negative result,
+   negate the result before division */
+        notl    FPU_accum_1
+        notl    FPU_accum_2
+        notl    FPU_accum_3
+        addl    $1,FPU_accum_1
+        adcl    $0,FPU_accum_2
+#ifdef PARANOID
+        adcl    $0,FPU_accum_3  /* This must be zero */
+        jz      sqrt_stage_3_no_error
+sqrt_stage_3_error:
+        pushl   EX_INTERNAL|0x207
+        call    EXCEPTION
+sqrt_stage_3_no_error:
+#endif /* PARANOID */
+        movl    FPU_accum_2,%edx        /* only the two ls accumulator words are divided */
+        movl    FPU_accum_1,%eax
+        divl    %esi                    /* ms word of the quotient */
+        movl    %eax,%ecx
+        movl    %edx,%eax               /* the remainder now sits in both dividend words */
+        divl    %esi                    /* ls word of the quotient */
+        sarl    $1,%ecx         /* divide by 2 */
+        rcrl    $1,%eax
+        /* prepare to round the result */
+        addl    %ecx,%edi               /* guess too small: add the correction; */
+        adcl    $0,%esi                 /* %eax is left as the extension word   */
+        jmp     sqrt_stage_3_finished
+sqrt_stage_3_positive:
+        movl    FPU_accum_2,%edx        /* only the two ls accumulator words are divided */
+        movl    FPU_accum_1,%eax
+        divl    %esi                    /* ms word of the quotient */
+        movl    %eax,%ecx
+        movl    %edx,%eax               /* the remainder now sits in both dividend words */
+        divl    %esi                    /* ls word of the quotient */
+        sarl    $1,%ecx         /* divide by 2 */
+        rcrl    $1,%eax
+        /* prepare to round the result */
+        notl    %eax            /* Negate the correction term */
+        notl    %ecx
+        addl    $1,%eax
+        adcl    $0,%ecx         /* carry here ==> correction == 0 */
+        adcl    $0xffffffff,%esi        /* adds -1 plus that carry: %esi drops by one unless the correction was zero */
+        addl    %ecx,%edi
+        adcl    $0,%esi
+sqrt_stage_3_finished:
+/*
+ * The result in %esi:%edi:%eax should be good to about 90 bits here,
+ * and the rounding information here does not have sufficient accuracy
+ * in a few rare cases.
+ *
+ * Those cases are picked out from the extension word in %eax:
+ *   above ffffffe0            -> sqrt_near_exact_x
+ *   below 00000020            -> sqrt_near_exact
+ *   from 7fffffe0 to 8000001f -> sqrt_get_more_precision
+ * Any other value goes straight to sqrt_round_result.
+ */
+        cmpl    $0xffffffe0,%eax
+        ja      sqrt_near_exact_x
+        cmpl    $0x00000020,%eax
+        jb      sqrt_near_exact
+        cmpl    $0x7fffffe0,%eax
+        jb      sqrt_round_result
+        cmpl    $0x80000020,%eax
+        jb      sqrt_get_more_precision
+sqrt_round_result:
+/* Set up for rounding operations.
+   On the jump below: %eax = ms word of the result, %ebx = ls word,
+   %edx = extension word, %edi = PARAM1.  This is presumably the register
+   interface fpu_reg_round expects; it is defined outside this file. */
+        movl    %eax,%edx
+        movl    %esi,%eax
+        movl    %edi,%ebx
+        movl    PARAM1,%edi
+        movw    EXP_BIAS,EXP(%edi)      /* Result is in  [1.0 .. 2.0) */
+        jmp     fpu_reg_round
+sqrt_near_exact_x:
+/* First, the estimate must be rounded up.
+   The extension word was just below 2^32, so the nearest 64-bit value
+   is one unit above the current %esi:%edi. */
+        addl    $1,%edi
+        adcl    $0,%esi
+sqrt_near_exact:
+/*
+ * This is an easy case because x^1/2 is monotonic.
+ * We need just find the square of our estimate, compare it
+ * with the argument, and deduce whether our estimate is
+ * above, below, or exact. We use the fact that the estimate
+ * is known to be accurate to about 90 bits.
+ *
+ * Only the two ls words of the square are formed, with the guess
+ * hi:lo in %esi:%edi:
+ *      %ecx = ls word of lo*lo
+ *      %ebx = ms word of lo*lo  +  2 * (ls word of lo*hi)
+ * %ebx is then read as a small signed value.
+ *
+ * The extension word handed to sqrt_round_result in %eax is
+ *      00000000   estimate is exact
+ *      000000ff   estimate is too small
+ *      ffffff00   estimate was too large and has been decremented
+ */
+        movl    %edi,%eax               /* ls word of guess */
+        mull    %edi
+        movl    %edx,%ebx               /* 2nd ls word of square */
+        movl    %eax,%ecx               /* ls word of square */
+        movl    %edi,%eax
+        mull    %esi                    /* cross product; overwrites %edx */
+        addl    %eax,%ebx
+        addl    %eax,%ebx
+#ifdef PARANOID
+/* NOTE(review): reaching the EXCEPTION call below needs both
+   %ebx >= ffffffb0 and %ebx <= 00000050 (unsigned), which cannot hold
+   together, so this check appears never to fire.  Left unchanged here;
+   confirm the intended conditions before tightening it. */
+        cmp     $0xffffffb0,%ebx
+        jb      sqrt_near_exact_ok
+        cmp     $0x00000050,%ebx
+        ja      sqrt_near_exact_ok
+        pushl   EX_INTERNAL|0x214
+        call    EXCEPTION
+sqrt_near_exact_ok:
+#endif /* PARANOID */ 
+        or      %ebx,%ebx
+        js      sqrt_near_exact_small   /* 2nd ls word negative */
+        jnz     sqrt_near_exact_large   /* 2nd ls word positive */
+/*
+ * %ebx is zero here, so the ls word of the square decides between
+ * "exact" and "too large".  That word was saved in %ecx.  %edx must
+ * not be used for this: the second mull replaced it with the ms word
+ * of the cross product, so e.g. the guess 80000000:00000001 would be
+ * reported as exact (%edx == 0) although %ecx == 1.  Test %ecx, the
+ * same way sqrt_get_more_precision does.
+ */
+        or      %ebx,%ecx
+        jnz     sqrt_near_exact_large
+/* Our estimate is exactly the right answer */
+        xorl    %eax,%eax
+        jmp     sqrt_round_result
+sqrt_near_exact_small:
+/* Our estimate is too small */
+        movl    $0x000000ff,%eax
+        jmp     sqrt_round_result
+        
+sqrt_near_exact_large:
+/* Our estimate is too large, we need to decrement it */
+        subl    $1,%edi
+        sbbl    $0,%esi
+        movl    $0xffffff00,%eax
+        jmp     sqrt_round_result
+sqrt_get_more_precision:
+/* This case is almost the same as the above, except we start
+   with an extra bit of precision in the estimate.
+   The value squared is the guess shifted left one bit with a 1 shifted
+   in at the bottom; the guess itself is restored before rounding.
+   The extension word handed to sqrt_round_result in %eax is
+        80000000   when both ls words of the square are zero
+        800000ff   when the 2nd ls word is negative
+        7fffff00   otherwise */
+        stc                     /* The extra bit. */
+        rcll    $1,%edi         /* Shift the estimate left one bit */
+        rcll    $1,%esi
+        movl    %edi,%eax               /* ls word of guess */
+        mull    %edi
+        movl    %edx,%ebx               /* 2nd ls word of square */
+        movl    %eax,%ecx               /* ls word of square */
+        movl    %edi,%eax
+        mull    %esi
+        addl    %eax,%ebx
+        addl    %eax,%ebx
+/* Put our estimate back to its original value */
+        stc                     /* The ms bit. */
+        rcrl    $1,%esi         /* Shift the estimate right one bit */
+        rcrl    $1,%edi
+#ifdef PARANOID
+        cmp     $0xffffff60,%ebx        /* NOTE(review): reaching the EXCEPTION call needs */
+        jb      sqrt_more_prec_ok       /* %ebx >= ffffff60 and %ebx <= 000000a0 (unsigned) */
+        cmp     $0x000000a0,%ebx        /* together, which cannot hold, so this check      */
+        ja      sqrt_more_prec_ok       /* appears never to fire -- confirm the intent     */
+        pushl   EX_INTERNAL|0x215
+        call    EXCEPTION
+sqrt_more_prec_ok:
+#endif /* PARANOID */ 
+        or      %ebx,%ebx
+        js      sqrt_more_prec_small    /* 2nd ls word negative */
+        jnz     sqrt_more_prec_large    /* 2nd ls word positive */
+        or      %ebx,%ecx               /* %ebx is zero here, so this tests the ls word of the square */
+        jnz     sqrt_more_prec_large
+/* Our estimate is exactly the right answer */
+        movl    $0x80000000,%eax
+        jmp     sqrt_round_result
+sqrt_more_prec_small:
+/* Our estimate is too small */
+        movl    $0x800000ff,%eax
+        jmp     sqrt_round_result
+        
+sqrt_more_prec_large:
+/* Our estimate is too large */
+        movl    $0x7fffff00,%eax
+        jmp     sqrt_round_result
author	Thomas Gleixner <tglx@linutronix.de>	2007-10-11 05:16:31 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2007-10-11 05:16:31 -0400
commit	da957e111bb0c189a4a3bf8a00caaecb59ed94ca (patch)
tree	6916075fdd3e28869dcd3dfa2cf160a74d1cb02e /arch/x86/math-emu/wm_sqrt.S
parent	2ec1df4130c60d1eb49dc0fa0ed15858fede6b05 (diff)

diff --git a/arch/x86/math-emu/wm_sqrt.S b/arch/x86/math-emu/wm_sqrt.S new file mode 100644 index 000000000000..d258f59564e1 --- /dev/null +++ b/arch/x86/math-emu/wm_sqrt.S
@@ -0,0 +1,470 @@
	1	.file "wm_sqrt.S"
	2	/*---------------------------------------------------------------------------+
	3	\| wm_sqrt.S \|
	4	\| \|
	5	\| Fixed point arithmetic square root evaluation. \|
	6	\| \|
	7	\| Copyright (C) 1992,1993,1995,1997 \|
	8	\| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, \|
	9	\| Australia. E-mail billm@suburbia.net \|
	10	\| \|
	11	\| Call from C as: \|
	12	\| int wm_sqrt(FPU_REG *n, unsigned int control_word) \|
	13	\| \|
	14	+---------------------------------------------------------------------------*/
	15
	16	/*---------------------------------------------------------------------------+
	17	\| wm_sqrt(FPU_REG *n, unsigned int control_word) \|
	18	\| returns the square root of n in n. \|
	19	\| \|
	20	\| Use Newton's method to compute the square root of a number, which must \|
	21	\| be in the range [1.0 .. 4.0), to 64 bits accuracy. \|
	22	\| Does not check the sign or tag of the argument. \|
	23	\| Sets the exponent, but not the sign or tag of the result. \|
	24	\| \|
	25	\| The guess is kept in %esi:%edi \|
	26	+---------------------------------------------------------------------------*/
	27
	28	#include "exception.h"
	29	#include "fpu_emu.h"
	30
	31
	32	#ifndef NON_REENTRANT_FPU
	33	/* Local storage on the stack: */
	34	#define FPU_accum_3 -4(%ebp) /* ms word */
	35	#define FPU_accum_2 -8(%ebp)
	36	#define FPU_accum_1 -12(%ebp)
	37	#define FPU_accum_0 -16(%ebp)
	38
	39	/*
	40	* The de-normalised argument:
	41	* sq_2 sq_1 sq_0
	42	* b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
	43	* ^ binary point here
	44	*/
	45	#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */
	46	#define FPU_fsqrt_arg_1 -24(%ebp)
	47	#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */
	48
	49	#else
	50	/* Local storage in a static area: */
	51	.data
	52	.align 4,0
	53	FPU_accum_3:
	54	.long 0 /* ms word */
	55	FPU_accum_2:
	56	.long 0
	57	FPU_accum_1:
	58	.long 0
	59	FPU_accum_0:
	60	.long 0
	61
	62	/* The de-normalised argument:
	63	sq_2 sq_1 sq_0
	64	b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
	65	^ binary point here
	66	*/
	67	FPU_fsqrt_arg_2:
	68	.long 0 /* ms word */
	69	FPU_fsqrt_arg_1:
	70	.long 0
	71	FPU_fsqrt_arg_0:
	72	.long 0 /* ls word, at most the ms bit is set */
	73	#endif /* NON_REENTRANT_FPU */
	74
	75
	76	.text
	77	ENTRY(wm_sqrt)
	78	pushl %ebp
	79	movl %esp,%ebp
	80	#ifndef NON_REENTRANT_FPU
	81	subl $28,%esp
	82	#endif /* NON_REENTRANT_FPU */
	83	pushl %esi
	84	pushl %edi
	85	pushl %ebx
	86
	87	movl PARAM1,%esi
	88
	89	movl SIGH(%esi),%eax
	90	movl SIGL(%esi),%ecx
	91	xorl %edx,%edx
	92
	93	/* We use a rough linear estimate for the first guess.. */
	94
	95	cmpw EXP_BIAS,EXP(%esi)
	96	jnz sqrt_arg_ge_2
	97
	98	shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */
	99	rcrl $1,%ecx
	100	rcrl $1,%edx
	101
	102	sqrt_arg_ge_2:
	103	/* From here on, n is never accessed directly again until it is
	104	replaced by the answer. */
	105
	106	movl %eax,FPU_fsqrt_arg_2 /* ms word of n */
	107	movl %ecx,FPU_fsqrt_arg_1
	108	movl %edx,FPU_fsqrt_arg_0
	109
	110	/* Make a linear first estimate */
	111	shrl $1,%eax
	112	addl $0x40000000,%eax
	113	movl $0xaaaaaaaa,%ecx
	114	mull %ecx
	115	shll %edx /* max result was 7fff... */
	116	testl $0x80000000,%edx /* but min was 3fff... */
	117	jnz sqrt_prelim_no_adjust
	118
	119	movl $0x80000000,%edx /* round up */
	120
	121	sqrt_prelim_no_adjust:
	122	movl %edx,%esi /* Our first guess */
	123
	124	/* We have now computed (approx) (2 + x) / 3, which forms the basis
	125	for a few iterations of Newton's method */
	126
	127	movl FPU_fsqrt_arg_2,%ecx /* ms word */
	128
	129	/*
	130	* From our initial estimate, three iterations are enough to get us
	131	* to 30 bits or so. This will then allow two iterations at better
	132	* precision to complete the process.
	133	*/
	134
	135	/* Compute (g + n/g)/2 at each iteration (g is the guess). */
	136	shrl %ecx /* Doing this first will prevent a divide */
	137	/* overflow later. */
	138
	139	movl %ecx,%edx /* msw of the arg / 2 */
	140	divl %esi /* current estimate */
	141	shrl %esi /* divide by 2 */
	142	addl %eax,%esi /* the new estimate */
	143
	144	movl %ecx,%edx
	145	divl %esi
	146	shrl %esi
	147	addl %eax,%esi
	148
	149	movl %ecx,%edx
	150	divl %esi
	151	shrl %esi
	152	addl %eax,%esi
	153
	154	/*
	155	* Now that an estimate accurate to about 30 bits has been obtained (in %esi),
	156	* we improve it to 60 bits or so.
	157	*
	158	* The strategy from now on is to compute new estimates from
	159	* guess := guess + (n - guess^2) / (2 * guess)
	160	*/
	161
	162	/* First, find the square of the guess */
	163	movl %esi,%eax
	164	mull %esi
	165	/* guess^2 now in %edx:%eax */
	166
	167	movl FPU_fsqrt_arg_1,%ecx
	168	subl %ecx,%eax
	169	movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */
	170	sbbl %ecx,%edx
	171	jnc sqrt_stage_2_positive
	172
	173	/* Subtraction gives a negative result,
	174	negate the result before division. */
	175	notl %edx
	176	notl %eax
	177	addl $1,%eax
	178	adcl $0,%edx
	179
	180	divl %esi
	181	movl %eax,%ecx
	182
	183	movl %edx,%eax
	184	divl %esi
	185	jmp sqrt_stage_2_finish
	186
	187	sqrt_stage_2_positive:
	188	divl %esi
	189	movl %eax,%ecx
	190
	191	movl %edx,%eax
	192	divl %esi
	193
	194	notl %ecx
	195	notl %eax
	196	addl $1,%eax
	197	adcl $0,%ecx
	198
	199	sqrt_stage_2_finish:
	200	sarl $1,%ecx /* divide by 2 */
	201	rcrl $1,%eax
	202
	203	/* Form the new estimate in %esi:%edi */
	204	movl %eax,%edi
	205	addl %ecx,%esi
	206
	207	jnz sqrt_stage_2_done /* result should be [1..2) */
	208
	209	#ifdef PARANOID
	210	/* It should be possible to get here only if the arg is ffff....ffff */
	211	cmp $0xffffffff,FPU_fsqrt_arg_1
	212	jnz sqrt_stage_2_error
	213	#endif /* PARANOID */
	214
	215	/* The best rounded result. */
	216	xorl %eax,%eax
	217	decl %eax
	218	movl %eax,%edi
	219	movl %eax,%esi
	220	movl $0x7fffffff,%eax
	221	jmp sqrt_round_result
	222
	223	#ifdef PARANOID
	224	sqrt_stage_2_error:
	225	pushl EX_INTERNAL\|0x213
	226	call EXCEPTION
	227	#endif /* PARANOID */
	228
	229	sqrt_stage_2_done:
	230
	231	/* Now the square root has been computed to better than 60 bits. */
	232
	233	/* Find the square of the guess. */
	234	movl %edi,%eax /* ls word of guess */
	235	mull %edi
	236	movl %edx,FPU_accum_1
	237
	238	movl %esi,%eax
	239	mull %esi
	240	movl %edx,FPU_accum_3
	241	movl %eax,FPU_accum_2
	242
	243	movl %edi,%eax
	244	mull %esi
	245	addl %eax,FPU_accum_1
	246	adcl %edx,FPU_accum_2
	247	adcl $0,FPU_accum_3
	248
	249	/* movl %esi,%eax */
	250	/* mull %edi */
	251	addl %eax,FPU_accum_1
	252	adcl %edx,FPU_accum_2
	253	adcl $0,FPU_accum_3
	254
	255	/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
	256
	257	movl FPU_fsqrt_arg_0,%eax /* get normalized n */
	258	subl %eax,FPU_accum_1
	259	movl FPU_fsqrt_arg_1,%eax
	260	sbbl %eax,FPU_accum_2
	261	movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */
	262	sbbl %eax,FPU_accum_3
	263	jnc sqrt_stage_3_positive
	264
	265	/* Subtraction gives a negative result,
	266	negate the result before division */
	267	notl FPU_accum_1
	268	notl FPU_accum_2
	269	notl FPU_accum_3
	270	addl $1,FPU_accum_1
	271	adcl $0,FPU_accum_2
	272
	273	#ifdef PARANOID
	274	adcl $0,FPU_accum_3 /* This must be zero */
	275	jz sqrt_stage_3_no_error
	276
	277	sqrt_stage_3_error:
	278	pushl EX_INTERNAL\|0x207
	279	call EXCEPTION
	280
	281	sqrt_stage_3_no_error:
	282	#endif /* PARANOID */
	283
	284	movl FPU_accum_2,%edx
	285	movl FPU_accum_1,%eax
	286	divl %esi
	287	movl %eax,%ecx
	288
	289	movl %edx,%eax
	290	divl %esi
	291
	292	sarl $1,%ecx /* divide by 2 */
	293	rcrl $1,%eax
	294
	295	/* prepare to round the result */
	296
	297	addl %ecx,%edi
	298	adcl $0,%esi
	299
	300	jmp sqrt_stage_3_finished
	301
	302	sqrt_stage_3_positive:
	303	movl FPU_accum_2,%edx
	304	movl FPU_accum_1,%eax
	305	divl %esi
	306	movl %eax,%ecx
	307
	308	movl %edx,%eax
	309	divl %esi
	310
	311	sarl $1,%ecx /* divide by 2 */
	312	rcrl $1,%eax
	313
	314	/* prepare to round the result */
	315
	316	notl %eax /* Negate the correction term */
	317	notl %ecx
	318	addl $1,%eax
	319	adcl $0,%ecx /* carry here ==> correction == 0 */
	320	adcl $0xffffffff,%esi
	321
	322	addl %ecx,%edi
	323	adcl $0,%esi
	324
	325	sqrt_stage_3_finished:
	326
	327	/*
	328	* The result in %esi:%edi:%eax should be good to about 90 bits here,
	329	* and the rounding information here does not have sufficient accuracy
	330	* in a few rare cases.
	331	*/
	332	cmpl $0xffffffe0,%eax
	333	ja sqrt_near_exact_x
	334
	335	cmpl $0x00000020,%eax
	336	jb sqrt_near_exact
	337
	338	cmpl $0x7fffffe0,%eax
	339	jb sqrt_round_result
	340
	341	cmpl $0x80000020,%eax
	342	jb sqrt_get_more_precision
	343
	344	sqrt_round_result:
	345	/* Set up for rounding operations */
	346	movl %eax,%edx
	347	movl %esi,%eax
	348	movl %edi,%ebx
	349	movl PARAM1,%edi
	350	movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */
	351	jmp fpu_reg_round
	352
	353
	354	sqrt_near_exact_x:
	355	/* First, the estimate must be rounded up. */
	356	addl $1,%edi
	357	adcl $0,%esi
	358
	359	sqrt_near_exact:
	360	/*
	361	* This is an easy case because x^1/2 is monotonic.
	362	* We need just find the square of our estimate, compare it
	363	* with the argument, and deduce whether our estimate is
	364	* above, below, or exact. We use the fact that the estimate
	365	* is known to be accurate to about 90 bits.
	366	*/
	367	movl %edi,%eax /* ls word of guess */
	368	mull %edi
	369	movl %edx,%ebx /* 2nd ls word of square */
	370	movl %eax,%ecx /* ls word of square */
	371
	372	movl %edi,%eax
	373	mull %esi
	374	addl %eax,%ebx
	375	addl %eax,%ebx
	376
	377	#ifdef PARANOID
	378	cmp $0xffffffb0,%ebx
	379	jb sqrt_near_exact_ok
	380
	381	cmp $0x00000050,%ebx
	382	ja sqrt_near_exact_ok
	383
	384	pushl EX_INTERNAL\|0x214
	385	call EXCEPTION
	386
	387	sqrt_near_exact_ok:
	388	#endif /* PARANOID */
	389
	390	or %ebx,%ebx
	391	js sqrt_near_exact_small
	392
	393	jnz sqrt_near_exact_large
	394
	395	or %ebx,%edx
	396	jnz sqrt_near_exact_large
	397
	398	/* Our estimate is exactly the right answer */
	399	xorl %eax,%eax
	400	jmp sqrt_round_result
	401
	402	sqrt_near_exact_small:
	403	/* Our estimate is too small */
	404	movl $0x000000ff,%eax
	405	jmp sqrt_round_result
	406
	407	sqrt_near_exact_large:
	408	/* Our estimate is too large, we need to decrement it */
	409	subl $1,%edi
	410	sbbl $0,%esi
	411	movl $0xffffff00,%eax
	412	jmp sqrt_round_result
	413
	414
	415	sqrt_get_more_precision:
	416	/* This case is almost the same as the above, except we start
	417	with an extra bit of precision in the estimate. */
	418	stc /* The extra bit. */
	419	rcll $1,%edi /* Shift the estimate left one bit */
	420	rcll $1,%esi
	421
	422	movl %edi,%eax /* ls word of guess */
	423	mull %edi
	424	movl %edx,%ebx /* 2nd ls word of square */
	425	movl %eax,%ecx /* ls word of square */
	426
	427	movl %edi,%eax
	428	mull %esi
	429	addl %eax,%ebx
	430	addl %eax,%ebx
	431
	432	/* Put our estimate back to its original value */
	433	stc /* The ms bit. */
	434	rcrl $1,%esi /* Shift the estimate right one bit */
	435	rcrl $1,%edi
	436
	437	#ifdef PARANOID
	438	cmp $0xffffff60,%ebx
	439	jb sqrt_more_prec_ok
	440
	441	cmp $0x000000a0,%ebx
	442	ja sqrt_more_prec_ok
	443
	444	pushl EX_INTERNAL\|0x215
	445	call EXCEPTION
	446
	447	sqrt_more_prec_ok:
	448	#endif /* PARANOID */
	449
	450	or %ebx,%ebx
	451	js sqrt_more_prec_small
	452
	453	jnz sqrt_more_prec_large
	454
	455	or %ebx,%ecx
	456	jnz sqrt_more_prec_large
	457
	458	/* Our estimate is exactly the right answer */
	459	movl $0x80000000,%eax
	460	jmp sqrt_round_result
	461
	462	sqrt_more_prec_small:
	463	/* Our estimate is too small */
	464	movl $0x800000ff,%eax
	465	jmp sqrt_round_result
	466
	467	sqrt_more_prec_large:
	468	/* Our estimate is too large */
	469	movl $0x7fffff00,%eax
	470	jmp sqrt_round_result