i386: move math-emu

Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@elte.hu>
author: Thomas Gleixner <tglx@linutronix.de> 2007-10-11 05:16:31 -0400
committer: Thomas Gleixner <tglx@linutronix.de> 2007-10-11 05:16:31 -0400
commit: da957e111bb0c189a4a3bf8a00caaecb59ed94ca (patch)
tree: 6916075fdd3e28869dcd3dfa2cf160a74d1cb02e /arch/x86/math-emu/poly_sin.c
parent: 2ec1df4130c60d1eb49dc0fa0ed15858fede6b05 (diff)
1 files changed, 397 insertions, 0 deletions
diff --git a/arch/x86/math-emu/poly_sin.c b/arch/x86/math-emu/poly_sin.c
new file mode 100644
index 000000000000..a36313fb06f1
--- /dev/null
+++ b/arch/x86/math-emu/poly_sin.c
@@ -0,0 +1,397 @@
+/*---------------------------------------------------------------------------+
+ |  poly_sin.c                                                               |
+ |                                                                           |
+ |  Computation of an approximation of the sin function and the cosine       |
+ |  function by a polynomial.                                                |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1997,1999                                    |
+ |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
+ |                  E-mail   billm@melbpc.org.au                             |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+#include "exception.h"
+#include "reg_constant.h"
+#include "fpu_emu.h"
+#include "fpu_system.h"
+#include "control_w.h"
+#include "poly.h"
+#define N_COEFF_P       4
+#define N_COEFF_N       4
+static const unsigned long long pos_terms_l[N_COEFF_P] =
+{
+  0xaaaaaaaaaaaaaaabLL,
+  0x00d00d00d00cf906LL,
+  0x000006b99159a8bbLL,
+  0x000000000d7392e6LL
+};
+static const unsigned long long neg_terms_l[N_COEFF_N] =
+{
+  0x2222222222222167LL,
+  0x0002e3bc74aab624LL,
+  0x0000000b09229062LL,
+  0x00000000000c7973LL
+};
+#define N_COEFF_PH      4
+#define N_COEFF_NH      4
+static const unsigned long long pos_terms_h[N_COEFF_PH] =
+{
+  0x0000000000000000LL,
+  0x05b05b05b05b0406LL,
+  0x000049f93edd91a9LL,
+  0x00000000c9c9ed62LL
+};
+static const unsigned long long neg_terms_h[N_COEFF_NH] =
+{
+  0xaaaaaaaaaaaaaa98LL,
+  0x001a01a01a019064LL,
+  0x0000008f76c68a77LL,
+  0x0000000000d58f5eLL
+};
+/*--- poly_sine() -----------------------------------------------------------+
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+void    poly_sine(FPU_REG *st0_ptr)
+{
+  int                 exponent, echange;
+  Xsig                accumulator, argSqrd, argTo4;
+  unsigned long       fix_up, adj;
+  unsigned long long  fixed_arg;
+  FPU_REG             result;
+  exponent = exponent(st0_ptr);
+  accumulator.lsw = accumulator.midw = accumulator.msw = 0;
+  /* Split into two ranges, for arguments below and above 1.0 */
+  /* The boundary between upper and lower is approx 0.88309101259 */
+  if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa)) )
+    {
+      /* The argument is <= 0.88309101259 */
+      argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; argSqrd.lsw = 0;
+      mul64_Xsig(&argSqrd, &significand(st0_ptr));
+      shr_Xsig(&argSqrd, 2*(-1-exponent));
+      argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
+      argTo4.lsw = argSqrd.lsw;
+      mul_Xsig_Xsig(&argTo4, &argTo4);
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
+                      N_COEFF_N-1);
+      mul_Xsig_Xsig(&accumulator, &argSqrd);
+      negate_Xsig(&accumulator);
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
+                      N_COEFF_P-1);
+      shr_Xsig(&accumulator, 2);    /* Divide by four */
+      accumulator.msw |= 0x80000000;  /* Add 1.0 */
+      mul64_Xsig(&accumulator, &significand(st0_ptr));
+      mul64_Xsig(&accumulator, &significand(st0_ptr));
+      mul64_Xsig(&accumulator, &significand(st0_ptr));
+      /* Divide by four, FPU_REG compatible, etc */
+      exponent = 3*exponent;
+      /* The minimum exponent difference is 3 */
+      shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
+      negate_Xsig(&accumulator);
+      XSIG_LL(accumulator) += significand(st0_ptr);
+      echange = round_Xsig(&accumulator);
+      setexponentpos(&result, exponent(st0_ptr) + echange);
+    }
+  else
+    {
+      /* The argument is > 0.88309101259 */
+      /* We use sin(st(0)) = cos(pi/2-st(0)) */
+      fixed_arg = significand(st0_ptr);
+      if ( exponent == 0 )
+        {
+          /* The argument is >= 1.0 */
+          /* Put the binary point at the left. */
+          fixed_arg <<= 1;
+        }
+      /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
+      fixed_arg = 0x921fb54442d18469LL - fixed_arg;
+      /* There is a special case which arises due to rounding, to fix here. */
+      if ( fixed_arg == 0xffffffffffffffffLL )
+        fixed_arg = 0;
+      XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
+      mul64_Xsig(&argSqrd, &fixed_arg);
+      XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw;
+      mul_Xsig_Xsig(&argTo4, &argTo4);
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
+                      N_COEFF_NH-1);
+      mul_Xsig_Xsig(&accumulator, &argSqrd);
+      negate_Xsig(&accumulator);
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
+                      N_COEFF_PH-1);
+      negate_Xsig(&accumulator);
+      mul64_Xsig(&accumulator, &fixed_arg);
+      mul64_Xsig(&accumulator, &fixed_arg);
+      shr_Xsig(&accumulator, 3);
+      negate_Xsig(&accumulator);
+      add_Xsig_Xsig(&accumulator, &argSqrd);
+      shr_Xsig(&accumulator, 1);
+      accumulator.lsw |= 1;  /* A zero accumulator here would cause problems */
+      negate_Xsig(&accumulator);
+      /* The basic computation is complete. Now fix the answer to
+         compensate for the error due to the approximation used for
+         pi/2
+         */
+      /* This has an exponent of -65 */
+      fix_up = 0x898cc517;
+      /* The fix-up needs to be improved for larger args */
+      if ( argSqrd.msw & 0xffc00000 )
+        {
+          /* Get about 32 bit precision in these: */
+          fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
+        }
+      fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
+      adj = accumulator.lsw;    /* temp save */
+      accumulator.lsw -= fix_up;
+      if ( accumulator.lsw > adj )
+        XSIG_LL(accumulator) --;
+      echange = round_Xsig(&accumulator);
+      setexponentpos(&result, echange - 1);
+    }
+  significand(&result) = XSIG_LL(accumulator);
+  setsign(&result, getsign(st0_ptr));
+  FPU_copy_to_reg0(&result, TAG_Valid);
+#ifdef PARANOID
+  if ( (exponent(&result) >= 0)
+      && (significand(&result) > 0x8000000000000000LL) )
+    {
+      EXCEPTION(EX_INTERNAL|0x150);
+    }
+#endif /* PARANOID */
+}
+/*--- poly_cos() ------------------------------------------------------------+
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+void    poly_cos(FPU_REG *st0_ptr)
+{
+  FPU_REG             result;
+  long int            exponent, exp2, echange;
+  Xsig                accumulator, argSqrd, fix_up, argTo4;
+  unsigned long long  fixed_arg;
+#ifdef PARANOID
+  if ( (exponent(st0_ptr) > 0)
+      || ((exponent(st0_ptr) == 0)
+          && (significand(st0_ptr) > 0xc90fdaa22168c234LL)) )
+    {
+      EXCEPTION(EX_Invalid);
+      FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
+      return;
+    }
+#endif /* PARANOID */
+  exponent = exponent(st0_ptr);
+  accumulator.lsw = accumulator.midw = accumulator.msw = 0;
+  if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54)) )
+    {
+      /* arg is < 0.687705 */
+      argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl;
+      argSqrd.lsw = 0;
+      mul64_Xsig(&argSqrd, &significand(st0_ptr));
+      if ( exponent < -1 )
+        {
+          /* shift the argument right by the required places */
+          shr_Xsig(&argSqrd, 2*(-1-exponent));
+        }
+      argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
+      argTo4.lsw = argSqrd.lsw;
+      mul_Xsig_Xsig(&argTo4, &argTo4);
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
+                      N_COEFF_NH-1);
+      mul_Xsig_Xsig(&accumulator, &argSqrd);
+      negate_Xsig(&accumulator);
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
+                      N_COEFF_PH-1);
+      negate_Xsig(&accumulator);
+      mul64_Xsig(&accumulator, &significand(st0_ptr));
+      mul64_Xsig(&accumulator, &significand(st0_ptr));
+      shr_Xsig(&accumulator, -2*(1+exponent));
+      shr_Xsig(&accumulator, 3);
+      negate_Xsig(&accumulator);
+      add_Xsig_Xsig(&accumulator, &argSqrd);
+      shr_Xsig(&accumulator, 1);
+      /* It doesn't matter if accumulator is all zero here, the
+         following code will work ok */
+      negate_Xsig(&accumulator);
+      if ( accumulator.lsw & 0x80000000 )
+        XSIG_LL(accumulator) ++;
+      if ( accumulator.msw == 0 )
+        {
+          /* The result is 1.0 */
+          FPU_copy_to_reg0(&CONST_1, TAG_Valid);
+          return;
+        }
+      else
+        {
+          significand(&result) = XSIG_LL(accumulator);
+      
+          /* will be a valid positive nr with expon = -1 */
+          setexponentpos(&result, -1);
+        }
+    }
+  else
+    {
+      fixed_arg = significand(st0_ptr);
+      if ( exponent == 0 )
+        {
+          /* The argument is >= 1.0 */
+          /* Put the binary point at the left. */
+          fixed_arg <<= 1;
+        }
+      /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
+      fixed_arg = 0x921fb54442d18469LL - fixed_arg;
+      /* There is a special case which arises due to rounding, to fix here. */
+      if ( fixed_arg == 0xffffffffffffffffLL )
+        fixed_arg = 0;
+      exponent = -1;
+      exp2 = -1;
+      /* A shift is needed here only for a narrow range of arguments,
+         i.e. for fixed_arg approx 2^-32, but we pick up more... */
+      if ( !(LL_MSW(fixed_arg) & 0xffff0000) )
+        {
+          fixed_arg <<= 16;
+          exponent -= 16;
+          exp2 -= 16;
+        }
+      XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
+      mul64_Xsig(&argSqrd, &fixed_arg);
+      if ( exponent < -1 )
+        {
+          /* shift the argument right by the required places */
+          shr_Xsig(&argSqrd, 2*(-1-exponent));
+        }
+      argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
+      argTo4.lsw = argSqrd.lsw;
+      mul_Xsig_Xsig(&argTo4, &argTo4);
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
+                      N_COEFF_N-1);
+      mul_Xsig_Xsig(&accumulator, &argSqrd);
+      negate_Xsig(&accumulator);
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
+                      N_COEFF_P-1);
+      shr_Xsig(&accumulator, 2);    /* Divide by four */
+      accumulator.msw |= 0x80000000;  /* Add 1.0 */
+      mul64_Xsig(&accumulator, &fixed_arg);
+      mul64_Xsig(&accumulator, &fixed_arg);
+      mul64_Xsig(&accumulator, &fixed_arg);
+      /* Divide by four, FPU_REG compatible, etc */
+      exponent = 3*exponent;
+      /* The minimum exponent difference is 3 */
+      shr_Xsig(&accumulator, exp2 - exponent);
+      negate_Xsig(&accumulator);
+      XSIG_LL(accumulator) += fixed_arg;
+      /* The basic computation is complete. Now fix the answer to
+         compensate for the error due to the approximation used for
+         pi/2
+         */
+      /* This has an exponent of -65 */
+      XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
+      fix_up.lsw = 0;
+      /* The fix-up needs to be improved for larger args */
+      if ( argSqrd.msw & 0xffc00000 )
+        {
+          /* Get about 32 bit precision in these: */
+          fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
+          fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
+        }
+      exp2 += norm_Xsig(&accumulator);
+      shr_Xsig(&accumulator, 1); /* Prevent overflow */
+      exp2++;
+      shr_Xsig(&fix_up, 65 + exp2);
+      add_Xsig_Xsig(&accumulator, &fix_up);
+      echange = round_Xsig(&accumulator);
+      setexponentpos(&result, exp2 + echange);
+      significand(&result) = XSIG_LL(accumulator);
+    }
+  FPU_copy_to_reg0(&result, TAG_Valid);
+#ifdef PARANOID
+  if ( (exponent(&result) >= 0)
+      && (significand(&result) > 0x8000000000000000LL) )
+    {
+      EXCEPTION(EX_INTERNAL|0x151);
+    }
+#endif /* PARANOID */
+}
author	Thomas Gleixner <tglx@linutronix.de>	2007-10-11 05:16:31 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2007-10-11 05:16:31 -0400
commit	da957e111bb0c189a4a3bf8a00caaecb59ed94ca (patch)
tree	6916075fdd3e28869dcd3dfa2cf160a74d1cb02e /arch/x86/math-emu/poly_sin.c
parent	2ec1df4130c60d1eb49dc0fa0ed15858fede6b05 (diff)

diff --git a/arch/x86/math-emu/poly_sin.c b/arch/x86/math-emu/poly_sin.c new file mode 100644 index 000000000000..a36313fb06f1 --- /dev/null +++ b/arch/x86/math-emu/poly_sin.c
@@ -0,0 +1,397 @@
	1	/*---------------------------------------------------------------------------+
	2	\| poly_sin.c \|
	3	\| \|
	4	\| Computation of an approximation of the sin function and the cosine \|
	5	\| function by a polynomial. \|
	6	\| \|
	7	\| Copyright (C) 1992,1993,1994,1997,1999 \|
	8	\| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia \|
	9	\| E-mail billm@melbpc.org.au \|
	10	\| \|
	11	\| \|
	12	+---------------------------------------------------------------------------*/
	13
	14
	15	#include "exception.h"
	16	#include "reg_constant.h"
	17	#include "fpu_emu.h"
	18	#include "fpu_system.h"
	19	#include "control_w.h"
	20	#include "poly.h"
	21
	22
	23	#define N_COEFF_P 4
	24	#define N_COEFF_N 4
	25
	26	static const unsigned long long pos_terms_l[N_COEFF_P] =
	27	{
	28	0xaaaaaaaaaaaaaaabLL,
	29	0x00d00d00d00cf906LL,
	30	0x000006b99159a8bbLL,
	31	0x000000000d7392e6LL
	32	};
	33
	34	static const unsigned long long neg_terms_l[N_COEFF_N] =
	35	{
	36	0x2222222222222167LL,
	37	0x0002e3bc74aab624LL,
	38	0x0000000b09229062LL,
	39	0x00000000000c7973LL
	40	};
	41
	42
	43
	44	#define N_COEFF_PH 4
	45	#define N_COEFF_NH 4
	46	static const unsigned long long pos_terms_h[N_COEFF_PH] =
	47	{
	48	0x0000000000000000LL,
	49	0x05b05b05b05b0406LL,
	50	0x000049f93edd91a9LL,
	51	0x00000000c9c9ed62LL
	52	};
	53
	54	static const unsigned long long neg_terms_h[N_COEFF_NH] =
	55	{
	56	0xaaaaaaaaaaaaaa98LL,
	57	0x001a01a01a019064LL,
	58	0x0000008f76c68a77LL,
	59	0x0000000000d58f5eLL
	60	};
	61
	62
	63	/*--- poly_sine() -----------------------------------------------------------+
	64	\| \|
	65	+---------------------------------------------------------------------------*/
	66	void poly_sine(FPU_REG *st0_ptr)
	67	{
	68	int exponent, echange;
	69	Xsig accumulator, argSqrd, argTo4;
	70	unsigned long fix_up, adj;
	71	unsigned long long fixed_arg;
	72	FPU_REG result;
	73
	74	exponent = exponent(st0_ptr);
	75
	76	accumulator.lsw = accumulator.midw = accumulator.msw = 0;
	77
	78	/* Split into two ranges, for arguments below and above 1.0 */
	79	/* The boundary between upper and lower is approx 0.88309101259 */
	80	if ( (exponent < -1) \|\| ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa)) )
	81	{
	82	/* The argument is <= 0.88309101259 */
	83
	84	argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; argSqrd.lsw = 0;
	85	mul64_Xsig(&argSqrd, &significand(st0_ptr));
	86	shr_Xsig(&argSqrd, 2*(-1-exponent));
	87	argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
	88	argTo4.lsw = argSqrd.lsw;
	89	mul_Xsig_Xsig(&argTo4, &argTo4);
	90
	91	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
	92	N_COEFF_N-1);
	93	mul_Xsig_Xsig(&accumulator, &argSqrd);
	94	negate_Xsig(&accumulator);
	95
	96	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
	97	N_COEFF_P-1);
	98
	99	shr_Xsig(&accumulator, 2); /* Divide by four */
	100	accumulator.msw \|= 0x80000000; /* Add 1.0 */
	101
	102	mul64_Xsig(&accumulator, &significand(st0_ptr));
	103	mul64_Xsig(&accumulator, &significand(st0_ptr));
	104	mul64_Xsig(&accumulator, &significand(st0_ptr));
	105
	106	/* Divide by four, FPU_REG compatible, etc */
	107	exponent = 3*exponent;
	108
	109	/* The minimum exponent difference is 3 */
	110	shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
	111
	112	negate_Xsig(&accumulator);
	113	XSIG_LL(accumulator) += significand(st0_ptr);
	114
	115	echange = round_Xsig(&accumulator);
	116
	117	setexponentpos(&result, exponent(st0_ptr) + echange);
	118	}
	119	else
	120	{
	121	/* The argument is > 0.88309101259 */
	122	/* We use sin(st(0)) = cos(pi/2-st(0)) */
	123
	124	fixed_arg = significand(st0_ptr);
	125
	126	if ( exponent == 0 )
	127	{
	128	/* The argument is >= 1.0 */
	129
	130	/* Put the binary point at the left. */
	131	fixed_arg <<= 1;
	132	}
	133	/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
	134	fixed_arg = 0x921fb54442d18469LL - fixed_arg;
	135	/* There is a special case which arises due to rounding, to fix here. */
	136	if ( fixed_arg == 0xffffffffffffffffLL )
	137	fixed_arg = 0;
	138
	139	XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
	140	mul64_Xsig(&argSqrd, &fixed_arg);
	141
	142	XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw;
	143	mul_Xsig_Xsig(&argTo4, &argTo4);
	144
	145	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
	146	N_COEFF_NH-1);
	147	mul_Xsig_Xsig(&accumulator, &argSqrd);
	148	negate_Xsig(&accumulator);
	149
	150	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
	151	N_COEFF_PH-1);
	152	negate_Xsig(&accumulator);
	153
	154	mul64_Xsig(&accumulator, &fixed_arg);
	155	mul64_Xsig(&accumulator, &fixed_arg);
	156
	157	shr_Xsig(&accumulator, 3);
	158	negate_Xsig(&accumulator);
	159
	160	add_Xsig_Xsig(&accumulator, &argSqrd);
	161
	162	shr_Xsig(&accumulator, 1);
	163
	164	accumulator.lsw \|= 1; /* A zero accumulator here would cause problems */
	165	negate_Xsig(&accumulator);
	166
	167	/* The basic computation is complete. Now fix the answer to
	168	compensate for the error due to the approximation used for
	169	pi/2
	170	*/
	171
	172	/* This has an exponent of -65 */
	173	fix_up = 0x898cc517;
	174	/* The fix-up needs to be improved for larger args */
	175	if ( argSqrd.msw & 0xffc00000 )
	176	{
	177	/* Get about 32 bit precision in these: */
	178	fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
	179	}
	180	fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
	181
	182	adj = accumulator.lsw; /* temp save */
	183	accumulator.lsw -= fix_up;
	184	if ( accumulator.lsw > adj )
	185	XSIG_LL(accumulator) --;
	186
	187	echange = round_Xsig(&accumulator);
	188
	189	setexponentpos(&result, echange - 1);
	190	}
	191
	192	significand(&result) = XSIG_LL(accumulator);
	193	setsign(&result, getsign(st0_ptr));
	194	FPU_copy_to_reg0(&result, TAG_Valid);
	195
	196	#ifdef PARANOID
	197	if ( (exponent(&result) >= 0)
	198	&& (significand(&result) > 0x8000000000000000LL) )
	199	{
	200	EXCEPTION(EX_INTERNAL\|0x150);
	201	}
	202	#endif /* PARANOID */
	203
	204	}
	205
	206
	207
	208	/*--- poly_cos() ------------------------------------------------------------+
	209	\| \|
	210	+---------------------------------------------------------------------------*/
	211	void poly_cos(FPU_REG *st0_ptr)
	212	{
	213	FPU_REG result;
	214	long int exponent, exp2, echange;
	215	Xsig accumulator, argSqrd, fix_up, argTo4;
	216	unsigned long long fixed_arg;
	217
	218	#ifdef PARANOID
	219	if ( (exponent(st0_ptr) > 0)
	220	\|\| ((exponent(st0_ptr) == 0)
	221	&& (significand(st0_ptr) > 0xc90fdaa22168c234LL)) )
	222	{
	223	EXCEPTION(EX_Invalid);
	224	FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
	225	return;
	226	}
	227	#endif /* PARANOID */
	228
	229	exponent = exponent(st0_ptr);
	230
	231	accumulator.lsw = accumulator.midw = accumulator.msw = 0;
	232
	233	if ( (exponent < -1) \|\| ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54)) )
	234	{
	235	/* arg is < 0.687705 */
	236
	237	argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl;
	238	argSqrd.lsw = 0;
	239	mul64_Xsig(&argSqrd, &significand(st0_ptr));
	240
	241	if ( exponent < -1 )
	242	{
	243	/* shift the argument right by the required places */
	244	shr_Xsig(&argSqrd, 2*(-1-exponent));
	245	}
	246
	247	argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
	248	argTo4.lsw = argSqrd.lsw;
	249	mul_Xsig_Xsig(&argTo4, &argTo4);
	250
	251	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
	252	N_COEFF_NH-1);
	253	mul_Xsig_Xsig(&accumulator, &argSqrd);
	254	negate_Xsig(&accumulator);
	255
	256	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
	257	N_COEFF_PH-1);
	258	negate_Xsig(&accumulator);
	259
	260	mul64_Xsig(&accumulator, &significand(st0_ptr));
	261	mul64_Xsig(&accumulator, &significand(st0_ptr));
	262	shr_Xsig(&accumulator, -2*(1+exponent));
	263
	264	shr_Xsig(&accumulator, 3);
	265	negate_Xsig(&accumulator);
	266
	267	add_Xsig_Xsig(&accumulator, &argSqrd);
	268
	269	shr_Xsig(&accumulator, 1);
	270
	271	/* It doesn't matter if accumulator is all zero here, the
	272	following code will work ok */
	273	negate_Xsig(&accumulator);
	274
	275	if ( accumulator.lsw & 0x80000000 )
	276	XSIG_LL(accumulator) ++;
	277	if ( accumulator.msw == 0 )
	278	{
	279	/* The result is 1.0 */
	280	FPU_copy_to_reg0(&CONST_1, TAG_Valid);
	281	return;
	282	}
	283	else
	284	{
	285	significand(&result) = XSIG_LL(accumulator);
	286
	287	/* will be a valid positive nr with expon = -1 */
	288	setexponentpos(&result, -1);
	289	}
	290	}
	291	else
	292	{
	293	fixed_arg = significand(st0_ptr);
	294
	295	if ( exponent == 0 )
	296	{
	297	/* The argument is >= 1.0 */
	298
	299	/* Put the binary point at the left. */
	300	fixed_arg <<= 1;
	301	}
	302	/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
	303	fixed_arg = 0x921fb54442d18469LL - fixed_arg;
	304	/* There is a special case which arises due to rounding, to fix here. */
	305	if ( fixed_arg == 0xffffffffffffffffLL )
	306	fixed_arg = 0;
	307
	308	exponent = -1;
	309	exp2 = -1;
	310
	311	/* A shift is needed here only for a narrow range of arguments,
	312	i.e. for fixed_arg approx 2^-32, but we pick up more... */
	313	if ( !(LL_MSW(fixed_arg) & 0xffff0000) )
	314	{
	315	fixed_arg <<= 16;
	316	exponent -= 16;
	317	exp2 -= 16;
	318	}
	319
	320	XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
	321	mul64_Xsig(&argSqrd, &fixed_arg);
	322
	323	if ( exponent < -1 )
	324	{
	325	/* shift the argument right by the required places */
	326	shr_Xsig(&argSqrd, 2*(-1-exponent));
	327	}
	328
	329	argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
	330	argTo4.lsw = argSqrd.lsw;
	331	mul_Xsig_Xsig(&argTo4, &argTo4);
	332
	333	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
	334	N_COEFF_N-1);
	335	mul_Xsig_Xsig(&accumulator, &argSqrd);
	336	negate_Xsig(&accumulator);
	337
	338	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
	339	N_COEFF_P-1);
	340
	341	shr_Xsig(&accumulator, 2); /* Divide by four */
	342	accumulator.msw \|= 0x80000000; /* Add 1.0 */
	343
	344	mul64_Xsig(&accumulator, &fixed_arg);
	345	mul64_Xsig(&accumulator, &fixed_arg);
	346	mul64_Xsig(&accumulator, &fixed_arg);
	347
	348	/* Divide by four, FPU_REG compatible, etc */
	349	exponent = 3*exponent;
	350
	351	/* The minimum exponent difference is 3 */
	352	shr_Xsig(&accumulator, exp2 - exponent);
	353
	354	negate_Xsig(&accumulator);
	355	XSIG_LL(accumulator) += fixed_arg;
	356
	357	/* The basic computation is complete. Now fix the answer to
	358	compensate for the error due to the approximation used for
	359	pi/2
	360	*/
	361
	362	/* This has an exponent of -65 */
	363	XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
	364	fix_up.lsw = 0;
	365
	366	/* The fix-up needs to be improved for larger args */
	367	if ( argSqrd.msw & 0xffc00000 )
	368	{
	369	/* Get about 32 bit precision in these: */
	370	fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
	371	fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
	372	}
	373
	374	exp2 += norm_Xsig(&accumulator);
	375	shr_Xsig(&accumulator, 1); /* Prevent overflow */
	376	exp2++;
	377	shr_Xsig(&fix_up, 65 + exp2);
	378
	379	add_Xsig_Xsig(&accumulator, &fix_up);
	380
	381	echange = round_Xsig(&accumulator);
	382
	383	setexponentpos(&result, exp2 + echange);
	384	significand(&result) = XSIG_LL(accumulator);
	385	}
	386
	387	FPU_copy_to_reg0(&result, TAG_Valid);
	388
	389	#ifdef PARANOID
	390	if ( (exponent(&result) >= 0)
	391	&& (significand(&result) > 0x8000000000000000LL) )
	392	{
	393	EXCEPTION(EX_INTERNAL\|0x151);
	394	}
	395	#endif /* PARANOID */
	396
	397	}