1 files changed, 297 insertions, 0 deletions
diff --git a/arch/powerpc/math-emu/op-4.h b/arch/powerpc/math-emu/op-4.h
new file mode 100644
index 000000000000..fcdd6d064c54
--- /dev/null
+++ b/arch/powerpc/math-emu/op-4.h
@@ -0,0 +1,297 @@
+/*
+ * Basic four-word fraction declaration and manipulation.
+ *
+ * When adding quadword support for 32 bit machines, we need
+ * to be a little careful as double multiply uses some of these
+ * macros: (in op-2.h)
+ * _FP_MUL_MEAT_2_wide() uses _FP_FRAC_DECL_4, _FP_FRAC_WORD_4,
+ * _FP_FRAC_ADD_4, _FP_FRAC_SRS_4
+ * _FP_MUL_MEAT_2_gmp() uses _FP_FRAC_SRS_4 (and should use
+ * _FP_FRAC_DECL_4: it appears to be broken and is not used
+ * anywhere anyway. )
+ *
+ * I've now fixed all the macros that were here from the sparc64 code.
+ * [*none* of the shift macros were correct!] -- PMM 02/1998
+ *
+ * The only quadword stuff that remains to be coded is:
+ * 1) the conversion to/from ints, which requires
+ * that we check (in op-common.h) that the following do the right thing
+ * for quadwords: _FP_TO_INT(Q,4,r,X,rsz,rsg), _FP_FROM_INT(Q,4,X,r,rs,rt)
+ * 2) multiply, divide and sqrt, which require:
+ * _FP_MUL_MEAT_4_*(R,X,Y), _FP_DIV_MEAT_4_*(R,X,Y), _FP_SQRT_MEAT_4(R,S,T,X,q),
+ * This also needs _FP_MUL_MEAT_Q and _FP_DIV_MEAT_Q to be defined to
+ * some suitable _FP_MUL_MEAT_4_* macros in sfp-machine.h.
+ * [we're free to choose whatever FP_MUL_MEAT_4_* macros we need for
+ * these; they are used nowhere else. ]
+ */
+/* Declare the four-word fraction of X as the array X_f[4] of _FP_W_TYPE.
+ * Word 0 is the least significant and word 3 the most significant
+ * (see _FP_FRAC_LOW_4 and _FP_FRAC_HIGH_4).
+ */
+#define _FP_FRAC_DECL_4(X)      _FP_W_TYPE X##_f[4]
+/* Copy every word of fraction S into fraction D.  Expands to a single
+ * comma expression, so it can be used wherever an expression is allowed.
+ * D and S are token-pasted and must therefore be plain identifiers.
+ */
+#define _FP_FRAC_COPY_4(D,S)                    \
+  (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1],    \
+   D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
+/* The _FP_FRAC_SET_n(X,I) macro is intended for use with another
+ * macro such as _FP_ZEROFRAC_n which expands to n comma separated values.
+ * The argument is expanded before the inner macro is invoked, so we end
+ * up with __FP_FRAC_SET_4(X,I3,I2,I1,I0): the values are listed most
+ * significant word first and are assigned to X##_f[3] .. X##_f[0].
+ * This is why the number of parameters doesn't appear to match
+ * at first glance...      -- PMM
+ */
+#define _FP_FRAC_SET_4(X,I)     __FP_FRAC_SET_4(X, I)
+/* Accessors: most significant word, least significant word, and word
+ * number w (0 = least significant) of fraction X.  All three expand to
+ * lvalues.
+ */
+#define _FP_FRAC_HIGH_4(X)      (X##_f[3])
+#define _FP_FRAC_LOW_4(X)       (X##_f[0])
+#define _FP_FRAC_WORD_4(X,w)    (X##_f[w])
+/* Shift the four-word fraction X left by N bits, filling with zeros.
+ * X is token-pasted into X_f[] and must be a plain identifier; N is
+ * assumed to be non-negative and smaller than 4*_FP_W_TYPE_SIZE.
+ *
+ * A shift by a whole number of words is handled on its own path: the
+ * general path would shift a word right by _FP_W_TYPE_SIZE bits, which
+ * C leaves undefined (and which does not produce zero on every CPU).
+ * _FP_I_TYPE has to be signed because _i counts down past zero.
+ */
+#define _FP_FRAC_SLL_4(X,N)                                             \
+  do {                                                                  \
+    _FP_I_TYPE _up, _down, _skip, _i;                                   \
+    _skip = (N) / _FP_W_TYPE_SIZE;                                      \
+    _up = (N) % _FP_W_TYPE_SIZE;                                        \
+    _down = _FP_W_TYPE_SIZE - _up;                                      \
+    if (!_up)                                                           \
+    {                                                                   \
+      /* word-aligned: just move whole words upwards */                 \
+      for (_i = 3; _i >= _skip; --_i)                                   \
+        X##_f[_i] = X##_f[_i-_skip];                                    \
+    }                                                                   \
+    else                                                                \
+    {                                                                   \
+      for (_i = 3; _i > _skip; --_i)                                    \
+        X##_f[_i] = X##_f[_i-_skip] << _up                              \
+                    | X##_f[_i-_skip-1] >> _down;                       \
+      /* lowest surviving word has nothing below it to pull bits from */ \
+      X##_f[_i--] = X##_f[0] << _up;                                    \
+    }                                                                   \
+    /* everything below the shifted-in position becomes zero */         \
+    for (; _i >= 0; --_i)                                               \
+      X##_f[_i] = 0;                                                    \
+  } while (0)
+/* Logical right shift of the four-word fraction X by N bits; bits that
+ * fall off the bottom are discarded and zeros enter at the top.
+ * X must be a plain identifier; N is assumed to be non-negative and
+ * smaller than 4*_FP_W_TYPE_SIZE.
+ *
+ * Word-aligned shifts take a separate path so that no word is ever
+ * shifted left by _FP_W_TYPE_SIZE bits, which is undefined in C.
+ */
+#define _FP_FRAC_SRL_4(X,N)                                             \
+  do {                                                                  \
+    _FP_I_TYPE _up, _down, _skip, _i;                                   \
+    _skip = (N) / _FP_W_TYPE_SIZE;                                      \
+    _down = (N) % _FP_W_TYPE_SIZE;                                      \
+    _up = _FP_W_TYPE_SIZE - _down;                                      \
+    if (!_down)                                                         \
+    {                                                                   \
+      /* word-aligned: just move whole words downwards */               \
+      for (_i = 0; _i <= 3-_skip; ++_i)                                 \
+        X##_f[_i] = X##_f[_i+_skip];                                    \
+    }                                                                   \
+    else                                                                \
+    {                                                                   \
+      for (_i = 0; _i < 3-_skip; ++_i)                                  \
+        X##_f[_i] = X##_f[_i+_skip] >> _down                            \
+                    | X##_f[_i+_skip+1] << _up;                         \
+      /* top surviving word has nothing above it to pull bits from */   \
+      X##_f[_i++] = X##_f[3] >> _down;                                  \
+    }                                                                   \
+    for (; _i < 4; ++_i)                                                \
+      X##_f[_i] = 0;                                                    \
+  } while (0)
+/* Right shift with sticky-lsb.
+ * What this actually means is that we do a standard right-shift,
+ * but that if any of the bits that fall off the right hand side
+ * were one then we always set the LSbit.
+ *
+ * X must be a plain identifier; N is assumed to be non-negative and
+ * smaller than 4*_FP_W_TYPE_SIZE.  The size argument is accepted for
+ * symmetry with the narrower variants and is not used here.
+ *
+ * Word-aligned shifts take a separate path: the general path would
+ * shift a word left by _FP_W_TYPE_SIZE bits, which is undefined in C
+ * and, on CPUs that mask the shift count, would feed a whole surviving
+ * word into the sticky accumulator.
+ */
+#define _FP_FRAC_SRS_4(X,N,size)                                        \
+  do {                                                                  \
+    _FP_I_TYPE _up, _down, _skip, _i;                                   \
+    _FP_W_TYPE _s;                                                      \
+    _skip = (N) / _FP_W_TYPE_SIZE;                                      \
+    _down = (N) % _FP_W_TYPE_SIZE;                                      \
+    _up = _FP_W_TYPE_SIZE - _down;                                      \
+    /* whole words that are shifted out completely */                   \
+    for (_s = _i = 0; _i < _skip; ++_i)                                 \
+      _s |= X##_f[_i];                                                  \
+    if (!_down)                                                         \
+    {                                                                   \
+      for (_i = 0; _i <= 3-_skip; ++_i)                                 \
+        X##_f[_i] = X##_f[_i+_skip];                                    \
+    }                                                                   \
+    else                                                                \
+    {                                                                   \
+      /* low bits of the first surviving word are lost as well */       \
+      _s |= X##_f[_i] << _up;                                           \
+      for (_i = 0; _i < 3-_skip; ++_i)                                  \
+        X##_f[_i] = X##_f[_i+_skip] >> _down                            \
+                    | X##_f[_i+_skip+1] << _up;                         \
+      X##_f[_i++] = X##_f[3] >> _down;                                  \
+    }                                                                   \
+    for (; _i < 4; ++_i)                                                \
+      X##_f[_i] = 0;                                                    \
+    /* don't fix the LSB until the very end when we're sure f[0] is stable */ \
+    X##_f[0] |= (_s != 0);                                              \
+  } while (0)
+/* R = X + Y on four-word fractions.  Splits the three fractions into
+ * their individual words (most significant first) and hands them to
+ * __FP_FRAC_ADD_4, which does the arithmetic.
+ */
+#define _FP_FRAC_ADD_4(R,X,Y)                                           \
+  __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],               \
+                  X##_f[3], X##_f[2], X##_f[1], X##_f[0],               \
+                  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
+/* R = X - Y on four-word fractions, via __FP_FRAC_SUB_4. */
+#define _FP_FRAC_SUB_4(R,X,Y)                                           \
+  __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],               \
+                  X##_f[3], X##_f[2], X##_f[1], X##_f[0],               \
+                  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
+/* X += I, where I is a single-word value added to the least significant
+ * word; carry handling is left to __FP_FRAC_ADDI_4.
+ */
+#define _FP_FRAC_ADDI_4(X,I)                                            \
+  __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
+/* Initialiser lists for _FP_FRAC_SET_4, most significant word first:
+ * the all-zero fraction, and the smallest non-zero fraction (only bit 0
+ * of word 0 set).
+ */
+#define _FP_ZEROFRAC_4  0,0,0,0
+#define _FP_MINFRAC_4   0,0,0,1
+/* True when every word of X is zero. */
+#define _FP_FRAC_ZEROP_4(X)     ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
+/* True when the top word of X is negative once reinterpreted as
+ * _FP_WS_TYPE (presumably the signed counterpart of _FP_W_TYPE, which is
+ * declared elsewhere), i.e. when the most significant bit of X is set.
+ */
+#define _FP_FRAC_NEGP_4(X)      ((_FP_WS_TYPE)X##_f[3] < 0)
+/* Non-zero when the overflow bit of format fs is set in fraction X.
+ * The overflow bit sits just above the most significant fraction bit,
+ * so it has to be looked for in the top word X_f[3]; testing the least
+ * significant word would pick up an ordinary low-order fraction bit.
+ * NOTE(review): this assumes _FP_OVERFLOW_##fs is a mask relative to the
+ * most significant word -- confirm against the format's definitions.
+ */
+#define _FP_FRAC_OVERP_4(fs,X)  (X##_f[3] & _FP_OVERFLOW_##fs)
+/* 1 when fractions X and Y agree in all four words, 0 otherwise. */
+#define _FP_FRAC_EQ_4(X,Y)                              \
+ (!(X##_f[3] != Y##_f[3] || X##_f[2] != Y##_f[2]        \
+    || X##_f[1] != Y##_f[1] || X##_f[0] != Y##_f[0]))
+/* 1 when X > Y as unsigned four-word numbers, 0 otherwise.  The first
+ * word, scanning from the most significant end, in which the two
+ * fractions differ decides the result.
+ */
+#define _FP_FRAC_GT_4(X,Y)                              \
+ (X##_f[3] != Y##_f[3] ? X##_f[3] > Y##_f[3] :          \
+  X##_f[2] != Y##_f[2] ? X##_f[2] > Y##_f[2] :          \
+  X##_f[1] != Y##_f[1] ? X##_f[1] > Y##_f[1] :          \
+  X##_f[0] > Y##_f[0])
+/* 1 when X >= Y as unsigned four-word numbers, 0 otherwise.  Same scan
+ * as the strict comparison, except that fractions which agree in the
+ * upper three words fall through to a non-strict test on word 0.
+ */
+#define _FP_FRAC_GE_4(X,Y)                              \
+ (X##_f[3] != Y##_f[3] ? X##_f[3] > Y##_f[3] :          \
+  X##_f[2] != Y##_f[2] ? X##_f[2] > Y##_f[2] :          \
+  X##_f[1] != Y##_f[1] ? X##_f[1] > Y##_f[1] :          \
+  X##_f[0] >= Y##_f[0])
+/* Store in R the number of leading zero bits of the four-word fraction X.
+ * The most significant non-zero word is located, __FP_CLZ counts the
+ * leading zeros inside that word, and _FP_W_TYPE_SIZE is added for each
+ * all-zero word above it.  When X is entirely zero the count is taken
+ * from word 0, so the result then depends on what __FP_CLZ does for a
+ * zero argument.
+ */
+#define _FP_FRAC_CLZ_4(R,X)             \
+  do {                                  \
+    if (X##_f[3])                       \
+    {                                   \
+        __FP_CLZ(R,X##_f[3]);           \
+    }                                   \
+    else if (X##_f[2])                  \
+    {                                   \
+        __FP_CLZ(R,X##_f[2]);           \
+        R += _FP_W_TYPE_SIZE;           \
+    }                                   \
+    else if (X##_f[1])                  \
+    {                                   \
+        /* count within word 1 itself; word 2 is known to be zero here */ \
+        __FP_CLZ(R,X##_f[1]);           \
+        R += _FP_W_TYPE_SIZE*2;         \
+    }                                   \
+    else                                \
+    {                                   \
+        __FP_CLZ(R,X##_f[0]);           \
+        R += _FP_W_TYPE_SIZE*3;         \
+    }                                   \
+  } while(0)
+/* Split the floating point value val of format fs into its raw fields.
+ * val is stored into the flt member of a local union _FP_UNION_##fs and
+ * read back through the union's bits member: frac0..frac3 go to
+ * X_f[0]..X_f[3], exp to X_e and sign to X_s.  No bias removal or
+ * normalisation happens here.  X_e and X_s must already be declared by
+ * the caller; the union type is declared elsewhere.
+ */
+#define _FP_UNPACK_RAW_4(fs, X, val)                            \
+  do {                                                          \
+    union _FP_UNION_##fs _flo; _flo.flt = (val);                \
+    X##_f[0] = _flo.bits.frac0;                                 \
+    X##_f[1] = _flo.bits.frac1;                                 \
+    X##_f[2] = _flo.bits.frac2;                                 \
+    X##_f[3] = _flo.bits.frac3;                                 \
+    X##_e  = _flo.bits.exp;                                     \
+    X##_s  = _flo.bits.sign;                                    \
+  } while (0)
+/* Inverse of the raw unpack: store X_f[0]..X_f[3], X_e and X_s into the
+ * frac0..frac3, exp and sign fields of a local union _FP_UNION_##fs and
+ * assign the resulting flt member to val.  The fields are stored as they
+ * are; any rounding or range checking must have been done beforehand.
+ */
+#define _FP_PACK_RAW_4(fs, val, X)                              \
+  do {                                                          \
+    union _FP_UNION_##fs _flo;                                  \
+    _flo.bits.frac0 = X##_f[0];                                 \
+    _flo.bits.frac1 = X##_f[1];                                 \
+    _flo.bits.frac2 = X##_f[2];                                 \
+    _flo.bits.frac3 = X##_f[3];                                 \
+    _flo.bits.exp   = X##_e;                                    \
+    _flo.bits.sign  = X##_s;                                    \
+    (val) = _flo.flt;                                           \
+  } while (0)
+/*
+ * Internals
+ */
+/* Assign four word values to fraction X.  The values are given most
+ * significant first: I3 goes to X_f[3] and I0 to X_f[0].  Expands to a
+ * comma expression whose value is the word assigned last, X_f[0].
+ */
+#define __FP_FRAC_SET_4(X,I3,I2,I1,I0)                                  \
+  (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
+#ifndef __FP_FRAC_ADD_4
+/* Four-word addition r = x + y, words given most significant first.
+ * May be overridden by defining __FP_FRAC_ADD_4 before this point.
+ *
+ * The carry out of a word is recovered by comparing the sum with the x
+ * operand.  With no carry coming in, the word overflowed iff sum < x;
+ * with a carry coming in it overflowed iff sum <= x (sum == x happens
+ * when the y word is all ones).  Looking only at sum < x loses that
+ * second case and drops the carry.
+ *
+ * The result is a single comma expression.  Operands are evaluated more
+ * than once and must be free of side effects, and because the x words
+ * are re-read after the r words are written, r must not alias x (it may
+ * alias y).
+ */
+#define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)            \
+  (r0 = (x0) + (y0),                                                    \
+   r1 = (x1) + (y1) + (r0 < (x0)),                                      \
+   r2 = (x2) + (y2) + ((r0 < (x0)) ? (r1 <= (x1)) : (r1 < (x1))),       \
+   r3 = (x3) + (y3) + (((r0 < (x0)) ? (r1 <= (x1)) : (r1 < (x1)))       \
+                       ? (r2 <= (x2)) : (r2 < (x2))))
+#endif
+#ifndef __FP_FRAC_SUB_4
+/* Four-word subtraction r = x - y, words given most significant first.
+ * May be overridden by defining __FP_FRAC_SUB_4 before this point.
+ *
+ * The borrow out of a word is recovered by comparing the difference
+ * with the x operand.  With no borrow coming in, the word underflowed
+ * iff diff > x; with a borrow coming in it underflowed iff diff >= x
+ * (diff == x happens when the y word is all ones).  Looking only at
+ * diff > x loses that second case and drops the borrow.
+ *
+ * The result is a single comma expression.  Operands are evaluated more
+ * than once and must be free of side effects, and because the x words
+ * are re-read after the r words are written, r must not alias x (it may
+ * alias y).
+ */
+#define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)            \
+  (r0 = (x0) - (y0),                                                    \
+   r1 = (x1) - (y1) - (r0 > (x0)),                                      \
+   r2 = (x2) - (y2) - ((r0 > (x0)) ? (r1 >= (x1)) : (r1 > (x1))),       \
+   r3 = (x3) - (y3) - (((r0 > (x0)) ? (r1 >= (x1)) : (r1 > (x1)))       \
+                       ? (r2 >= (x2)) : (r2 > (x2))))
+#endif
+#ifndef __FP_FRAC_ADDI_4
+/* Add the single-word value i to the four-word number x3:x2:x1:x0 in
+ * place.  May be overridden by defining __FP_FRAC_ADDI_4 beforehand.
+ *
+ * Each step is a separate operand of the comma operator, so every word
+ * is fully updated before it is inspected; comparing a word with itself
+ * inside the expression that modifies it has no defined result in C.
+ * Word 0 wrapped iff it ended up smaller than i, and the carry keeps
+ * travelling upwards for as long as the word it was added to wrapped
+ * round to zero.
+ *
+ * The value of the expression is the updated x3.  i is evaluated more
+ * than once and must be free of side effects.
+ */
+#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)                                 \
+  (x0 += (i),                                                           \
+   x1 += (x0 < (i)),                                                    \
+   x2 += (x0 < (i) && x1 == 0),                                         \
+   x3 += (x0 < (i) && x1 == 0 && x2 == 0))
+#endif
+/* Convert FP values between word sizes. This appears to be more
+ * complicated than I'd have expected it to be, so these might be
+ * wrong... These macros are in any case somewhat bogus because they
+ * use information about what various FRAC_n variables look like
+ * internally [eg, that 2 word vars are X_f0 and x_f1]. But so do
+ * the ones in op-2.h and op-1.h.
+ */
+/* Narrow the four-word fraction S (format sfs) to the one-word fraction
+ * D (format dfs).  S is shifted right in place, with sticky LSB, by the
+ * difference between the two working fraction widths, and its lowest
+ * word is then copied to D_f.  Note that S is modified by this.
+ */
+#define _FP_FRAC_CONV_1_4(dfs, sfs, D, S)                               \
+   do {                                                                 \
+     _FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),     \
+                        _FP_WFRACBITS_##sfs);                           \
+     D##_f = S##_f[0];                                                   \
+  } while (0)
+/* Narrow the four-word fraction S (format sfs) to the two-word fraction
+ * D (format dfs).  S is shifted right in place, with sticky LSB, by the
+ * difference between the two working fraction widths; its two lowest
+ * words are then copied to D_f0 (low) and D_f1 (high).  S is modified.
+ */
+#define _FP_FRAC_CONV_2_4(dfs, sfs, D, S)                               \
+   do {                                                                 \
+     _FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),     \
+                        _FP_WFRACBITS_##sfs);                           \
+     D##_f0 = S##_f[0];                                                  \
+     D##_f1 = S##_f[1];                                                  \
+  } while (0)
+/* Assembly/disassembly for converting to/from integral types.
+ * No shifting or overflow handled here.
+ */
+/* Gather the words of fraction X into the integer r, which is rsize
+ * bits wide.  Only as many low-order words as fit into r are used; the
+ * widest case is tried first and narrower integers fall through.
+ */
+#define _FP_FRAC_ASSEMBLE_4(r, X, rsize)                                \
+  do {                                                                  \
+    if (rsize > 2*_FP_W_TYPE_SIZE)                                      \
+    {                                                                   \
+      /* three-word (implausible) and four-word integers share this */ \
+      /* path: start at the top word and fold in the ones below it. */ \
+      int _word;                                                        \
+      r = X##_f[3];                                                     \
+      for (_word = 2; _word >= 0; --_word)                              \
+      {                                                                 \
+        r <<= _FP_W_TYPE_SIZE;                                          \
+        r += X##_f[_word];                                              \
+      }                                                                 \
+    }                                                                   \
+    else if (rsize > _FP_W_TYPE_SIZE)                                   \
+    {                                                                   \
+      r = X##_f[1];                                                     \
+      r <<= _FP_W_TYPE_SIZE;                                            \
+      r += X##_f[0];                                                    \
+    }                                                                   \
+    else                                                                \
+      r = X##_f[0];                                                     \
+  } while (0)
+/* "No disassemble Number Five!" */
+/* Spread the rsize-bit integer r over the words of fraction X.  Storing
+ * into an X_f[] element truncates to _FP_W_TYPE_SIZE bits by itself, so
+ * no masking is needed.  Words lying wholly above bit rsize-1 are set to
+ * zero rather than being computed with an over-wide shift of r.
+ */
+#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize)                             \
+  do {                                                                  \
+    X##_f[0] = r;                                                       \
+    if (rsize <= _FP_W_TYPE_SIZE)                                       \
+      X##_f[1] = 0;                                                     \
+    else                                                                \
+      X##_f[1] = r >> _FP_W_TYPE_SIZE;                                  \
+    if (rsize <= 2*_FP_W_TYPE_SIZE)                                     \
+      X##_f[2] = 0;                                                     \
+    else                                                                \
+      X##_f[2] = r >> 2*_FP_W_TYPE_SIZE;                                \
+    if (rsize <= 3*_FP_W_TYPE_SIZE)                                     \
+      X##_f[3] = 0;                                                     \
+    else                                                                \
+      X##_f[3] = r >> 3*_FP_W_TYPE_SIZE;                                \
+  } while (0)
+/* Widen the one-word fraction S (format sfs) to the four-word fraction
+ * D (format dfs): S_f becomes the lowest word of D, the upper words are
+ * cleared, and D is shifted left by the difference between the two
+ * working fraction widths.  S is left untouched.
+ */
+#define _FP_FRAC_CONV_4_1(dfs, sfs, D, S)                               \
+   do {                                                                 \
+     D##_f[0] = S##_f;                                                  \
+     D##_f[1] = D##_f[2] = D##_f[3] = 0;                                \
+     _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));    \
+   } while (0)
+/* Widen the two-word fraction S (format sfs) to the four-word fraction
+ * D (format dfs): S_f0 and S_f1 become the two lowest words of D, the
+ * upper two words are cleared, and D is shifted left by the difference
+ * between the two working fraction widths.  S is left untouched.
+ */
+#define _FP_FRAC_CONV_4_2(dfs, sfs, D, S)                               \
+   do {                                                                 \
+     D##_f[0] = S##_f0;                                                 \
+     D##_f[1] = S##_f1;                                                 \
+     D##_f[2] = D##_f[3] = 0;                                           \
+     _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));    \
+   } while (0)
+/* FIXME! This has to be written.
+ * Placeholder only: the macro currently expands to nothing, so any
+ * four-word square root built on it compiles but computes no result.
+ */
+#define _FP_SQRT_MEAT_4(R, S, T, X, q)

diff --git a/arch/powerpc/math-emu/op-4.h b/arch/powerpc/math-emu/op-4.h new file mode 100644 index 000000000000..fcdd6d064c54 --- /dev/null +++ b/arch/powerpc/math-emu/op-4.h
@@ -0,0 +1,297 @@
	1	/*
	2	* Basic four-word fraction declaration and manipulation.
	3	*
	4	* When adding quadword support for 32 bit machines, we need
	5	* to be a little careful as double multiply uses some of these
	6	* macros: (in op-2.h)
	7	* _FP_MUL_MEAT_2_wide() uses _FP_FRAC_DECL_4, _FP_FRAC_WORD_4,
	8	* _FP_FRAC_ADD_4, _FP_FRAC_SRS_4
	9	* _FP_MUL_MEAT_2_gmp() uses _FP_FRAC_SRS_4 (and should use
	10	* _FP_FRAC_DECL_4: it appears to be broken and is not used
	11	* anywhere anyway. )
	12	*
	13	* I've now fixed all the macros that were here from the sparc64 code.
	14	* [*none* of the shift macros were correct!] -- PMM 02/1998
	15	*
	16	* The only quadword stuff that remains to be coded is:
	17	* 1) the conversion to/from ints, which requires
	18	* that we check (in op-common.h) that the following do the right thing
	19	* for quadwords: _FP_TO_INT(Q,4,r,X,rsz,rsg), _FP_FROM_INT(Q,4,X,r,rs,rt)
	20	* 2) multiply, divide and sqrt, which require:
	21	* _FP_MUL_MEAT_4_*(R,X,Y), _FP_DIV_MEAT_4_*(R,X,Y), _FP_SQRT_MEAT_4(R,S,T,X,q),
	22	* This also needs _FP_MUL_MEAT_Q and _FP_DIV_MEAT_Q to be defined to
	23	* some suitable _FP_MUL_MEAT_4_* macros in sfp-machine.h.
	24	* [we're free to choose whatever FP_MUL_MEAT_4_* macros we need for
	25	* these; they are used nowhere else. ]
	26	*/
	27
	28	#define _FP_FRAC_DECL_4(X) _FP_W_TYPE X##_f[4]
	29	#define _FP_FRAC_COPY_4(D,S) \
	30	(D##_f[0] = S##_f[0], D##_f[1] = S##_f[1], \
	31	D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
	32	/* The _FP_FRAC_SET_n(X,I) macro is intended for use with another
	33	* macro such as _FP_ZEROFRAC_n which returns n comma separated values.
	34	* The result is that we get an expansion of __FP_FRAC_SET_n(X,I0,I1,I2,I3)
	35	* which just assigns the In values to the array X##_f[].
	36	* This is why the number of parameters doesn't appear to match
	37	* at first glance... -- PMM
	38	*/
	39	#define _FP_FRAC_SET_4(X,I) __FP_FRAC_SET_4(X, I)
	40	#define _FP_FRAC_HIGH_4(X) (X##_f[3])
	41	#define _FP_FRAC_LOW_4(X) (X##_f[0])
	42	#define _FP_FRAC_WORD_4(X,w) (X##_f[w])
	43
	44	#define _FP_FRAC_SLL_4(X,N) \
	45	do { \
	46	_FP_I_TYPE _up, _down, _skip, _i; \
	47	_skip = (N) / _FP_W_TYPE_SIZE; \
	48	_up = (N) % _FP_W_TYPE_SIZE; \
	49	_down = _FP_W_TYPE_SIZE - _up; \
	50	for (_i = 3; _i > _skip; --_i) \
	51	X##_f[_i] = X##_f[_i-_skip] << _up | X##_f[_i-_skip-1] >> _down; \
	52	/* bugfixed: was X##_f[_i] <<= _up; -- PMM 02/1998 */ \
	53	X##_f[_i] = X##_f[0] << _up; \
	54	for (--_i; _i >= 0; --_i) \
	55	X##_f[_i] = 0; \
	56	} while (0)
	57
	58	/* This one was broken too */
	59	#define _FP_FRAC_SRL_4(X,N) \
	60	do { \
	61	_FP_I_TYPE _up, _down, _skip, _i; \
	62	_skip = (N) / _FP_W_TYPE_SIZE; \
	63	_down = (N) % _FP_W_TYPE_SIZE; \
	64	_up = _FP_W_TYPE_SIZE - _down; \
	65	for (_i = 0; _i < 3-_skip; ++_i) \
	66	X##_f[_i] = X##_f[_i+_skip] >> _down | X##_f[_i+_skip+1] << _up; \
	67	X##_f[_i] = X##_f[3] >> _down; \
	68	for (++_i; _i < 4; ++_i) \
	69	X##_f[_i] = 0; \
	70	} while (0)
	71
	72
	73	/* Right shift with sticky-lsb.
	74	* What this actually means is that we do a standard right-shift,
	75	* but that if any of the bits that fall off the right hand side
	76	* were one then we always set the LSbit.
	77	*/
	78	#define _FP_FRAC_SRS_4(X,N,size) \
	79	do { \
	80	_FP_I_TYPE _up, _down, _skip, _i; \
	81	_FP_W_TYPE _s; \
	82	_skip = (N) / _FP_W_TYPE_SIZE; \
	83	_down = (N) % _FP_W_TYPE_SIZE; \
	84	_up = _FP_W_TYPE_SIZE - _down; \
	85	for (_s = _i = 0; _i < _skip; ++_i) \
	86	_s |= X##_f[_i]; \
	87	_s |= X##_f[_i] << _up; \
	88	/* s is now != 0 if we want to set the LSbit */ \
	89	for (_i = 0; _i < 3-_skip; ++_i) \
	90	X##_f[_i] = X##_f[_i+_skip] >> _down | X##_f[_i+_skip+1] << _up; \
	91	X##_f[_i] = X##_f[3] >> _down; \
	92	for (++_i; _i < 4; ++_i) \
	93	X##_f[_i] = 0; \
	94	/* don't fix the LSB until the very end when we're sure f[0] is stable */ \
	95	X##_f[0] |= (_s != 0); \
	96	} while (0)
	97
	98	#define _FP_FRAC_ADD_4(R,X,Y) \
	99	__FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
	100	X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
	101	Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
	102
	103	#define _FP_FRAC_SUB_4(R,X,Y) \
	104	__FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
	105	X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
	106	Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
	107
	108	#define _FP_FRAC_ADDI_4(X,I) \
	109	__FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
	110
	111	#define _FP_ZEROFRAC_4 0,0,0,0
	112	#define _FP_MINFRAC_4 0,0,0,1
	113
	114	#define _FP_FRAC_ZEROP_4(X) ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
	115	#define _FP_FRAC_NEGP_4(X) ((_FP_WS_TYPE)X##_f[3] < 0)
	116	#define _FP_FRAC_OVERP_4(fs,X) (X##_f[0] & _FP_OVERFLOW_##fs)
	117
	118	#define _FP_FRAC_EQ_4(X,Y) \
	119	(X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1] \
	120	&& X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])
	121
	122	#define _FP_FRAC_GT_4(X,Y) \
	123	(X##_f[3] > Y##_f[3] || \
	124	(X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \
	125	(X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \
	126	(X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0]) \
	127	)) \
	128	)) \
	129	)
	130
	131	#define _FP_FRAC_GE_4(X,Y) \
	132	(X##_f[3] > Y##_f[3] || \
	133	(X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \
	134	(X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \
	135	(X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0]) \
	136	)) \
	137	)) \
	138	)
	139
	140
	141	#define _FP_FRAC_CLZ_4(R,X) \
	142	do { \
	143	if (X##_f[3]) \
	144	{ \
	145	__FP_CLZ(R,X##_f[3]); \
	146	} \
	147	else if (X##_f[2]) \
	148	{ \
	149	__FP_CLZ(R,X##_f[2]); \
	150	R += _FP_W_TYPE_SIZE; \
	151	} \
	152	else if (X##_f[1]) \
	153	{ \
	154	__FP_CLZ(R,X##_f[2]); \
	155	R += _FP_W_TYPE_SIZE*2; \
	156	} \
	157	else \
	158	{ \
	159	__FP_CLZ(R,X##_f[0]); \
	160	R += _FP_W_TYPE_SIZE*3; \
	161	} \
	162	} while(0)
	163
	164
	165	#define _FP_UNPACK_RAW_4(fs, X, val) \
	166	do { \
	167	union _FP_UNION_##fs _flo; _flo.flt = (val); \
	168	X##_f[0] = _flo.bits.frac0; \
	169	X##_f[1] = _flo.bits.frac1; \
	170	X##_f[2] = _flo.bits.frac2; \
	171	X##_f[3] = _flo.bits.frac3; \
	172	X##_e = _flo.bits.exp; \
	173	X##_s = _flo.bits.sign; \
	174	} while (0)
	175
	176	#define _FP_PACK_RAW_4(fs, val, X) \
	177	do { \
	178	union _FP_UNION_##fs _flo; \
	179	_flo.bits.frac0 = X##_f[0]; \
	180	_flo.bits.frac1 = X##_f[1]; \
	181	_flo.bits.frac2 = X##_f[2]; \
	182	_flo.bits.frac3 = X##_f[3]; \
	183	_flo.bits.exp = X##_e; \
	184	_flo.bits.sign = X##_s; \
	185	(val) = _flo.flt; \
	186	} while (0)
	187
	188
	189	/*
	190	* Internals
	191	*/
	192
	193	#define __FP_FRAC_SET_4(X,I3,I2,I1,I0) \
	194	(X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
	195
	196	#ifndef __FP_FRAC_ADD_4
	197	#define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \
	198	(r0 = x0 + y0, \
	199	r1 = x1 + y1 + (r0 < x0), \
	200	r2 = x2 + y2 + (r1 < x1), \
	201	r3 = x3 + y3 + (r2 < x2))
	202	#endif
	203
	204	#ifndef __FP_FRAC_SUB_4
	205	#define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \
	206	(r0 = x0 - y0, \
	207	r1 = x1 - y1 - (r0 > x0), \
	208	r2 = x2 - y2 - (r1 > x1), \
	209	r3 = x3 - y3 - (r2 > x2))
	210	#endif
	211
	212	#ifndef __FP_FRAC_ADDI_4
	213	/* I always wanted to be a lisp programmer :-> */
	214	#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i) \
	215	(x3 += ((x2 += ((x1 += ((x0 += i) < x0)) < x1) < x2)))
	216	#endif
	217
	218	/* Convert FP values between word sizes. This appears to be more
	219	* complicated than I'd have expected it to be, so these might be
	220	* wrong... These macros are in any case somewhat bogus because they
	221	* use information about what various FRAC_n variables look like
	222	* internally [eg, that 2 word vars are X_f0 and x_f1]. But so do
	223	* the ones in op-2.h and op-1.h.
	224	*/
	225	#define _FP_FRAC_CONV_1_4(dfs, sfs, D, S) \
	226	do { \
	227	_FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \
	228	_FP_WFRACBITS_##sfs); \
	229	D##_f = S##_f[0]; \
	230	} while (0)
	231
	232	#define _FP_FRAC_CONV_2_4(dfs, sfs, D, S) \
	233	do { \
	234	_FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \
	235	_FP_WFRACBITS_##sfs); \
	236	D##_f0 = S##_f[0]; \
	237	D##_f1 = S##_f[1]; \
	238	} while (0)
	239
	240	/* Assembly/disassembly for converting to/from integral types.
	241	* No shifting or overflow handled here.
	242	*/
	243	/* Put the FP value X into r, which is an integer of size rsize. */
	244	#define _FP_FRAC_ASSEMBLE_4(r, X, rsize) \
	245	do { \
	246	if (rsize <= _FP_W_TYPE_SIZE) \
	247	r = X##_f[0]; \
	248	else if (rsize <= 2*_FP_W_TYPE_SIZE) \
	249	{ \
	250	r = X##_f[1]; \
	251	r <<= _FP_W_TYPE_SIZE; \
	252	r += X##_f[0]; \
	253	} \
	254	else \
	255	{ \
	256	/* I'm feeling lazy so we deal with int == 3words (implausible)*/ \
	257	/* and int == 4words as a single case. */ \
	258	r = X##_f[3]; \
	259	r <<= _FP_W_TYPE_SIZE; \
	260	r += X##_f[2]; \
	261	r <<= _FP_W_TYPE_SIZE; \
	262	r += X##_f[1]; \
	263	r <<= _FP_W_TYPE_SIZE; \
	264	r += X##_f[0]; \
	265	} \
	266	} while (0)
	267
	268	/* "No disassemble Number Five!" */
	269	/* move an integer of size rsize into X's fractional part. We rely on
	270	* the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
	271	* having to mask the values we store into it.
	272	*/
	273	#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize) \
	274	do { \
	275	X##_f[0] = r; \
	276	X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
	277	X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \
	278	X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \
	279	} while (0)
	280
	281	#define _FP_FRAC_CONV_4_1(dfs, sfs, D, S) \
	282	do { \
	283	D##_f[0] = S##_f; \
	284	D##_f[1] = D##_f[2] = D##_f[3] = 0; \
	285	_FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \
	286	} while (0)
	287
	288	#define _FP_FRAC_CONV_4_2(dfs, sfs, D, S) \
	289	do { \
	290	D##_f[0] = S##_f0; \
	291	D##_f[1] = S##_f1; \
	292	D##_f[2] = D##_f[3] = 0; \
	293	_FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \
	294	} while (0)
	295
	296	/* FIXME! This has to be written */
	297	#define _FP_SQRT_MEAT_4(R, S, T, X, q)