1 files changed, 245 insertions, 0 deletions
diff --git a/arch/powerpc/math-emu/op-1.h b/arch/powerpc/math-emu/op-1.h
new file mode 100644
index 000000000000..c92fa95f562e
--- /dev/null
+++ b/arch/powerpc/math-emu/op-1.h
@@ -0,0 +1,245 @@
+/*
+ * Basic one-word fraction declaration and manipulation.
+ */
+#define _FP_FRAC_DECL_1(X)      _FP_W_TYPE X##_f
+#define _FP_FRAC_COPY_1(D,S)    (D##_f = S##_f)
+#define _FP_FRAC_SET_1(X,I)     (X##_f = I)
+#define _FP_FRAC_HIGH_1(X)      (X##_f)
+#define _FP_FRAC_LOW_1(X)       (X##_f)
+#define _FP_FRAC_WORD_1(X,w)    (X##_f)
+#define _FP_FRAC_ADDI_1(X,I)    (X##_f += I)
+#define _FP_FRAC_SLL_1(X,N)                     \
+  do {                                          \
+    if (__builtin_constant_p(N) && (N) == 1)    \
+      X##_f += X##_f;                           \
+    else                                        \
+      X##_f <<= (N);                            \
+  } while (0)
+#define _FP_FRAC_SRL_1(X,N)     (X##_f >>= N)
+/* Right shift with sticky-lsb.  */
+#define _FP_FRAC_SRS_1(X,N,sz)  __FP_FRAC_SRS_1(X##_f, N, sz)
+#define __FP_FRAC_SRS_1(X,N,sz)                                         \
+   (X = (X >> (N) | (__builtin_constant_p(N) && (N) == 1                \
+                     ? X & 1 : (X << (_FP_W_TYPE_SIZE - (N))) != 0)))
+#define _FP_FRAC_ADD_1(R,X,Y)   (R##_f = X##_f + Y##_f)
+#define _FP_FRAC_SUB_1(R,X,Y)   (R##_f = X##_f - Y##_f)
+#define _FP_FRAC_CLZ_1(z, X)    __FP_CLZ(z, X##_f)
+/* Predicates */
+#define _FP_FRAC_NEGP_1(X)      ((_FP_WS_TYPE)X##_f < 0)
+#define _FP_FRAC_ZEROP_1(X)     (X##_f == 0)
+#define _FP_FRAC_OVERP_1(fs,X)  (X##_f & _FP_OVERFLOW_##fs)
+#define _FP_FRAC_EQ_1(X, Y)     (X##_f == Y##_f)
+#define _FP_FRAC_GE_1(X, Y)     (X##_f >= Y##_f)
+#define _FP_FRAC_GT_1(X, Y)     (X##_f > Y##_f)
+#define _FP_ZEROFRAC_1          0
+#define _FP_MINFRAC_1           1
+/*
+ * Unpack the raw bits of a native fp value.  Do not classify or
+ * normalize the data.
+ */
+#define _FP_UNPACK_RAW_1(fs, X, val)                            \
+  do {                                                          \
+    union _FP_UNION_##fs _flo; _flo.flt = (val);                \
+                                                                \
+    X##_f = _flo.bits.frac;                                     \
+    X##_e = _flo.bits.exp;                                      \
+    X##_s = _flo.bits.sign;                                     \
+  } while (0)
+/*
+ * Repack the raw bits of a native fp value.
+ */
+#define _FP_PACK_RAW_1(fs, val, X)                              \
+  do {                                                          \
+    union _FP_UNION_##fs _flo;                                  \
+                                                                \
+    _flo.bits.frac = X##_f;                                     \
+    _flo.bits.exp  = X##_e;                                     \
+    _flo.bits.sign = X##_s;                                     \
+                                                                \
+    (val) = _flo.flt;                                           \
+  } while (0)
+/*
+ * Multiplication algorithms:
+ */
+/* Basic.  Assuming the host word size is >= 2*FRACBITS, we can do the
+   multiplication immediately.  */
+#define _FP_MUL_MEAT_1_imm(fs, R, X, Y)                                 \
+  do {                                                                  \
+    R##_f = X##_f * Y##_f;                                              \
+    /* Normalize since we know where the msb of the multiplicands       \
+       were (bit B), we know that the msb of the of the product is      \
+       at either 2B or 2B-1.  */                                        \
+    _FP_FRAC_SRS_1(R, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs);      \
+  } while (0)
+/* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
+#define _FP_MUL_MEAT_1_wide(fs, R, X, Y, doit)                          \
+  do {                                                                  \
+    _FP_W_TYPE _Z_f0, _Z_f1;                                            \
+    doit(_Z_f1, _Z_f0, X##_f, Y##_f);                                   \
+    /* Normalize since we know where the msb of the multiplicands       \
+       were (bit B), we know that the msb of the of the product is      \
+       at either 2B or 2B-1.  */                                        \
+    _FP_FRAC_SRS_2(_Z, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs);     \
+    R##_f = _Z_f0;                                                      \
+  } while (0)
+/* Finally, a simple widening multiply algorithm.  What fun!  */
+#define _FP_MUL_MEAT_1_hard(fs, R, X, Y)                                \
+  do {                                                                  \
+    _FP_W_TYPE _xh, _xl, _yh, _yl, _z_f0, _z_f1, _a_f0, _a_f1;          \
+                                                                        \
+    /* split the words in half */                                       \
+    _xh = X##_f >> (_FP_W_TYPE_SIZE/2);                                 \
+    _xl = X##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1);         \
+    _yh = Y##_f >> (_FP_W_TYPE_SIZE/2);                                 \
+    _yl = Y##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1);         \
+                                                                        \
+    /* multiply the pieces */                                           \
+    _z_f0 = _xl * _yl;                                                  \
+    _a_f0 = _xh * _yl;                                                  \
+    _a_f1 = _xl * _yh;                                                  \
+    _z_f1 = _xh * _yh;                                                  \
+                                                                        \
+    /* reassemble into two full words */                                \
+    if ((_a_f0 += _a_f1) < _a_f1)                                       \
+      _z_f1 += (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2);                    \
+    _a_f1 = _a_f0 >> (_FP_W_TYPE_SIZE/2);                               \
+    _a_f0 = _a_f0 << (_FP_W_TYPE_SIZE/2);                               \
+    _FP_FRAC_ADD_2(_z, _z, _a);                                         \
+                                                                        \
+    /* normalize */                                                     \
+    _FP_FRAC_SRS_2(_z, _FP_WFRACBITS_##fs - 1, 2*_FP_WFRACBITS_##fs);   \
+    R##_f = _z_f0;                                                      \
+  } while (0)
+/*
+ * Division algorithms:
+ */
+/* Basic.  Assuming the host word size is >= 2*FRACBITS, we can do the
+   division immediately.  Give this macro either _FP_DIV_HELP_imm for
+   C primitives or _FP_DIV_HELP_ldiv for the ISO function.  Which you
+   choose will depend on what the compiler does with divrem4.  */
+#define _FP_DIV_MEAT_1_imm(fs, R, X, Y, doit)           \
+  do {                                                  \
+    _FP_W_TYPE _q, _r;                                  \
+    X##_f <<= (X##_f < Y##_f                            \
+               ? R##_e--, _FP_WFRACBITS_##fs            \
+               : _FP_WFRACBITS_##fs - 1);               \
+    doit(_q, _r, X##_f, Y##_f);                         \
+    R##_f = _q | (_r != 0);                             \
+  } while (0)
+/* GCC's longlong.h defines a 2W / 1W => (1W,1W) primitive udiv_qrnnd
+   that may be useful in this situation.  This first is for a primitive
+   that requires normalization, the second for one that does not.  Look
+   for UDIV_NEEDS_NORMALIZATION to tell which your machine needs.  */
+#define _FP_DIV_MEAT_1_udiv_norm(fs, R, X, Y)                           \
+  do {                                                                  \
+    _FP_W_TYPE _nh, _nl, _q, _r;                                        \
+                                                                        \
+    /* Normalize Y -- i.e. make the most significant bit set.  */       \
+    Y##_f <<= _FP_WFRACXBITS_##fs - 1;                                  \
+                                                                        \
+    /* Shift X op correspondingly high, that is, up one full word.  */  \
+    if (X##_f <= Y##_f)                                                 \
+      {                                                                 \
+        _nl = 0;                                                        \
+        _nh = X##_f;                                                    \
+      }                                                                 \
+    else                                                                \
+      {                                                                 \
+        R##_e++;                                                        \
+        _nl = X##_f << (_FP_W_TYPE_SIZE-1);                             \
+        _nh = X##_f >> 1;                                               \
+      }                                                                 \
+                                                                        \
+    udiv_qrnnd(_q, _r, _nh, _nl, Y##_f);                                \
+    R##_f = _q | (_r != 0);                                             \
+  } while (0)
+#define _FP_DIV_MEAT_1_udiv(fs, R, X, Y)                \
+  do {                                                  \
+    _FP_W_TYPE _nh, _nl, _q, _r;                        \
+    if (X##_f < Y##_f)                                  \
+      {                                                 \
+        R##_e--;                                        \
+        _nl = X##_f << _FP_WFRACBITS_##fs;              \
+        _nh = X##_f >> _FP_WFRACXBITS_##fs;             \
+      }                                                 \
+    else                                                \
+      {                                                 \
+        _nl = X##_f << (_FP_WFRACBITS_##fs - 1);        \
+        _nh = X##_f >> (_FP_WFRACXBITS_##fs + 1);       \
+      }                                                 \
+    udiv_qrnnd(_q, _r, _nh, _nl, Y##_f);                \
+    R##_f = _q | (_r != 0);                             \
+  } while (0)
+/*
+ * Square root algorithms:
+ * We have just one right now, maybe Newton approximation
+ * should be added for those machines where division is fast.
+ */
+#define _FP_SQRT_MEAT_1(R, S, T, X, q)                  \
+  do {                                                  \
+    while (q)                                           \
+      {                                                 \
+        T##_f = S##_f + q;                              \
+        if (T##_f <= X##_f)                             \
+          {                                             \
+            S##_f = T##_f + q;                          \
+            X##_f -= T##_f;                             \
+            R##_f += q;                                 \
+          }                                             \
+        _FP_FRAC_SLL_1(X, 1);                           \
+        q >>= 1;                                        \
+      }                                                 \
+  } while (0)
+/*
+ * Assembly/disassembly for converting to/from integral types.
+ * No shifting or overflow handled here.
+ */
+#define _FP_FRAC_ASSEMBLE_1(r, X, rsize)        (r = X##_f)
+#define _FP_FRAC_DISASSEMBLE_1(X, r, rsize)     (X##_f = r)
+/*
+ * Convert FP values between word sizes
+ */
+#define _FP_FRAC_CONV_1_1(dfs, sfs, D, S)                               \
+  do {                                                                  \
+    D##_f = S##_f;                                                      \
+    if (_FP_WFRACBITS_##sfs > _FP_WFRACBITS_##dfs)                      \
+      _FP_FRAC_SRS_1(D, (_FP_WFRACBITS_##sfs-_FP_WFRACBITS_##dfs),      \
+                     _FP_WFRACBITS_##sfs);                              \
+    else                                                                \
+      D##_f <<= _FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs;              \
+  } while (0)

diff --git a/arch/powerpc/math-emu/op-1.h b/arch/powerpc/math-emu/op-1.h new file mode 100644 index 000000000000..c92fa95f562e --- /dev/null +++ b/arch/powerpc/math-emu/op-1.h
@@ -0,0 +1,245 @@
	1	/*
	2	* Basic one-word fraction declaration and manipulation.
	3	*/
	4
	5	#define _FP_FRAC_DECL_1(X) _FP_W_TYPE X##_f
	6	#define _FP_FRAC_COPY_1(D,S) (D##_f = S##_f)
	7	#define _FP_FRAC_SET_1(X,I) (X##_f = I)
	8	#define _FP_FRAC_HIGH_1(X) (X##_f)
	9	#define _FP_FRAC_LOW_1(X) (X##_f)
	10	#define _FP_FRAC_WORD_1(X,w) (X##_f)
	11
	12	#define _FP_FRAC_ADDI_1(X,I) (X##_f += I)
	13	#define _FP_FRAC_SLL_1(X,N) \
	14	do { \
	15	if (__builtin_constant_p(N) && (N) == 1) \
	16	X##_f += X##_f; \
	17	else \
	18	X##_f <<= (N); \
	19	} while (0)
	20	#define _FP_FRAC_SRL_1(X,N) (X##_f >>= N)
	21
	22	/* Right shift with sticky-lsb. */
	23	#define _FP_FRAC_SRS_1(X,N,sz) __FP_FRAC_SRS_1(X##_f, N, sz)
	24
	25	#define __FP_FRAC_SRS_1(X,N,sz) \
	26	(X = (X >> (N) \| (__builtin_constant_p(N) && (N) == 1 \
	27	? X & 1 : (X << (_FP_W_TYPE_SIZE - (N))) != 0)))
	28
	29	#define _FP_FRAC_ADD_1(R,X,Y) (R##_f = X##_f + Y##_f)
	30	#define _FP_FRAC_SUB_1(R,X,Y) (R##_f = X##_f - Y##_f)
	31	#define _FP_FRAC_CLZ_1(z, X) __FP_CLZ(z, X##_f)
	32
	33	/* Predicates */
	34	#define _FP_FRAC_NEGP_1(X) ((_FP_WS_TYPE)X##_f < 0)
	35	#define _FP_FRAC_ZEROP_1(X) (X##_f == 0)
	36	#define _FP_FRAC_OVERP_1(fs,X) (X##_f & _FP_OVERFLOW_##fs)
	37	#define _FP_FRAC_EQ_1(X, Y) (X##_f == Y##_f)
	38	#define _FP_FRAC_GE_1(X, Y) (X##_f >= Y##_f)
	39	#define _FP_FRAC_GT_1(X, Y) (X##_f > Y##_f)
	40
	41	#define _FP_ZEROFRAC_1 0
	42	#define _FP_MINFRAC_1 1
	43
	44	/*
	45	* Unpack the raw bits of a native fp value. Do not classify or
	46	* normalize the data.
	47	*/
	48
	49	#define _FP_UNPACK_RAW_1(fs, X, val) \
	50	do { \
	51	union _FP_UNION_##fs _flo; _flo.flt = (val); \
	52	\
	53	X##_f = _flo.bits.frac; \
	54	X##_e = _flo.bits.exp; \
	55	X##_s = _flo.bits.sign; \
	56	} while (0)
	57
	58
	59	/*
	60	* Repack the raw bits of a native fp value.
	61	*/
	62
	63	#define _FP_PACK_RAW_1(fs, val, X) \
	64	do { \
	65	union _FP_UNION_##fs _flo; \
	66	\
	67	_flo.bits.frac = X##_f; \
	68	_flo.bits.exp = X##_e; \
	69	_flo.bits.sign = X##_s; \
	70	\
	71	(val) = _flo.flt; \
	72	} while (0)
	73
	74
	75	/*
	76	* Multiplication algorithms:
	77	*/
	78
	79	/* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the
	80	multiplication immediately. */
	81
	82	#define _FP_MUL_MEAT_1_imm(fs, R, X, Y) \
	83	do { \
	84	R##_f = X##_f * Y##_f; \
	85	/* Normalize since we know where the msb of the multiplicands \
	86	were (bit B), we know that the msb of the of the product is \
	87	at either 2B or 2B-1. */ \
	88	_FP_FRAC_SRS_1(R, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs); \
	89	} while (0)
	90
	91	/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
	92
	93	#define _FP_MUL_MEAT_1_wide(fs, R, X, Y, doit) \
	94	do { \
	95	_FP_W_TYPE _Z_f0, _Z_f1; \
	96	doit(_Z_f1, _Z_f0, X##_f, Y##_f); \
	97	/* Normalize since we know where the msb of the multiplicands \
	98	were (bit B), we know that the msb of the of the product is \
	99	at either 2B or 2B-1. */ \
	100	_FP_FRAC_SRS_2(_Z, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs); \
	101	R##_f = _Z_f0; \
	102	} while (0)
	103
	104	/* Finally, a simple widening multiply algorithm. What fun! */
	105
	106	#define _FP_MUL_MEAT_1_hard(fs, R, X, Y) \
	107	do { \
	108	_FP_W_TYPE _xh, _xl, _yh, _yl, _z_f0, _z_f1, _a_f0, _a_f1; \
	109	\
	110	/* split the words in half */ \
	111	_xh = X##_f >> (_FP_W_TYPE_SIZE/2); \
	112	_xl = X##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1); \
	113	_yh = Y##_f >> (_FP_W_TYPE_SIZE/2); \
	114	_yl = Y##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1); \
	115	\
	116	/* multiply the pieces */ \
	117	_z_f0 = _xl * _yl; \
	118	_a_f0 = _xh * _yl; \
	119	_a_f1 = _xl * _yh; \
	120	_z_f1 = _xh * _yh; \
	121	\
	122	/* reassemble into two full words */ \
	123	if ((_a_f0 += _a_f1) < _a_f1) \
	124	_z_f1 += (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2); \
	125	_a_f1 = _a_f0 >> (_FP_W_TYPE_SIZE/2); \
	126	_a_f0 = _a_f0 << (_FP_W_TYPE_SIZE/2); \
	127	_FP_FRAC_ADD_2(_z, _z, _a); \
	128	\
	129	/* normalize */ \
	130	_FP_FRAC_SRS_2(_z, _FP_WFRACBITS_##fs - 1, 2*_FP_WFRACBITS_##fs); \
	131	R##_f = _z_f0; \
	132	} while (0)
	133
	134
	135	/*
	136	* Division algorithms:
	137	*/
	138
	139	/* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the
	140	division immediately. Give this macro either _FP_DIV_HELP_imm for
	141	C primitives or _FP_DIV_HELP_ldiv for the ISO function. Which you
	142	choose will depend on what the compiler does with divrem4. */
	143
	144	#define _FP_DIV_MEAT_1_imm(fs, R, X, Y, doit) \
	145	do { \
	146	_FP_W_TYPE _q, _r; \
	147	X##_f <<= (X##_f < Y##_f \
	148	? R##_e--, _FP_WFRACBITS_##fs \
	149	: _FP_WFRACBITS_##fs - 1); \
	150	doit(_q, _r, X##_f, Y##_f); \
	151	R##_f = _q \| (_r != 0); \
	152	} while (0)
	153
	154	/* GCC's longlong.h defines a 2W / 1W => (1W,1W) primitive udiv_qrnnd
	155	that may be useful in this situation. This first is for a primitive
	156	that requires normalization, the second for one that does not. Look
	157	for UDIV_NEEDS_NORMALIZATION to tell which your machine needs. */
	158
	159	#define _FP_DIV_MEAT_1_udiv_norm(fs, R, X, Y) \
	160	do { \
	161	_FP_W_TYPE _nh, _nl, _q, _r; \
	162	\
	163	/* Normalize Y -- i.e. make the most significant bit set. */ \
	164	Y##_f <<= _FP_WFRACXBITS_##fs - 1; \
	165	\
	166	/* Shift X op correspondingly high, that is, up one full word. */ \
	167	if (X##_f <= Y##_f) \
	168	{ \
	169	_nl = 0; \
	170	_nh = X##_f; \
	171	} \
	172	else \
	173	{ \
	174	R##_e++; \
	175	_nl = X##_f << (_FP_W_TYPE_SIZE-1); \
	176	_nh = X##_f >> 1; \
	177	} \
	178	\
	179	udiv_qrnnd(_q, _r, _nh, _nl, Y##_f); \
	180	R##_f = _q \| (_r != 0); \
	181	} while (0)
	182
	183	#define _FP_DIV_MEAT_1_udiv(fs, R, X, Y) \
	184	do { \
	185	_FP_W_TYPE _nh, _nl, _q, _r; \
	186	if (X##_f < Y##_f) \
	187	{ \
	188	R##_e--; \
	189	_nl = X##_f << _FP_WFRACBITS_##fs; \
	190	_nh = X##_f >> _FP_WFRACXBITS_##fs; \
	191	} \
	192	else \
	193	{ \
	194	_nl = X##_f << (_FP_WFRACBITS_##fs - 1); \
	195	_nh = X##_f >> (_FP_WFRACXBITS_##fs + 1); \
	196	} \
	197	udiv_qrnnd(_q, _r, _nh, _nl, Y##_f); \
	198	R##_f = _q \| (_r != 0); \
	199	} while (0)
	200
	201
	202	/*
	203	* Square root algorithms:
	204	* We have just one right now, maybe Newton approximation
	205	* should be added for those machines where division is fast.
	206	*/
	207
	208	#define _FP_SQRT_MEAT_1(R, S, T, X, q) \
	209	do { \
	210	while (q) \
	211	{ \
	212	T##_f = S##_f + q; \
	213	if (T##_f <= X##_f) \
	214	{ \
	215	S##_f = T##_f + q; \
	216	X##_f -= T##_f; \
	217	R##_f += q; \
	218	} \
	219	_FP_FRAC_SLL_1(X, 1); \
	220	q >>= 1; \
	221	} \
	222	} while (0)
	223
	224	/*
	225	* Assembly/disassembly for converting to/from integral types.
	226	* No shifting or overflow handled here.
	227	*/
	228
	229	#define _FP_FRAC_ASSEMBLE_1(r, X, rsize) (r = X##_f)
	230	#define _FP_FRAC_DISASSEMBLE_1(X, r, rsize) (X##_f = r)
	231
	232
	233	/*
	234	* Convert FP values between word sizes
	235	*/
	236
	237	#define _FP_FRAC_CONV_1_1(dfs, sfs, D, S) \
	238	do { \
	239	D##_f = S##_f; \
	240	if (_FP_WFRACBITS_##sfs > _FP_WFRACBITS_##dfs) \
	241	_FP_FRAC_SRS_1(D, (_FP_WFRACBITS_##sfs-_FP_WFRACBITS_##dfs), \
	242	_FP_WFRACBITS_##sfs); \
	243	else \
	244	D##_f <<= _FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs; \
	245	} while (0)