aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/math-emu/op-1.h
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/math-emu/op-1.h')
-rw-r--r--arch/powerpc/math-emu/op-1.h245
1 files changed, 245 insertions, 0 deletions
diff --git a/arch/powerpc/math-emu/op-1.h b/arch/powerpc/math-emu/op-1.h
new file mode 100644
index 000000000000..c92fa95f562e
--- /dev/null
+++ b/arch/powerpc/math-emu/op-1.h
@@ -0,0 +1,245 @@
1/*
2 * Basic one-word fraction declaration and manipulation.
3 */
4
5#define _FP_FRAC_DECL_1(X) _FP_W_TYPE X##_f
6#define _FP_FRAC_COPY_1(D,S) (D##_f = S##_f)
7#define _FP_FRAC_SET_1(X,I) (X##_f = I)
8#define _FP_FRAC_HIGH_1(X) (X##_f)
9#define _FP_FRAC_LOW_1(X) (X##_f)
10#define _FP_FRAC_WORD_1(X,w) (X##_f)
11
12#define _FP_FRAC_ADDI_1(X,I) (X##_f += I)
13#define _FP_FRAC_SLL_1(X,N) \
14 do { \
15 if (__builtin_constant_p(N) && (N) == 1) \
16 X##_f += X##_f; \
17 else \
18 X##_f <<= (N); \
19 } while (0)
20#define _FP_FRAC_SRL_1(X,N) (X##_f >>= N)
21
22/* Right shift with sticky-lsb. */
23#define _FP_FRAC_SRS_1(X,N,sz) __FP_FRAC_SRS_1(X##_f, N, sz)
24
25#define __FP_FRAC_SRS_1(X,N,sz) \
26 (X = (X >> (N) | (__builtin_constant_p(N) && (N) == 1 \
27 ? X & 1 : (X << (_FP_W_TYPE_SIZE - (N))) != 0)))
28
29#define _FP_FRAC_ADD_1(R,X,Y) (R##_f = X##_f + Y##_f)
30#define _FP_FRAC_SUB_1(R,X,Y) (R##_f = X##_f - Y##_f)
31#define _FP_FRAC_CLZ_1(z, X) __FP_CLZ(z, X##_f)
32
33/* Predicates */
34#define _FP_FRAC_NEGP_1(X) ((_FP_WS_TYPE)X##_f < 0)
35#define _FP_FRAC_ZEROP_1(X) (X##_f == 0)
36#define _FP_FRAC_OVERP_1(fs,X) (X##_f & _FP_OVERFLOW_##fs)
37#define _FP_FRAC_EQ_1(X, Y) (X##_f == Y##_f)
38#define _FP_FRAC_GE_1(X, Y) (X##_f >= Y##_f)
39#define _FP_FRAC_GT_1(X, Y) (X##_f > Y##_f)
40
41#define _FP_ZEROFRAC_1 0
42#define _FP_MINFRAC_1 1
43
44/*
45 * Unpack the raw bits of a native fp value. Do not classify or
46 * normalize the data.
47 */
48
49#define _FP_UNPACK_RAW_1(fs, X, val) \
50 do { \
51 union _FP_UNION_##fs _flo; _flo.flt = (val); \
52 \
53 X##_f = _flo.bits.frac; \
54 X##_e = _flo.bits.exp; \
55 X##_s = _flo.bits.sign; \
56 } while (0)
57
58
59/*
60 * Repack the raw bits of a native fp value.
61 */
62
63#define _FP_PACK_RAW_1(fs, val, X) \
64 do { \
65 union _FP_UNION_##fs _flo; \
66 \
67 _flo.bits.frac = X##_f; \
68 _flo.bits.exp = X##_e; \
69 _flo.bits.sign = X##_s; \
70 \
71 (val) = _flo.flt; \
72 } while (0)
73
74
75/*
76 * Multiplication algorithms:
77 */
78
79/* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the
80 multiplication immediately. */
81
82#define _FP_MUL_MEAT_1_imm(fs, R, X, Y) \
83 do { \
84 R##_f = X##_f * Y##_f; \
85 /* Normalize since we know where the msb of the multiplicands \
86 were (bit B), we know that the msb of the of the product is \
87 at either 2B or 2B-1. */ \
88 _FP_FRAC_SRS_1(R, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs); \
89 } while (0)
90
91/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
92
93#define _FP_MUL_MEAT_1_wide(fs, R, X, Y, doit) \
94 do { \
95 _FP_W_TYPE _Z_f0, _Z_f1; \
96 doit(_Z_f1, _Z_f0, X##_f, Y##_f); \
97 /* Normalize since we know where the msb of the multiplicands \
98 were (bit B), we know that the msb of the of the product is \
99 at either 2B or 2B-1. */ \
100 _FP_FRAC_SRS_2(_Z, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs); \
101 R##_f = _Z_f0; \
102 } while (0)
103
104/* Finally, a simple widening multiply algorithm. What fun! */
105
106#define _FP_MUL_MEAT_1_hard(fs, R, X, Y) \
107 do { \
108 _FP_W_TYPE _xh, _xl, _yh, _yl, _z_f0, _z_f1, _a_f0, _a_f1; \
109 \
110 /* split the words in half */ \
111 _xh = X##_f >> (_FP_W_TYPE_SIZE/2); \
112 _xl = X##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1); \
113 _yh = Y##_f >> (_FP_W_TYPE_SIZE/2); \
114 _yl = Y##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1); \
115 \
116 /* multiply the pieces */ \
117 _z_f0 = _xl * _yl; \
118 _a_f0 = _xh * _yl; \
119 _a_f1 = _xl * _yh; \
120 _z_f1 = _xh * _yh; \
121 \
122 /* reassemble into two full words */ \
123 if ((_a_f0 += _a_f1) < _a_f1) \
124 _z_f1 += (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2); \
125 _a_f1 = _a_f0 >> (_FP_W_TYPE_SIZE/2); \
126 _a_f0 = _a_f0 << (_FP_W_TYPE_SIZE/2); \
127 _FP_FRAC_ADD_2(_z, _z, _a); \
128 \
129 /* normalize */ \
130 _FP_FRAC_SRS_2(_z, _FP_WFRACBITS_##fs - 1, 2*_FP_WFRACBITS_##fs); \
131 R##_f = _z_f0; \
132 } while (0)
133
134
135/*
136 * Division algorithms:
137 */
138
139/* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the
140 division immediately. Give this macro either _FP_DIV_HELP_imm for
141 C primitives or _FP_DIV_HELP_ldiv for the ISO function. Which you
142 choose will depend on what the compiler does with divrem4. */
143
144#define _FP_DIV_MEAT_1_imm(fs, R, X, Y, doit) \
145 do { \
146 _FP_W_TYPE _q, _r; \
147 X##_f <<= (X##_f < Y##_f \
148 ? R##_e--, _FP_WFRACBITS_##fs \
149 : _FP_WFRACBITS_##fs - 1); \
150 doit(_q, _r, X##_f, Y##_f); \
151 R##_f = _q | (_r != 0); \
152 } while (0)
153
154/* GCC's longlong.h defines a 2W / 1W => (1W,1W) primitive udiv_qrnnd
155 that may be useful in this situation. This first is for a primitive
156 that requires normalization, the second for one that does not. Look
157 for UDIV_NEEDS_NORMALIZATION to tell which your machine needs. */
158
159#define _FP_DIV_MEAT_1_udiv_norm(fs, R, X, Y) \
160 do { \
161 _FP_W_TYPE _nh, _nl, _q, _r; \
162 \
163 /* Normalize Y -- i.e. make the most significant bit set. */ \
164 Y##_f <<= _FP_WFRACXBITS_##fs - 1; \
165 \
166 /* Shift X op correspondingly high, that is, up one full word. */ \
167 if (X##_f <= Y##_f) \
168 { \
169 _nl = 0; \
170 _nh = X##_f; \
171 } \
172 else \
173 { \
174 R##_e++; \
175 _nl = X##_f << (_FP_W_TYPE_SIZE-1); \
176 _nh = X##_f >> 1; \
177 } \
178 \
179 udiv_qrnnd(_q, _r, _nh, _nl, Y##_f); \
180 R##_f = _q | (_r != 0); \
181 } while (0)
182
183#define _FP_DIV_MEAT_1_udiv(fs, R, X, Y) \
184 do { \
185 _FP_W_TYPE _nh, _nl, _q, _r; \
186 if (X##_f < Y##_f) \
187 { \
188 R##_e--; \
189 _nl = X##_f << _FP_WFRACBITS_##fs; \
190 _nh = X##_f >> _FP_WFRACXBITS_##fs; \
191 } \
192 else \
193 { \
194 _nl = X##_f << (_FP_WFRACBITS_##fs - 1); \
195 _nh = X##_f >> (_FP_WFRACXBITS_##fs + 1); \
196 } \
197 udiv_qrnnd(_q, _r, _nh, _nl, Y##_f); \
198 R##_f = _q | (_r != 0); \
199 } while (0)
200
201
202/*
203 * Square root algorithms:
204 * We have just one right now, maybe Newton approximation
205 * should be added for those machines where division is fast.
206 */
207
208#define _FP_SQRT_MEAT_1(R, S, T, X, q) \
209 do { \
210 while (q) \
211 { \
212 T##_f = S##_f + q; \
213 if (T##_f <= X##_f) \
214 { \
215 S##_f = T##_f + q; \
216 X##_f -= T##_f; \
217 R##_f += q; \
218 } \
219 _FP_FRAC_SLL_1(X, 1); \
220 q >>= 1; \
221 } \
222 } while (0)
223
224/*
225 * Assembly/disassembly for converting to/from integral types.
226 * No shifting or overflow handled here.
227 */
228
229#define _FP_FRAC_ASSEMBLE_1(r, X, rsize) (r = X##_f)
230#define _FP_FRAC_DISASSEMBLE_1(X, r, rsize) (X##_f = r)
231
232
233/*
234 * Convert FP values between word sizes
235 */
236
237#define _FP_FRAC_CONV_1_1(dfs, sfs, D, S) \
238 do { \
239 D##_f = S##_f; \
240 if (_FP_WFRACBITS_##sfs > _FP_WFRACBITS_##dfs) \
241 _FP_FRAC_SRS_1(D, (_FP_WFRACBITS_##sfs-_FP_WFRACBITS_##dfs), \
242 _FP_WFRACBITS_##sfs); \
243 else \
244 D##_f <<= _FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs; \
245 } while (0)