diff options
Diffstat (limited to 'arch/powerpc/math-emu/sfp-machine.h')
-rw-r--r-- | arch/powerpc/math-emu/sfp-machine.h | 377 |
1 files changed, 377 insertions, 0 deletions
diff --git a/arch/powerpc/math-emu/sfp-machine.h b/arch/powerpc/math-emu/sfp-machine.h new file mode 100644 index 000000000000..4b17d83cfcdd --- /dev/null +++ b/arch/powerpc/math-emu/sfp-machine.h | |||
@@ -0,0 +1,377 @@ | |||
1 | /* Machine-dependent software floating-point definitions. PPC version. | ||
2 | Copyright (C) 1997 Free Software Foundation, Inc. | ||
3 | This file is part of the GNU C Library. | ||
4 | |||
5 | The GNU C Library is free software; you can redistribute it and/or | ||
6 | modify it under the terms of the GNU Library General Public License as | ||
7 | published by the Free Software Foundation; either version 2 of the | ||
8 | License, or (at your option) any later version. | ||
9 | |||
10 | The GNU C Library is distributed in the hope that it will be useful, | ||
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Library General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Library General Public | ||
16 | License along with the GNU C Library; see the file COPYING.LIB. If | ||
17 | not, write to the Free Software Foundation, Inc., | ||
18 | 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
19 | |||
20 | Actually, this is a PPC (32bit) version, written based on the | ||
21 | i386, sparc, and sparc64 versions, by me, | ||
22 | Peter Maydell (pmaydell@chiark.greenend.org.uk). | ||
23 | Comments are by and large also mine, although they may be inaccurate. | ||
24 | |||
25 | In picking out asm fragments I've gone with the lowest common | ||
26 | denominator, which also happens to be the hardware I have :-> | ||
27 | That is, a SPARC without hardware multiply and divide. | ||
28 | */ | ||
29 | |||
/* Basic word definitions consumed by the generic soft-fp macros:
 *   _FP_W_TYPE_SIZE  number of bits the soft-fp code assumes per word
 *   _FP_W_TYPE       unsigned word type
 *   _FP_WS_TYPE      signed word type
 *   _FP_I_TYPE       general integer type
 * NOTE(review): the 32-bit size only matches `long` on ILP32 targets;
 * confirm before using this header in a 64-bit build.
 */
#define _FP_W_TYPE_SIZE		32
#define _FP_W_TYPE		unsigned long
#define _FP_WS_TYPE		signed long
#define _FP_I_TYPE		long
35 | |||
/* Half-word helpers for the C division fallback.
 *   __ll_B            2^(W_TYPE_SIZE/2), i.e. the half-word radix
 *   __ll_lowpart(t)   low  W_TYPE_SIZE/2 bits of t
 *   __ll_highpart(t)  high W_TYPE_SIZE/2 bits of t, shifted down
 * UWtype and W_TYPE_SIZE are expected to be provided by the includer.
 */
#define __ll_B			((UWtype) 1 << (W_TYPE_SIZE / 2))
#define __ll_lowpart(t)		((UWtype) (t) & (__ll_B - 1))
#define __ll_highpart(t)	((UWtype) (t) >> (W_TYPE_SIZE / 2))
39 | |||
40 | /* You can optionally code some things like addition in asm. For | ||
41 | * example, i386 defines __FP_FRAC_ADD_2 as asm. If you don't | ||
42 | * then you get a fragment of C code [if you change an #if 0 | ||
43 | * in op-2.h] or a call to add_ssaaaa (see below). | ||
44 | * Good places to look for asm fragments to use are gcc and glibc. | ||
45 | * gcc's longlong.h is useful. | ||
46 | */ | ||
47 | |||
48 | /* We need to know how to multiply and divide. If the host word size | ||
49 | * is >= 2*fracbits you can use _FP_MUL_MEAT_n_imm(t,R,X,Y) which | ||
50 | * codes the multiply with whatever gcc does to 'a * b'. | ||
51 | * _FP_MUL_MEAT_n_wide(t,R,X,Y,f) is used when you have an asm | ||
52 | * function that can multiply two 1W values and get a 2W result. | ||
53 | * Otherwise you're stuck with _FP_MUL_MEAT_n_hard(t,R,X,Y) which | ||
54 | * does bitshifting to avoid overflow. | ||
55 | * For division there is _FP_DIV_MEAT_n_imm(t,R,X,Y,f) for word size | ||
56 | * >= 2*fracbits, where f is either _FP_DIV_HELP_imm or | ||
57 | * _FP_DIV_HELP_ldiv (see op-1.h). | ||
58 | * _FP_DIV_MEAT_udiv() is if you have asm to do 2W/1W => (1W, 1W). | ||
59 | * [GCC and glibc have longlong.h which has the asm macro udiv_qrnnd | ||
60 | * to do this.] | ||
61 | * In general, 'n' is the number of words required to hold the type, | ||
62 | * and 't' is either S, D or Q for single/double/quad. | ||
63 | * -- PMM | ||
64 | */ | ||
65 | /* Example: SPARC64: | ||
66 | * #define _FP_MUL_MEAT_S(R,X,Y) _FP_MUL_MEAT_1_imm(S,R,X,Y) | ||
67 | * #define _FP_MUL_MEAT_D(R,X,Y) _FP_MUL_MEAT_1_wide(D,R,X,Y,umul_ppmm) | ||
68 | * #define _FP_MUL_MEAT_Q(R,X,Y) _FP_MUL_MEAT_2_wide(Q,R,X,Y,umul_ppmm) | ||
69 | * | ||
70 | * #define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm) | ||
71 | * #define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_1_udiv(D,R,X,Y) | ||
72 | * #define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv_64(Q,R,X,Y) | ||
73 | * | ||
74 | * Example: i386: | ||
75 | * #define _FP_MUL_MEAT_S(R,X,Y) _FP_MUL_MEAT_1_wide(S,R,X,Y,_i386_mul_32_64) | ||
76 | * #define _FP_MUL_MEAT_D(R,X,Y) _FP_MUL_MEAT_2_wide(D,R,X,Y,_i386_mul_32_64) | ||
77 | * | ||
78 | * #define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv(S,R,X,Y,_i386_div_64_32) | ||
79 | * #define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv_64(D,R,X,Y) | ||
80 | */ | ||
81 | |||
/* Multiplication: single precision uses the one-word "wide" helper and
 * double precision the two-word one; both are handed umul_ppmm as the
 * 1W x 1W -> 2W multiply primitive.
 */
#define _FP_MUL_MEAT_S(R, X, Y)	_FP_MUL_MEAT_1_wide(S, R, X, Y, umul_ppmm)
#define _FP_MUL_MEAT_D(R, X, Y)	_FP_MUL_MEAT_2_wide(D, R, X, Y, umul_ppmm)

/* Division: single precision uses the one-word udiv helper, double
 * precision the two-word "udiv_64" helper.
 */
#define _FP_DIV_MEAT_S(R, X, Y)	_FP_DIV_MEAT_1_udiv(S, R, X, Y)
#define _FP_DIV_MEAT_D(R, X, Y)	_FP_DIV_MEAT_2_udiv_64(D, R, X, Y)
87 | |||
/* Fraction words of the default NaN, one comma-separated 32-bit unsigned
 * value per fraction word: the quiet-NaN bit in the first word and zero
 * in every remaining word.
 */
#define _FP_NANFRAC_S		_FP_QNANBIT_S
#define _FP_NANFRAC_D		_FP_QNANBIT_D, 0
#define _FP_NANFRAC_Q		_FP_QNANBIT_Q, 0, 0, 0

#define _FP_KEEPNANFRACP	1
96 | |||
/* Pick the result when both operands X and Y are NaNs.
 * Other ports differ here (i386 takes the larger of the two, sparc64
 * takes Y); this version follows sparc64: R receives Y's sign and
 * fraction and is classified as a NaN.  X is not looked at.
 *   fs  format tag (unused here)
 *   wc  word count, selects _FP_FRAC_COPY_<wc>
 */
#define _FP_CHOOSENAN(fs, wc, R, X, Y)			\
  do {							\
    _FP_FRAC_COPY_##wc(R, Y);				\
    R##_s = Y##_s;					\
    R##_c = FP_CLS_NAN;					\
  } while (0)
108 | |||
109 | |||
/* Out-of-line double-precision unpack/pack helpers, defined elsewhere.
 * Parameter names follow the way __FP_UNPACK_D / __FP_PACK_D /
 * __FP_PACK_DS pass them: sign, high fraction word (f1), low fraction
 * word (f0), exponent, class, and the memory image `val`.
 */
extern void fp_unpack_d(long *s, unsigned long *f1, unsigned long *f0,
			long *e, long *c, void *val);
extern int fp_pack_d(void *val, long s, unsigned long f1, unsigned long f0,
		     long e, long c);
extern int fp_pack_ds(void *val, long s, unsigned long f1, unsigned long f0,
		      long e, long c);
114 | |||
/* Read the raw fields of a one-word format out of memory.
 *   fs   format tag; selects union _FP_UNION_<fs>
 *   X    variable prefix; X_f, X_e and X_s receive fraction, exponent
 *        and sign
 *   val  pointer expression addressing the stored value
 * `val` is parenthesised before the cast so that a compound expression
 * such as `base + off` is converted as a whole rather than having the
 * cast bind to its first operand only.
 */
#define __FP_UNPACK_RAW_1(fs, X, val)			\
  do {							\
    union _FP_UNION_##fs *_flo =			\
	(union _FP_UNION_##fs *)(val);			\
							\
    X##_f = _flo->bits.frac;				\
    X##_e = _flo->bits.exp;				\
    X##_s = _flo->bits.sign;				\
  } while (0)
124 | |||
/* Read the raw fields of a two-word format out of memory.
 *   fs   format tag; selects union _FP_UNION_<fs>
 *   X    variable prefix; X_f0/X_f1 receive the two fraction words,
 *        X_e the exponent and X_s the sign
 *   val  pointer expression addressing the stored value
 * `val` is parenthesised before the cast so that a compound expression
 * such as `base + off` is converted as a whole rather than having the
 * cast bind to its first operand only.
 */
#define __FP_UNPACK_RAW_2(fs, X, val)			\
  do {							\
    union _FP_UNION_##fs *_flo =			\
	(union _FP_UNION_##fs *)(val);			\
							\
    X##_f0 = _flo->bits.frac0;				\
    X##_f1 = _flo->bits.frac1;				\
    X##_e  = _flo->bits.exp;				\
    X##_s  = _flo->bits.sign;				\
  } while (0)
135 | |||
/* Single precision: raw one-word unpack of the value at `val` into the
 * X_* variables, followed by _FP_UNPACK_CANONICAL on X.
 */
#define __FP_UNPACK_S(X, val)				\
  do {							\
    __FP_UNPACK_RAW_1 (S, X, val);			\
    _FP_UNPACK_CANONICAL (S, 1, X);			\
  } while (0)

/* Double precision: delegated to the out-of-line fp_unpack_d(), which is
 * handed the addresses of X's sign, fraction words, exponent and class.
 */
#define __FP_UNPACK_D(X, val)				\
  fp_unpack_d (&X##_s, &X##_f1, &X##_f0, &X##_e, &X##_c, val)
144 | |||
/* Store the raw fields of a one-word format to memory.
 *   fs   format tag; selects union _FP_UNION_<fs>
 *   val  pointer expression addressing the destination
 *   X    variable prefix; X_f, X_e and X_s supply fraction, exponent
 *        and sign
 * `val` is parenthesised before the cast so that a compound expression
 * such as `base + off` is converted as a whole rather than having the
 * cast bind to its first operand only.
 */
#define __FP_PACK_RAW_1(fs, val, X)			\
  do {							\
    union _FP_UNION_##fs *_flo =			\
	(union _FP_UNION_##fs *)(val);			\
							\
    _flo->bits.frac = X##_f;				\
    _flo->bits.exp  = X##_e;				\
    _flo->bits.sign = X##_s;				\
  } while (0)
154 | |||
/* Store the raw fields of a two-word format to memory.
 *   fs   format tag; selects union _FP_UNION_<fs>
 *   val  pointer expression addressing the destination
 *   X    variable prefix; X_f0/X_f1 supply the two fraction words,
 *        X_e the exponent and X_s the sign
 * `val` is parenthesised before the cast so that a compound expression
 * such as `base + off` is converted as a whole rather than having the
 * cast bind to its first operand only.
 */
#define __FP_PACK_RAW_2(fs, val, X)			\
  do {							\
    union _FP_UNION_##fs *_flo =			\
	(union _FP_UNION_##fs *)(val);			\
							\
    _flo->bits.frac0 = X##_f0;				\
    _flo->bits.frac1 = X##_f1;				\
    _flo->bits.exp   = X##_e;				\
    _flo->bits.sign  = X##_s;				\
  } while (0)
165 | |||
166 | #include <linux/kernel.h> | ||
167 | #include <linux/sched.h> | ||
168 | |||
/* FPSCR value stored in the current task's thread state. */
#define __FPU_FPSCR		(current->thread.fpscr.val)

/* The destination register is only written when none of the exceptions
 * being signalled (if any) would trap.  __FPU_ENABLED_EXC extracts the
 * five FPSCR bits starting at bit 3 (counting from the least significant
 * bit); __FPU_TRAP_P(bits) is non-zero when any of `bits` is set in that
 * field.
 */
#define __FPU_ENABLED_EXC	((__FPU_FPSCR >> 3) & 0x1f)

#define __FPU_TRAP_P(bits)	((__FPU_ENABLED_EXC & (bits)) != 0)
181 | |||
/* Pack a single-precision result.  _FP_PACK_CANONICAL yields the raised
 * exception bits; the raw store to `val` is skipped only when at least
 * one of those bits is set and __FPU_TRAP_P reports it as trapping.
 * The macro evaluates to the exception bits.
 */
#define __FP_PACK_S(val, X)				\
({  int __status = _FP_PACK_CANONICAL (S, 1, X);	\
    if (!(__status && __FPU_TRAP_P (__status)))		\
      __FP_PACK_RAW_1 (S, val, X);			\
    __status;						\
})

/* Double-precision packing is done out of line; X's sign, fraction
 * words, exponent and class are passed by value.
 */
#define __FP_PACK_D(val, X)				\
  fp_pack_d (val, X##_s, X##_f1, X##_f0, X##_e, X##_c)

#define __FP_PACK_DS(val, X)				\
  fp_pack_ds (val, X##_s, X##_f1, X##_f0, X##_e, X##_c)
194 | |||
/* Current rounding mode: the two least significant bits of __FPU_FPSCR. */
#define FP_ROUNDMODE	(__FPU_FPSCR & 0x3)
200 | |||
201 | /* the asm fragments go here: all these are taken from glibc-2.0.5's | ||
202 | * stdlib/longlong.h | ||
203 | */ | ||
204 | |||
205 | #include <linux/types.h> | ||
206 | #include <asm/byteorder.h> | ||
207 | |||
/* add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
 *            high_addend_2, low_addend_2)
 * Two-word addition used by op-2.h; equivalent to
 *   #define add_ssaaaa(sh,sl,ah,al,bh,bl) (sh = ah+bh+ ((sl = al+bl) < al))
 * The sum of (ah:al) and (bh:bl) is placed in (sh:sl).  The carry out of
 * the high word is not stored anywhere and is lost.
 *
 * Three asm variants are selected at compile time:
 *   - bh is the constant 0:   low add, then addze on ah
 *   - bh is the constant ~0:  low add, then addme on ah
 *   - otherwise:              low add, then adde on ah and bh
 * %I<n> selects the immediate form of the low add when bl is a constant
 * that satisfies the "I" constraint.
 *
 * Notes on the constraints:
 *   - sh and sl are plain lvalues; a cast in an output operand is not an
 *     lvalue and is rejected by compilers without the old GNU extension.
 *   - sl is early-clobber ("=&r") because it is written before the high
 *     inputs are read.
 *   - "%" marks an operand as commutative with the one that follows it,
 *     so it is only used on pairs that really are interchangeable
 *     (al/bl, and ah/bh in the general variant) -- never on ah when the
 *     next operand is al.
 */
#define add_ssaaaa(sh, sl, ah, al, bh, bl)				\
  do {									\
    if (__builtin_constant_p (bh) && (bh) == 0)				\
      __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"			\
	       : "=r" (sh),						\
		 "=&r" (sl)						\
	       : "r" ((USItype)(ah)),					\
		 "%r" ((USItype)(al)),					\
		 "rI" ((USItype)(bl)));					\
    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)	\
      __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"			\
	       : "=r" (sh),						\
		 "=&r" (sl)						\
	       : "r" ((USItype)(ah)),					\
		 "%r" ((USItype)(al)),					\
		 "rI" ((USItype)(bl)));					\
    else								\
      __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"			\
	       : "=r" (sh),						\
		 "=&r" (sl)						\
	       : "%r" ((USItype)(ah)),					\
		 "r" ((USItype)(bh)),					\
		 "%r" ((USItype)(al)),					\
		 "rI" ((USItype)(bl)));					\
  } while (0)
241 | |||
/* sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
 *            high_subtrahend, low_subtrahend)
 * Two-word subtraction used by op-2.h and udivmodti4.c; equivalent to
 *   #define sub_ddmmss(sh, sl, ah, al, bh, bl) (sh = ah-bh - ((sl = al-bl) > al))
 * (bh:bl) is subtracted from (ah:al) and the result placed in (sh:sl).
 * The borrow out of the high word is not stored anywhere and is lost.
 *
 * The low words are always subtracted first (immediate form via %I<n>
 * when al is a suitable constant); the high-word instruction depends on
 * which high operand, if any, is a compile-time constant:
 *   - ah == 0:   subfze on bh
 *   - ah == ~0:  subfme on bh
 *   - bh == 0:   addme on ah
 *   - bh == ~0:  addze on ah
 *   - otherwise: subfe on bh and ah
 *
 * sh and sl are passed as plain lvalues: a cast in an output operand is
 * not an lvalue and is rejected by compilers without the old GNU
 * extension.  sl is early-clobber because it is written before the high
 * inputs are read.
 */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)				\
  do {									\
    if (__builtin_constant_p (ah) && (ah) == 0)				\
      __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"			\
	       : "=r" (sh),						\
		 "=&r" (sl)						\
	       : "r" ((USItype)(bh)),					\
		 "rI" ((USItype)(al)),					\
		 "r" ((USItype)(bl)));					\
    else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)	\
      __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"			\
	       : "=r" (sh),						\
		 "=&r" (sl)						\
	       : "r" ((USItype)(bh)),					\
		 "rI" ((USItype)(al)),					\
		 "r" ((USItype)(bl)));					\
    else if (__builtin_constant_p (bh) && (bh) == 0)			\
      __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"			\
	       : "=r" (sh),						\
		 "=&r" (sl)						\
	       : "r" ((USItype)(ah)),					\
		 "rI" ((USItype)(al)),					\
		 "r" ((USItype)(bl)));					\
    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)	\
      __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"			\
	       : "=r" (sh),						\
		 "=&r" (sl)						\
	       : "r" ((USItype)(ah)),					\
		 "rI" ((USItype)(al)),					\
		 "r" ((USItype)(bl)));					\
    else								\
      __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"			\
	       : "=r" (sh),						\
		 "=&r" (sl)						\
	       : "r" ((USItype)(ah)),					\
		 "r" ((USItype)(bh)),					\
		 "rI" ((USItype)(al)),					\
		 "r" ((USItype)(bl)));					\
  } while (0)
290 | |||
291 | /* asm fragments for mul and div */ | ||
292 | |||
/* umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
 * UWtype integers MULTIPLIER and MULTIPLICAND and generates a two-word
 * product in HIGH_PROD and LOW_PROD.
 *
 * Both inputs are copied into USItype temporaries first, so each macro
 * argument is evaluated exactly once.  The high word comes from mulhwu;
 * the low word is the ordinary (truncating) C product.
 *
 * ph is passed to the asm as a plain lvalue: a cast in an output operand
 * is not an lvalue and is rejected by compilers without the old GNU
 * extension.
 */
#define umul_ppmm(ph, pl, m0, m1)					\
  do {									\
    USItype __m0 = (m0), __m1 = (m1);					\
    __asm__ ("mulhwu %0,%1,%2"						\
	     : "=r" (ph)						\
	     : "%r" (__m0),						\
	       "r" (__m1));						\
    (pl) = __m0 * __m1;							\
  } while (0)
306 | |||
/* udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
 *            denominator)
 * Divides the two-word value (n1:n0) by d, storing the quotient in q and
 * the remainder in r.  n1 must be less than d for correct operation.
 * When an implementation additionally needs the most significant bit of
 * d to be set, UDIV_NEEDS_NORMALIZATION is defined to 1.
 *
 * This is the portable C version: the quotient is produced one half-word
 * digit at a time.  Each digit is first estimated by dividing by the
 * high half of d, then corrected downwards (at most twice) using the
 * product of the estimate and the low half of d.
 * Note that n1, n0 and d are evaluated more than once.
 */
#define udiv_qrnnd(q, r, n1, n0, d)					\
  do {									\
    UWtype __dhi = __ll_highpart (d);					\
    UWtype __dlo = __ll_lowpart (d);					\
    UWtype __qhi, __qlo, __rhi, __rlo, __prod;				\
									\
    /* Upper quotient digit. */						\
    __rhi = (n1) % __dhi;						\
    __qhi = (n1) / __dhi;						\
    __prod = (UWtype) __qhi * __dlo;					\
    __rhi = (__rhi * __ll_B) | __ll_highpart (n0);			\
    if (__rhi < __prod)							\
      {									\
	__qhi--;							\
	__rhi += (d);							\
	/* Only retry if the addition above did not carry. */		\
	if (__rhi >= (d) && __rhi < __prod)				\
	  {								\
	    __qhi--;							\
	    __rhi += (d);						\
	  }								\
      }									\
    __rhi -= __prod;							\
									\
    /* Lower quotient digit. */						\
    __rlo = __rhi % __dhi;						\
    __qlo = __rhi / __dhi;						\
    __prod = (UWtype) __qlo * __dlo;					\
    __rlo = (__rlo * __ll_B) | __ll_lowpart (n0);			\
    if (__rlo < __prod)							\
      {									\
	__qlo--;							\
	__rlo += (d);							\
	if (__rlo >= (d) && __rlo < __prod)				\
	  {								\
	    __qlo--;							\
	    __rlo += (d);						\
	  }								\
      }									\
    __rlo -= __prod;							\
									\
    (q) = ((UWtype) __qhi * __ll_B) | __qlo;				\
    (r) = __rlo;							\
  } while (0)
350 | |||
/* The udiv_qrnnd in this file needs the most significant bit of its
 * denominator to be set.
 */
#define UDIV_NEEDS_NORMALIZATION 1

/* Any abort() call in code that includes this header expands to
 * `return 0' in the enclosing function.
 * NOTE(review): presumably so that shared soft-fp code calling abort()
 * builds in the kernel -- confirm against the includers.
 */
#define abort() return 0
355 | |||
/* Provide __BYTE_ORDER: it is __BIG_ENDIAN when that macro is defined at
 * this point and __LITTLE_ENDIAN otherwise.
 */
#ifndef __BIG_ENDIAN
#define __BYTE_ORDER __LITTLE_ENDIAN
#else
#define __BYTE_ORDER __BIG_ENDIAN
#endif
361 | |||
/* Exception flags.
 * Each flag is a single bit given by its big-endian bit number, where
 * bit 0 is the most significant bit of a 32-bit word.
 * NOTE(review): the bit numbers look like FPSCR bit positions -- confirm
 * against the architecture manual.
 */
#define EFLAG_BIT(n)		(1 << (31 - (n)))

/* Summary flags. */
#define EFLAG_INVALID		EFLAG_BIT(2)
#define EFLAG_OVERFLOW		EFLAG_BIT(3)
#define EFLAG_UNDERFLOW		EFLAG_BIT(4)
#define EFLAG_DIVZERO		EFLAG_BIT(5)
#define EFLAG_INEXACT		EFLAG_BIT(6)

/* Individual VX* flags. */
#define EFLAG_VXSNAN		EFLAG_BIT(7)
#define EFLAG_VXISI		EFLAG_BIT(8)
#define EFLAG_VXIDI		EFLAG_BIT(9)
#define EFLAG_VXZDZ		EFLAG_BIT(10)
#define EFLAG_VXIMZ		EFLAG_BIT(11)
#define EFLAG_VXVC		EFLAG_BIT(12)
#define EFLAG_VXSOFT		EFLAG_BIT(21)
#define EFLAG_VXSQRT		EFLAG_BIT(22)
#define EFLAG_VXCVI		EFLAG_BIT(23)