aboutsummaryrefslogtreecommitdiffstats
path: root/arch/alpha/math-emu
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 18:20:36 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 18:20:36 -0400
commit1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/alpha/math-emu
Linux-2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'arch/alpha/math-emu')
-rw-r--r--arch/alpha/math-emu/Makefile9
-rw-r--r--arch/alpha/math-emu/math.c400
-rw-r--r--arch/alpha/math-emu/qrnnd.S163
-rw-r--r--arch/alpha/math-emu/sfp-util.h35
4 files changed, 607 insertions, 0 deletions
diff --git a/arch/alpha/math-emu/Makefile b/arch/alpha/math-emu/Makefile
new file mode 100644
index 000000000000..359ef087e69e
--- /dev/null
+++ b/arch/alpha/math-emu/Makefile
@@ -0,0 +1,9 @@
1#
2# Makefile for the FPU instruction emulation.
3#
4
# -w asks the compiler to suppress all warnings for the objects built here.
5EXTRA_CFLAGS := -w
6
# math-emu.o is added to the obj-y or obj-m list depending on the value of
# CONFIG_MATHEMU (NOTE(review): presumably built-in vs. module -- confirm
# against the Kconfig entry).
7obj-$(CONFIG_MATHEMU) += math-emu.o
8
# math-emu.o is a composite object linked from the C emulator (math.o) and
# the assembly division helper (qrnnd.o).
9math-emu-objs := math.o qrnnd.o
diff --git a/arch/alpha/math-emu/math.c b/arch/alpha/math-emu/math.c
new file mode 100644
index 000000000000..ae79dd970b02
--- /dev/null
+++ b/arch/alpha/math-emu/math.c
@@ -0,0 +1,400 @@
1#include <linux/module.h>
2#include <linux/types.h>
3#include <linux/kernel.h>
4#include <linux/sched.h>
5
6#include <asm/uaccess.h>
7
8#include "sfp-util.h"
9#include <math-emu/soft-fp.h>
10#include <math-emu/single.h>
11#include <math-emu/double.h>
12
/* Major opcodes: compared against (insn >> 26) in alpha_fp_emul_imprecise(). */
13#define OPC_PAL 0x00
14#define OPC_INTA 0x10
15#define OPC_INTL 0x11
16#define OPC_INTS 0x12
17#define OPC_INTM 0x13
18#define OPC_FLTC 0x14
19#define OPC_FLTV 0x15
20#define OPC_FLTI 0x16
21#define OPC_FLTL 0x17
22#define OPC_MISC 0x18
23#define OPC_JSR 0x1a
24
/* Source operand format: compared against (insn >> 9) & 0x3 in alpha_fp_emul(). */
25#define FOP_SRC_S 0
26#define FOP_SRC_T 2
27#define FOP_SRC_Q 3
28
/*
 * Operation selector: compared against (insn >> 5) & 0xf in alpha_fp_emul().
 * FOP_FNC_ADDx and FOP_FNC_CVTQL share the value 0; CVTQL is only matched
 * when the source format is FOP_SRC_Q, ADDx when it is FOP_SRC_S/FOP_SRC_T.
 */
29#define FOP_FNC_ADDx 0
30#define FOP_FNC_CVTQL 0
31#define FOP_FNC_SUBx 1
32#define FOP_FNC_MULx 2
33#define FOP_FNC_DIVx 3
34#define FOP_FNC_CMPxUN 4
35#define FOP_FNC_CMPxEQ 5
36#define FOP_FNC_CMPxLT 6
37#define FOP_FNC_CMPxLE 7
38#define FOP_FNC_SQRTx 11
39#define FOP_FNC_CVTxS 12
40#define FOP_FNC_CVTxT 14
41#define FOP_FNC_CVTxQ 15
42
/*
 * OPC_MISC function codes: compared against (insn & 0xffff) in
 * alpha_fp_emul_imprecise(); both end the backwards trap-shadow scan.
 */
43#define MISC_TRAPB 0x0000
44#define MISC_EXCB 0x0400
45
46extern unsigned long alpha_read_fp_reg (unsigned long reg);
47extern void alpha_write_fp_reg (unsigned long reg, unsigned long val);
48extern unsigned long alpha_read_fp_reg_s (unsigned long reg);
49extern void alpha_write_fp_reg_s (unsigned long reg, unsigned long val);
50
51
52#ifdef MODULE
53
54MODULE_DESCRIPTION("FP Software completion module");
55
56extern long (*alpha_fp_emul_imprecise)(struct pt_regs *, unsigned long);
57extern long (*alpha_fp_emul) (unsigned long pc);
58
59static long (*save_emul_imprecise)(struct pt_regs *, unsigned long);
60static long (*save_emul) (unsigned long pc);
61
62long do_alpha_fp_emul_imprecise(struct pt_regs *, unsigned long);
63long do_alpha_fp_emul(unsigned long);
64
65int init_module(void)
66{
67 save_emul_imprecise = alpha_fp_emul_imprecise;
68 save_emul = alpha_fp_emul;
69 alpha_fp_emul_imprecise = do_alpha_fp_emul_imprecise;
70 alpha_fp_emul = do_alpha_fp_emul;
71 return 0;
72}
73
74void cleanup_module(void)
75{
76 alpha_fp_emul_imprecise = save_emul_imprecise;
77 alpha_fp_emul = save_emul;
78}
79
80#undef alpha_fp_emul_imprecise
81#define alpha_fp_emul_imprecise do_alpha_fp_emul_imprecise
82#undef alpha_fp_emul
83#define alpha_fp_emul do_alpha_fp_emul
84
85#endif /* MODULE */
86
87
88/*
89 * Emulate the floating point instruction at address PC. Returns -1 if the
90 * instruction to be emulated is illegal (such as with the opDEC trap), else
91 * the SI_CODE for a SIGFPE signal, else 0 if everything's ok.
92 *
93 * Notice that the kernel does not and cannot use FP regs. This is good
94 * because it means that instead of saving/restoring all fp regs, we simply
95 * stick the result of the operation into the appropriate register.
96 */
97long
98alpha_fp_emul (unsigned long pc)
99{
100 FP_DECL_EX;
101 FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
102 FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);
103
104 unsigned long fa, fb, fc, func, mode, src;
105 unsigned long res, va, vb, vc, swcr, fpcr;
106 __u32 insn;
107 long si_code;
108
109 get_user(insn, (__u32 __user *)pc);
110 fc = (insn >> 0) & 0x1f; /* destination register */
111 fb = (insn >> 16) & 0x1f;
112 fa = (insn >> 21) & 0x1f;
113 func = (insn >> 5) & 0xf;
114 src = (insn >> 9) & 0x3;
115 mode = (insn >> 11) & 0x3;
116
117 fpcr = rdfpcr();
118 swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr);
119
120 if (mode == 3) {
121 /* Dynamic -- get rounding mode from fpcr. */
122 mode = (fpcr >> FPCR_DYN_SHIFT) & 3;
123 }
124
125 switch (src) {
126 case FOP_SRC_S:
127 va = alpha_read_fp_reg_s(fa);
128 vb = alpha_read_fp_reg_s(fb);
129
130 FP_UNPACK_SP(SA, &va);
131 FP_UNPACK_SP(SB, &vb);
132
133 switch (func) {
134 case FOP_FNC_SUBx:
135 FP_SUB_S(SR, SA, SB);
136 goto pack_s;
137
138 case FOP_FNC_ADDx:
139 FP_ADD_S(SR, SA, SB);
140 goto pack_s;
141
142 case FOP_FNC_MULx:
143 FP_MUL_S(SR, SA, SB);
144 goto pack_s;
145
146 case FOP_FNC_DIVx:
147 FP_DIV_S(SR, SA, SB);
148 goto pack_s;
149
150 case FOP_FNC_SQRTx:
151 FP_SQRT_S(SR, SB);
152 goto pack_s;
153 }
154 goto bad_insn;
155
156 case FOP_SRC_T:
157 va = alpha_read_fp_reg(fa);
158 vb = alpha_read_fp_reg(fb);
159
160 if ((func & ~3) == FOP_FNC_CMPxUN) {
161 FP_UNPACK_RAW_DP(DA, &va);
162 FP_UNPACK_RAW_DP(DB, &vb);
163 if (!DA_e && !_FP_FRAC_ZEROP_1(DA)) {
164 FP_SET_EXCEPTION(FP_EX_DENORM);
165 if (FP_DENORM_ZERO)
166 _FP_FRAC_SET_1(DA, _FP_ZEROFRAC_1);
167 }
168 if (!DB_e && !_FP_FRAC_ZEROP_1(DB)) {
169 FP_SET_EXCEPTION(FP_EX_DENORM);
170 if (FP_DENORM_ZERO)
171 _FP_FRAC_SET_1(DB, _FP_ZEROFRAC_1);
172 }
173 FP_CMP_D(res, DA, DB, 3);
174 vc = 0x4000000000000000UL;
175 /* CMPTEQ, CMPTUN don't trap on QNaN,
176 while CMPTLT and CMPTLE do */
177 if (res == 3
178 && ((func & 3) >= 2
179 || FP_ISSIGNAN_D(DA)
180 || FP_ISSIGNAN_D(DB))) {
181 FP_SET_EXCEPTION(FP_EX_INVALID);
182 }
183 switch (func) {
184 case FOP_FNC_CMPxUN: if (res != 3) vc = 0; break;
185 case FOP_FNC_CMPxEQ: if (res) vc = 0; break;
186 case FOP_FNC_CMPxLT: if (res != -1) vc = 0; break;
187 case FOP_FNC_CMPxLE: if ((long)res > 0) vc = 0; break;
188 }
189 goto done_d;
190 }
191
192 FP_UNPACK_DP(DA, &va);
193 FP_UNPACK_DP(DB, &vb);
194
195 switch (func) {
196 case FOP_FNC_SUBx:
197 FP_SUB_D(DR, DA, DB);
198 goto pack_d;
199
200 case FOP_FNC_ADDx:
201 FP_ADD_D(DR, DA, DB);
202 goto pack_d;
203
204 case FOP_FNC_MULx:
205 FP_MUL_D(DR, DA, DB);
206 goto pack_d;
207
208 case FOP_FNC_DIVx:
209 FP_DIV_D(DR, DA, DB);
210 goto pack_d;
211
212 case FOP_FNC_SQRTx:
213 FP_SQRT_D(DR, DB);
214 goto pack_d;
215
216 case FOP_FNC_CVTxS:
217 /* It is irritating that DEC encoded CVTST with
218 SRC == T_floating. It is also interesting that
219 the bit used to tell the two apart is /U... */
220 if (insn & 0x2000) {
221 FP_CONV(S,D,1,1,SR,DB);
222 goto pack_s;
223 } else {
224 vb = alpha_read_fp_reg_s(fb);
225 FP_UNPACK_SP(SB, &vb);
226 DR_c = DB_c;
227 DR_s = DB_s;
228 DR_e = DB_e;
229 DR_f = SB_f << (52 - 23);
230 goto pack_d;
231 }
232
233 case FOP_FNC_CVTxQ:
234 if (DB_c == FP_CLS_NAN
235 && (_FP_FRAC_HIGH_RAW_D(DB) & _FP_QNANBIT_D)) {
236 /* AAHB Table B-2 says QNaN should not trigger INV */
237 vc = 0;
238 } else
239 FP_TO_INT_ROUND_D(vc, DB, 64, 2);
240 goto done_d;
241 }
242 goto bad_insn;
243
244 case FOP_SRC_Q:
245 vb = alpha_read_fp_reg(fb);
246
247 switch (func) {
248 case FOP_FNC_CVTQL:
249 /* Notice: We can get here only due to an integer
250 overflow. Such overflows are reported as invalid
251 ops. We return the result the hw would have
252 computed. */
253 vc = ((vb & 0xc0000000) << 32 | /* sign and msb */
254 (vb & 0x3fffffff) << 29); /* rest of the int */
255 FP_SET_EXCEPTION (FP_EX_INVALID);
256 goto done_d;
257
258 case FOP_FNC_CVTxS:
259 FP_FROM_INT_S(SR, ((long)vb), 64, long);
260 goto pack_s;
261
262 case FOP_FNC_CVTxT:
263 FP_FROM_INT_D(DR, ((long)vb), 64, long);
264 goto pack_d;
265 }
266 goto bad_insn;
267 }
268 goto bad_insn;
269
270pack_s:
271 FP_PACK_SP(&vc, SR);
272 if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ))
273 vc = 0;
274 alpha_write_fp_reg_s(fc, vc);
275 goto done;
276
277pack_d:
278 FP_PACK_DP(&vc, DR);
279 if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ))
280 vc = 0;
281done_d:
282 alpha_write_fp_reg(fc, vc);
283 goto done;
284
285 /*
286 * Take the appropriate action for each possible
287 * floating-point result:
288 *
289 * - Set the appropriate bits in the FPCR
290 * - If the specified exception is enabled in the FPCR,
291 * return. The caller (entArith) will dispatch
292 * the appropriate signal to the translated program.
293 *
294 * In addition, properly track the exception state in software
295 * as described in the Alpha Architecture Handbook section 4.7.7.3.
296 */
297done:
298 if (_fex) {
299 /* Record exceptions in software control word. */
300 swcr |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT);
301 current_thread_info()->ieee_state
302 |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT);
303
304 /* Update hardware control register. */
305 fpcr &= (~FPCR_MASK | FPCR_DYN_MASK);
306 fpcr |= ieee_swcr_to_fpcr(swcr);
307 wrfpcr(fpcr);
308
309 /* Do we generate a signal? */
310 _fex = _fex & swcr & IEEE_TRAP_ENABLE_MASK;
311 si_code = 0;
312 if (_fex) {
313 if (_fex & IEEE_TRAP_ENABLE_DNO) si_code = FPE_FLTUND;
314 if (_fex & IEEE_TRAP_ENABLE_INE) si_code = FPE_FLTRES;
315 if (_fex & IEEE_TRAP_ENABLE_UNF) si_code = FPE_FLTUND;
316 if (_fex & IEEE_TRAP_ENABLE_OVF) si_code = FPE_FLTOVF;
317 if (_fex & IEEE_TRAP_ENABLE_DZE) si_code = FPE_FLTDIV;
318 if (_fex & IEEE_TRAP_ENABLE_INV) si_code = FPE_FLTINV;
319 }
320
321 return si_code;
322 }
323
324 /* We used to write the destination register here, but DEC FORTRAN
325 requires that the result *always* be written... so we do the write
326 immediately after the operations above. */
327
328 return 0;
329
330bad_insn:
331 printk(KERN_ERR "alpha_fp_emul: Invalid FP insn %#x at %#lx\n",
332 insn, pc);
333 return -1;
334}
335
336long
337alpha_fp_emul_imprecise (struct pt_regs *regs, unsigned long write_mask)
338{
339 unsigned long trigger_pc = regs->pc - 4;
340 unsigned long insn, opcode, rc, si_code = 0;
341
342 /*
343 * Turn off the bits corresponding to registers that are the
344 * target of instructions that set bits in the exception
345 * summary register. We have some slack doing this because a
346 * register that is the target of a trapping instruction can
347 * be written at most once in the trap shadow.
348 *
349 * Branches, jumps, TRAPBs, EXCBs and calls to PALcode all
350 * bound the trap shadow, so we need not look any further than
351 * up to the first occurrence of such an instruction.
352 */
353 while (write_mask) {
354 get_user(insn, (__u32 __user *)(trigger_pc));
355 opcode = insn >> 26;
356 rc = insn & 0x1f;
357
358 switch (opcode) {
359 case OPC_PAL:
360 case OPC_JSR:
361 case 0x30 ... 0x3f: /* branches */
362 goto egress;
363
364 case OPC_MISC:
365 switch (insn & 0xffff) {
366 case MISC_TRAPB:
367 case MISC_EXCB:
368 goto egress;
369
370 default:
371 break;
372 }
373 break;
374
375 case OPC_INTA:
376 case OPC_INTL:
377 case OPC_INTS:
378 case OPC_INTM:
379 write_mask &= ~(1UL << rc);
380 break;
381
382 case OPC_FLTC:
383 case OPC_FLTV:
384 case OPC_FLTI:
385 case OPC_FLTL:
386 write_mask &= ~(1UL << (rc + 32));
387 break;
388 }
389 if (!write_mask) {
390 /* Re-execute insns in the trap-shadow. */
391 regs->pc = trigger_pc + 4;
392 si_code = alpha_fp_emul(trigger_pc);
393 goto egress;
394 }
395 trigger_pc -= 4;
396 }
397
398egress:
399 return si_code;
400}
diff --git a/arch/alpha/math-emu/qrnnd.S b/arch/alpha/math-emu/qrnnd.S
new file mode 100644
index 000000000000..d6373ec1bff9
--- /dev/null
+++ b/arch/alpha/math-emu/qrnnd.S
@@ -0,0 +1,163 @@
1 # Alpha 21064 __udiv_qrnnd
2 # Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
3
4 # This file is part of GCC.
5
6 # The GNU MP Library is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or (at your
9 # option) any later version.
10
11 # In addition to the permissions in the GNU General Public License, the
12 # Free Software Foundation gives you unlimited permission to link the
13 # compiled version of this file with other programs, and to distribute
14 # those programs without any restriction coming from the use of this
15 # file. (The General Public License restrictions do apply in other
16 # respects; for example, they cover modification of the file, and
17 # distribution when not linked into another program.)
18
19 # This file is distributed in the hope that it will be useful, but
20 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
21 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
22 # License for more details.
23
24 # You should have received a copy of the GNU General Public License
25 # along with GCC; see the file COPYING. If not, write to the
26 # Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 # MA 02111-1307, USA.
28
29 .set noreorder
30 .set noat
31
32 .text
33
 /*
  * __udiv_qrnnd: divide the two-word value n1:n0 by d.
  *
  * Register usage (see the defines below): rem_ptr = $16, n1 = $17,
  * n0 = $18, d = $19.  The quotient is returned in $0 and the remainder
  * is stored through rem_ptr.  The C prototype used by the callers is
  *   unsigned long __udiv_qrnnd (unsigned long *, unsigned long,
  *                               unsigned long, unsigned long);
  * NOTE(review): presumably the caller guarantees n1 < d so that the
  * quotient fits in one word -- confirm against the callers.
  *
  * $2, $3, $4, $5, $6, $20 and $at are used as scratch.
  */
34 .globl __udiv_qrnnd
35 .ent __udiv_qrnnd
36__udiv_qrnnd:
37 .frame $30,0,$26,0
38 .prologue 0
39
40#define cnt $2
41#define tmp $3
42#define rem_ptr $16
43#define n1 $17
44#define n0 $18
45#define d $19
46#define qb $20
47#define AT $at
48
 /*
  * 16 loop iterations of 4 unrolled steps each = 64 quotient bits.
  * A divisor with its top bit set (negative when viewed as signed)
  * takes the separate path at $largedivisor.
  */
49 ldiq cnt,16
50 blt d,$largedivisor
51
 /*
  * One step, repeated four times per iteration:
  *   tmp = top bit of n0
  *   n1:n0 <<= 1 (tmp is shifted into the bottom of n1)
  *   qb  = (d <= n1), unsigned
  *   if (qb) n1 -= d
  *   n0 |= qb            (quotient bits accumulate at the bottom of n0)
  */
52$loop1: cmplt n0,0,tmp
53 addq n1,n1,n1
54 bis n1,tmp,n1
55 addq n0,n0,n0
56 cmpule d,n1,qb
57 subq n1,d,tmp
58 cmovne qb,tmp,n1
59 bis n0,qb,n0
60 cmplt n0,0,tmp
61 addq n1,n1,n1
62 bis n1,tmp,n1
63 addq n0,n0,n0
64 cmpule d,n1,qb
65 subq n1,d,tmp
66 cmovne qb,tmp,n1
67 bis n0,qb,n0
68 cmplt n0,0,tmp
69 addq n1,n1,n1
70 bis n1,tmp,n1
71 addq n0,n0,n0
72 cmpule d,n1,qb
73 subq n1,d,tmp
74 cmovne qb,tmp,n1
75 bis n0,qb,n0
76 cmplt n0,0,tmp
77 addq n1,n1,n1
78 bis n1,tmp,n1
79 addq n0,n0,n0
80 cmpule d,n1,qb
81 subq n1,d,tmp
82 cmovne qb,tmp,n1
83 bis n0,qb,n0
84 subq cnt,1,cnt
85 bgt cnt,$loop1
 /* Done: n1 holds the remainder, n0 the quotient. */
86 stq n1,0(rem_ptr)
87 bis $31,n0,$0
88 ret $31,($26),1
89
 /*
  * Divisor with the top bit set.  Both operands are halved first:
  *   $4    = low bit of n0 (saved, folded back into the remainder later)
  *   n1:n0 = n1:n0 >> 1
  *   $6    = low bit of d
  *   $5    = (d >> 1) + (d & 1)
  * and the same shift/subtract loop is run with $5 as the divisor.
  */
90$largedivisor:
91 and n0,1,$4
92
93 srl n0,1,n0
94 sll n1,63,tmp
95 or tmp,n0,n0
96 srl n1,1,n1
97
98 and d,1,$6
99 srl d,1,$5
100 addq $5,$6,$5
101
102$loop2: cmplt n0,0,tmp
103 addq n1,n1,n1
104 bis n1,tmp,n1
105 addq n0,n0,n0
106 cmpule $5,n1,qb
107 subq n1,$5,tmp
108 cmovne qb,tmp,n1
109 bis n0,qb,n0
110 cmplt n0,0,tmp
111 addq n1,n1,n1
112 bis n1,tmp,n1
113 addq n0,n0,n0
114 cmpule $5,n1,qb
115 subq n1,$5,tmp
116 cmovne qb,tmp,n1
117 bis n0,qb,n0
118 cmplt n0,0,tmp
119 addq n1,n1,n1
120 bis n1,tmp,n1
121 addq n0,n0,n0
122 cmpule $5,n1,qb
123 subq n1,$5,tmp
124 cmovne qb,tmp,n1
125 bis n0,qb,n0
126 cmplt n0,0,tmp
127 addq n1,n1,n1
128 bis n1,tmp,n1
129 addq n0,n0,n0
130 cmpule $5,n1,qb
131 subq n1,$5,tmp
132 cmovne qb,tmp,n1
133 bis n0,qb,n0
134 subq cnt,1,cnt
135 bgt cnt,$loop2
136
 /*
  * Undo the halving of the dividend: remainder = 2 * n1 + saved low bit.
  * For an even d ($6 == 0) that is the final answer; an odd d needs the
  * correction at $Odd.
  */
137 addq n1,n1,n1
138 addq $4,n1,n1
139 bne $6,$Odd
140 stq n1,0(rem_ptr)
141 bis $31,n0,$0
142 ret $31,($26),1
143
 /*
  * Odd divisor: the loop divided by (d + 1) / 2, so the remainder is
  * corrected by adding the quotient to it.  If that addition carries,
  * the quotient is incremented and d subtracted from the remainder.
  * Afterwards, if the remainder is still >= d, the quotient is
  * incremented and d subtracted once more.
  */
144$Odd:
145 /* q' in n0. r' in n1 */
146 addq n1,n0,n1
147
148 cmpult n1,n0,tmp # tmp := carry from addq
149 subq n1,d,AT
150 addq n0,tmp,n0
151 cmovne tmp,AT,n1
152
 /* tmp = (n1 < d); the cmoveq pair below acts only when n1 >= d. */
153 cmpult n1,d,tmp
154 addq n0,1,AT
155 cmoveq tmp,AT,n0
156 subq n1,d,AT
157 cmoveq tmp,AT,n1
158
159 stq n1,0(rem_ptr)
160 bis $31,n0,$0
161 ret $31,($26),1
162
163 .end __udiv_qrnnd
diff --git a/arch/alpha/math-emu/sfp-util.h b/arch/alpha/math-emu/sfp-util.h
new file mode 100644
index 000000000000..f53707f77455
--- /dev/null
+++ b/arch/alpha/math-emu/sfp-util.h
@@ -0,0 +1,35 @@
1#include <linux/kernel.h>
2#include <linux/sched.h>
3#include <linux/types.h>
4#include <asm/byteorder.h>
5#include <asm/fpu.h>
6
/*
 * Two-word addition: (sh:sl) = (ah:al) + (bh:bl).  The carry out of the
 * low-word sum is added into the high word.
 *
 * Both result words are computed into temporaries before either output is
 * stored, so an output may be the same object as an input (for example an
 * in-place accumulate where sl and al name the same variable).  Storing sl
 * first and then comparing it against al would lose the carry in that case.
 *
 * The macro remains usable as an expression; its value is the new high
 * word.  `al' is evaluated twice.
 */
#define add_ssaaaa(sh, sl, ah, al, bh, bl)				\
  ({ unsigned long __lo = (al) + (bl);					\
     unsigned long __hi = (ah) + (bh) + (__lo < (al));			\
     (sl) = __lo;							\
     (sh) = __hi; })
9
/*
 * Two-word subtraction: (sh:sl) = (ah:al) - (bh:bl).  A borrow out of the
 * low-word difference is subtracted from the high word.
 *
 * Both result words are computed into temporaries before either output is
 * stored, so an output may be the same object as an input (for example an
 * in-place subtract where sl and al name the same variable).  Storing sl
 * first and then comparing al against bl would compute the borrow from the
 * already overwritten value in that case.
 *
 * The macro remains usable as an expression; its value is the new high
 * word.  `al' and `bl' are each evaluated twice.
 */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)				\
  ({ unsigned long __lo = (al) - (bl);					\
     unsigned long __hi = (ah) - (bh) - ((al) < (bl));			\
     (sl) = __lo;							\
     (sh) = __hi; })
12
/*
 * Full double-width unsigned multiply: (wh:wl) = u * v.
 * mulq writes the low half of the product to wl (operand %1) and umulh
 * writes the high half to wh (operand %0).  wl is marked early-clobber
 * ("=&r") because it is written by mulq while u and v are still needed
 * as inputs to umulh.
 */
13#define umul_ppmm(wh, wl, u, v) \
14 __asm__ ("mulq %2,%3,%1; umulh %2,%3,%0" \
15 : "=r" ((UDItype)(wh)), \
16 "=&r" ((UDItype)(wl)) \
17 : "r" ((UDItype)(u)), \
18 "r" ((UDItype)(v)))
19
/*
 * Out-of-line divide helper: returns the quotient and stores the
 * remainder through its first argument.
 */
extern unsigned long __udiv_qrnnd (unsigned long *, unsigned long,
				   unsigned long, unsigned long);

/*
 * Divide the two-word value (n1:n0) by d, leaving the quotient in q and
 * the remainder in r.  The remainder goes through a local so that r need
 * not be an addressable unsigned long.
 */
#define udiv_qrnnd(q, r, n1, n0, d)					\
  do {									\
    unsigned long __rem;						\
    (q) = __udiv_qrnnd (&__rem, (n1), (n0), (d));			\
    (r) = __rem;							\
  } while (0)
27
/*
 * NOTE(review): presumably read by the generic soft-fp division code to
 * learn that udiv_qrnnd() wants a normalized divisor -- confirm against
 * the math-emu headers.
 */
28#define UDIV_NEEDS_NORMALIZATION 1
29
/*
 * Any abort() reached in code that includes this header becomes a jump to
 * a label named bad_insn, so the enclosing function must define that label.
 */
30#define abort() goto bad_insn
31
/*
 * Make __BYTE_ORDER compare equal to __LITTLE_ENDIAN, supplying a
 * placeholder value for __LITTLE_ENDIAN if no header defined it.
 */
32#ifndef __LITTLE_ENDIAN
33#define __LITTLE_ENDIAN -1
34#endif
35#define __BYTE_ORDER __LITTLE_ENDIAN