aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul Mundt <lethal@linux-sh.org>2007-11-26 06:38:36 -0500
committerPaul Mundt <lethal@linux-sh.org>2008-01-27 23:18:57 -0500
commit74d99a5e262229ee865f6f68528d10b82471ead6 (patch)
tree1e2382c8779e1ee06226f9cc9acbf9a63eb5fc44
parenta8f67f4b4d4b74cd14d3540ade8657ebee543340 (diff)
sh: SH-2A FPU support.
Signed-off-by: Kieran Bingham <kbingham@mpc-data.co.uk> Signed-off-by: Paul Mundt <lethal@linux-sh.org>
-rw-r--r--arch/sh/Kconfig2
-rw-r--r--arch/sh/Makefile4
-rw-r--r--arch/sh/kernel/cpu/sh2/entry.S17
-rw-r--r--arch/sh/kernel/cpu/sh2a/Makefile2
-rw-r--r--arch/sh/kernel/cpu/sh2a/fpu.c633
-rw-r--r--arch/sh/kernel/cpu/sh3/ex.S2
-rw-r--r--arch/sh/kernel/cpu/sh4/fpu.c23
-rw-r--r--arch/sh/kernel/traps_32.c13
-rw-r--r--include/asm-sh/fpu.h2
-rw-r--r--include/asm-sh/sigcontext.h3
-rw-r--r--include/asm-sh/system.h2
11 files changed, 673 insertions, 30 deletions
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index c18a5512ac82..2dc3b177193c 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -161,6 +161,7 @@ config CPU_SUBTYPE_SH7619
161config CPU_SUBTYPE_SH7203 161config CPU_SUBTYPE_SH7203
162 bool "Support SH7203 processor" 162 bool "Support SH7203 processor"
163 select CPU_SH2A 163 select CPU_SH2A
164 select CPU_HAS_FPU
164 165
165config CPU_SUBTYPE_SH7206 166config CPU_SUBTYPE_SH7206
166 bool "Support SH7206 processor" 167 bool "Support SH7206 processor"
@@ -169,6 +170,7 @@ config CPU_SUBTYPE_SH7206
169config CPU_SUBTYPE_SH7263 170config CPU_SUBTYPE_SH7263
170 bool "Support SH7263 processor" 171 bool "Support SH7263 processor"
171 select CPU_SH2A 172 select CPU_SH2A
173 select CPU_HAS_FPU
172 174
173# SH-3 Processor Support 175# SH-3 Processor Support
174 176
diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index f7cbc13fd2a0..292d8618248d 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -20,10 +20,6 @@ isa-$(CONFIG_CPU_SH4AL_DSP) := sh4al
20isa-$(CONFIG_CPU_SH5) := shmedia 20isa-$(CONFIG_CPU_SH5) := shmedia
21isa-$(CONFIG_SH_DSP) := $(isa-y)-dsp 21isa-$(CONFIG_SH_DSP) := $(isa-y)-dsp
22 22
23ifndef CONFIG_MMU
24isa-y := $(isa-y)-nommu
25endif
26
27ifndef CONFIG_SH_DSP 23ifndef CONFIG_SH_DSP
28ifndef CONFIG_SH_FPU 24ifndef CONFIG_SH_FPU
29isa-y := $(isa-y)-nofpu 25isa-y := $(isa-y)-nofpu
diff --git a/arch/sh/kernel/cpu/sh2/entry.S b/arch/sh/kernel/cpu/sh2/entry.S
index 4ff2334b4a38..7a26569e7956 100644
--- a/arch/sh/kernel/cpu/sh2/entry.S
+++ b/arch/sh/kernel/cpu/sh2/entry.S
@@ -149,6 +149,14 @@ ENTRY(exception_handler)
149 mov #32,r8 149 mov #32,r8
150 cmp/hs r8,r9 150 cmp/hs r8,r9
151 bt trap_entry ! 64 > vec >= 32 is trap 151 bt trap_entry ! 64 > vec >= 32 is trap
152
153#if defined(CONFIG_SH_FPU)
154 mov #13,r8
155 cmp/eq r8,r9
156 bt 10f ! fpu
157 nop
158#endif
159
152 mov.l 4f,r8 160 mov.l 4f,r8
153 mov r9,r4 161 mov r9,r4
154 shll2 r9 162 shll2 r9
@@ -158,6 +166,10 @@ ENTRY(exception_handler)
158 cmp/eq r9,r8 166 cmp/eq r9,r8
159 bf 3f 167 bf 3f
160 mov.l 8f,r8 ! unhandled exception 168 mov.l 8f,r8 ! unhandled exception
169#if defined(CONFIG_SH_FPU)
17010:
171 mov.l 9f, r8 ! unhandled exception
172#endif
1613: 1733:
162 mov.l 5f,r10 174 mov.l 5f,r10
163 jmp @r8 175 jmp @r8
@@ -177,7 +189,10 @@ interrupt_entry:
1776: .long ret_from_irq 1896: .long ret_from_irq
1787: .long do_IRQ 1907: .long do_IRQ
1798: .long do_exception_error 1918: .long do_exception_error
180 192#ifdef CONFIG_SH_FPU
1939: .long fpu_error_trap_handler
194#endif
195
181trap_entry: 196trap_entry:
182 mov #0x30,r8 197 mov #0x30,r8
183 cmp/ge r8,r9 ! vector 0x20-0x2f is systemcall 198 cmp/ge r8,r9 ! vector 0x20-0x2f is systemcall
diff --git a/arch/sh/kernel/cpu/sh2a/Makefile b/arch/sh/kernel/cpu/sh2a/Makefile
index 50e4d0ffdd6f..b279cdc3a233 100644
--- a/arch/sh/kernel/cpu/sh2a/Makefile
+++ b/arch/sh/kernel/cpu/sh2a/Makefile
@@ -6,6 +6,8 @@ obj-y := common.o probe.o opcode_helper.o
6 6
7common-y += $(addprefix ../sh2/, ex.o entry.o) 7common-y += $(addprefix ../sh2/, ex.o entry.o)
8 8
9obj-$(CONFIG_SH_FPU) += fpu.o
10
9obj-$(CONFIG_CPU_SUBTYPE_SH7206) += setup-sh7206.o clock-sh7206.o 11obj-$(CONFIG_CPU_SUBTYPE_SH7206) += setup-sh7206.o clock-sh7206.o
10obj-$(CONFIG_CPU_SUBTYPE_SH7203) += setup-sh7203.o clock-sh7203.o 12obj-$(CONFIG_CPU_SUBTYPE_SH7203) += setup-sh7203.o clock-sh7203.o
11obj-$(CONFIG_CPU_SUBTYPE_SH7263) += setup-sh7203.o clock-sh7203.o 13obj-$(CONFIG_CPU_SUBTYPE_SH7263) += setup-sh7203.o clock-sh7203.o
diff --git a/arch/sh/kernel/cpu/sh2a/fpu.c b/arch/sh/kernel/cpu/sh2a/fpu.c
new file mode 100644
index 000000000000..ff99562456fb
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh2a/fpu.c
@@ -0,0 +1,633 @@
1/*
2 * Save/restore floating point context for signal handlers.
3 *
4 * Copyright (C) 1999, 2000 Kaz Kojima & Niibe Yutaka
5 *
6 * This file is subject to the terms and conditions of the GNU General Public
7 * License. See the file "COPYING" in the main directory of this archive
8 * for more details.
9 *
10 * FIXME! These routines can be optimized in big endian case.
11 */
12#include <linux/sched.h>
13#include <linux/signal.h>
14#include <asm/processor.h>
15#include <asm/io.h>
16
17/* The PR (precision) bit in the FP Status Register must be clear when
18 * an frchg instruction is executed, otherwise the instruction is undefined.
19 * Executing frchg with PR set causes a trap on some SH4 implementations.
20 */
21
22#define FPSCR_RCHG 0x00000000
23
24
25/*
26 * Save FPU registers onto task structure.
27 * Assume called with FPU enabled (SR.FD=0).
28 */
29void
30save_fpu(struct task_struct *tsk, struct pt_regs *regs)
31{
32 unsigned long dummy;
33
34 clear_tsk_thread_flag(tsk, TIF_USEDFPU);
35 enable_fpu();
36 asm volatile("sts.l fpul, @-%0\n\t"
37 "sts.l fpscr, @-%0\n\t"
38 "fmov.s fr15, @-%0\n\t"
39 "fmov.s fr14, @-%0\n\t"
40 "fmov.s fr13, @-%0\n\t"
41 "fmov.s fr12, @-%0\n\t"
42 "fmov.s fr11, @-%0\n\t"
43 "fmov.s fr10, @-%0\n\t"
44 "fmov.s fr9, @-%0\n\t"
45 "fmov.s fr8, @-%0\n\t"
46 "fmov.s fr7, @-%0\n\t"
47 "fmov.s fr6, @-%0\n\t"
48 "fmov.s fr5, @-%0\n\t"
49 "fmov.s fr4, @-%0\n\t"
50 "fmov.s fr3, @-%0\n\t"
51 "fmov.s fr2, @-%0\n\t"
52 "fmov.s fr1, @-%0\n\t"
53 "fmov.s fr0, @-%0\n\t"
54 "lds %3, fpscr\n\t"
55 : "=r" (dummy)
56 : "0" ((char *)(&tsk->thread.fpu.hard.status)),
57 "r" (FPSCR_RCHG),
58 "r" (FPSCR_INIT)
59 : "memory");
60
61 disable_fpu();
62 release_fpu(regs);
63}
64
65static void
66restore_fpu(struct task_struct *tsk)
67{
68 unsigned long dummy;
69
70 enable_fpu();
71 asm volatile("fmov.s @%0+, fr0\n\t"
72 "fmov.s @%0+, fr1\n\t"
73 "fmov.s @%0+, fr2\n\t"
74 "fmov.s @%0+, fr3\n\t"
75 "fmov.s @%0+, fr4\n\t"
76 "fmov.s @%0+, fr5\n\t"
77 "fmov.s @%0+, fr6\n\t"
78 "fmov.s @%0+, fr7\n\t"
79 "fmov.s @%0+, fr8\n\t"
80 "fmov.s @%0+, fr9\n\t"
81 "fmov.s @%0+, fr10\n\t"
82 "fmov.s @%0+, fr11\n\t"
83 "fmov.s @%0+, fr12\n\t"
84 "fmov.s @%0+, fr13\n\t"
85 "fmov.s @%0+, fr14\n\t"
86 "fmov.s @%0+, fr15\n\t"
87 "lds.l @%0+, fpscr\n\t"
88 "lds.l @%0+, fpul\n\t"
89 : "=r" (dummy)
90 : "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG)
91 : "memory");
92 disable_fpu();
93}
94
95/*
96 * Load the FPU with signalling NANS. This bit pattern we're using
97 * has the property that no matter wether considered as single or as
98 * double precission represents signaling NANS.
99 */
100
101static void
102fpu_init(void)
103{
104 enable_fpu();
105 asm volatile("lds %0, fpul\n\t"
106 "fsts fpul, fr0\n\t"
107 "fsts fpul, fr1\n\t"
108 "fsts fpul, fr2\n\t"
109 "fsts fpul, fr3\n\t"
110 "fsts fpul, fr4\n\t"
111 "fsts fpul, fr5\n\t"
112 "fsts fpul, fr6\n\t"
113 "fsts fpul, fr7\n\t"
114 "fsts fpul, fr8\n\t"
115 "fsts fpul, fr9\n\t"
116 "fsts fpul, fr10\n\t"
117 "fsts fpul, fr11\n\t"
118 "fsts fpul, fr12\n\t"
119 "fsts fpul, fr13\n\t"
120 "fsts fpul, fr14\n\t"
121 "fsts fpul, fr15\n\t"
122 "lds %2, fpscr\n\t"
123 : /* no output */
124 : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT));
125 disable_fpu();
126}
127
128/*
129 * Emulate arithmetic ops on denormalized number for some FPU insns.
130 */
131
/*
 * denormalized float * float
 *
 * @hx: bit pattern of the denormalized operand (exponent field zero)
 * @hy: bit pattern of the other operand
 *
 * Returns the bit pattern of the product.  A signed zero is returned
 * when hy is itself denormal/zero or when hx is zero.  The result is
 * truncated, not rounded (no guard bits are kept).
 */
static int denormal_mulf(int hx, int hy)
{
	const unsigned int sign = (hx ^ hy) & 0x80000000;
	unsigned int frac_x = hx & 0x7fffffff;
	unsigned int mag_y = hy & 0x7fffffff;
	unsigned long long prod, scan;
	unsigned int res;
	int e, top;

	if (mag_y < 0x00800000 || frac_x == 0)
		return sign;

	e = (mag_y & 0x7f800000) >> 23;
	frac_x &= 0x007fffff;
	/* make the implicit leading one of hy explicit */
	mag_y = (mag_y & 0x007fffff) | 0x00800000;
	prod = (unsigned long long)frac_x * mag_y;

	/* top = index of the highest set bit in prod */
	top = -1;
	for (scan = prod; scan != 0; scan >>= 1)
		top++;

	/* FIXME: use guard bits */
	e += top - 126 - 46;
	if (e > 0)
		/* normal result: drop the leading one, insert the exponent */
		res = ((int) (prod >> (top - 23)) & 0x007fffff) | (e << 23);
	else if (e >= -22)
		/* result is still denormal */
		res = (int) (prod >> (top - 22 - e)) & 0x007fffff;
	else
		/* too small to represent */
		res = 0;

	return res | sign;
}
164
165/* denormalized double * double */
/*
 * Full 64 x 64 -> 128 bit unsigned multiply.
 *
 * @x, @y:  factors
 * @highp:  receives bits 127..64 of the product
 * @lowp:   receives bits 63..0 of the product
 *
 * Built from four 32 x 32 partial products; carries out of the low
 * word are detected by checking for wrap-around after each addition.
 */
static void mult64(unsigned long long x, unsigned long long y,
		   unsigned long long *highp, unsigned long long *lowp)
{
	unsigned long long x_hi = x >> 32;
	unsigned long long x_lo = x & 0xffffffffLL;
	unsigned long y_hi = (unsigned long) (y >> 32);
	unsigned long y_lo = (unsigned long) (y & 0xffffffffLL);
	unsigned long long hh, lh, hl, ll;
	unsigned long long lo, sum, carries;

	hh = x_hi * y_hi;
	lh = x_lo * y_hi;
	hl = x_hi * y_lo;
	ll = x_lo * y_lo;

	lo = ll;
	carries = 0LL;

	/* add the low halves of both cross terms into bits 63..32 */
	sum = lo + (lh << 32);
	if (sum < lo)
		carries++;
	lo = sum;

	sum = lo + (hl << 32);
	if (sum < lo)
		carries++;
	lo = sum;

	*lowp = lo;
	/* the high halves of the cross terms belong to the upper word */
	*highp = carries + (lh >> 32) + (hl >> 32) + hh;
}
191
/*
 * Shift the 128-bit value (mh:ml) right by n bits and return the low
 * 64 bits of the result.
 *
 * @mh: upper 64 bits
 * @ml: lower 64 bits
 * @n:  shift count
 *
 * A count of zero (or less) returns ml unchanged and a count of 128 or
 * more returns 0.  Both cases are handled explicitly because shifting
 * a 64-bit value by 64 or more bits is undefined in C; the plain
 * formula below would otherwise evaluate "mh << 64" for n == 0.
 */
static inline long long rshift64(unsigned long long mh,
				 unsigned long long ml, int n)
{
	if (n <= 0)
		return ml;
	if (n >= 128)
		return 0;
	if (n >= 64)
		return mh >> (n - 64);
	return (mh << (64 - n)) | (ml >> n);
}
199
/*
 * denormalized double * double
 *
 * @hx: bit pattern of the denormalized operand (exponent field zero)
 * @hy: bit pattern of the other operand
 *
 * Returns the bit pattern of the product.  A signed zero is returned
 * when hy is itself denormal/zero or when hx is zero.  The result is
 * truncated, not rounded (no guard bits are kept).
 */
static long long denormal_muld(long long hx, long long hy)
{
	const unsigned long long sign = (hx ^ hy) & 0x8000000000000000LL;
	unsigned long long frac_x = hx & 0x7fffffffffffffffLL;
	unsigned long long mag_y = hy & 0x7fffffffffffffffLL;
	unsigned long long prod_hi, prod_lo, scan;
	unsigned long long res;
	int e, top;

	if (mag_y < 0x0010000000000000LL || frac_x == 0)
		return sign;

	e = (mag_y & 0x7ff0000000000000LL) >> 52;
	frac_x &= 0x000fffffffffffffLL;
	/* make the implicit leading one of hy explicit */
	mag_y = (mag_y & 0x000fffffffffffffLL) | 0x0010000000000000LL;
	mult64(frac_x, mag_y, &prod_hi, &prod_lo);

	/* top = index of the highest set bit in the 128-bit product */
	if (prod_hi) {
		scan = prod_hi;
		top = 63;
	} else {
		scan = prod_lo;
		top = -1;
	}
	for (; scan != 0; scan >>= 1)
		top++;

	/* FIXME: use guard bits */
	e += top - 1022 - 52 * 2;
	if (e > 0)
		/* normal result: drop the leading one, insert the exponent */
		res = (rshift64(prod_hi, prod_lo, top - 52)
		       & 0x000fffffffffffffLL) | ((long long)e << 52);
	else if (e >= -51)
		/* result is still denormal */
		res = rshift64(prod_hi, prod_lo, top - 51 - e)
		      & 0x000fffffffffffffLL;
	else
		/* too small to represent */
		res = 0;

	return res | sign;
}
237
/*
 * ix - iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are sign-less single-precision bit patterns.  When ix
 * is denormal as well, the plain integer difference is returned and may
 * be negative; the caller has to deal with that.  Otherwise iy is
 * aligned to the exponent of ix (bits shifted out are dropped), then
 * subtracted from the significand and the result is renormalized.
 */
static int denormal_subf1(unsigned int ix, unsigned int iy)
{
	int frac;
	int exp;

	if (ix < 0x00800000)
		return ix - iy;

	exp = (ix & 0x7f800000) >> 23;
	if (exp - 1 > 31)
		return ix;
	iy >>= exp - 1;
	if (iy == 0)
		return ix;

	frac = (ix & 0x007fffff) | 0x00800000;
	frac -= iy;
	while (frac < 0x00800000) {
		/* exponent exhausted: frac is already the denormal pattern */
		if (--exp == 0)
			return frac;
		frac <<= 1;
	}

	return (exp << 23) | (frac & 0x007fffff);
}

/*
 * ix + iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are sign-less single-precision bit patterns.  When ix
 * is denormal as well, the plain integer sum is the answer (a carry
 * into bit 23 yields the smallest normal exponent).  Otherwise iy is
 * aligned to the exponent of ix and added to the significand.
 */
static int denormal_addf1(unsigned int ix, unsigned int iy)
{
	int frac;
	int exp;

	if (ix < 0x00800000)
		return ix + iy;

	exp = (ix & 0x7f800000) >> 23;
	if (exp - 1 > 31)
		return ix;
	iy >>= exp - 1;
	if (iy == 0)
		return ix;

	frac = (ix & 0x007fffff) | 0x00800000;
	frac += iy;
	if (frac >= 0x01000000) {
		frac >>= 1;
		++exp;
	}

	return (exp << 23) | (frac & 0x007fffff);
}

/*
 * Add two single-precision bit patterns of which at least one is
 * denormal.
 *
 * @hx, @hy: operand bit patterns (pass hy with the sign bit flipped to
 *           subtract)
 *
 * Returns the bit pattern of hx + hy.  The helpers always get the
 * denormal operand as their second argument.
 */
static int denormal_addf(int hx, int hy)
{
	unsigned int ix, iy;
	int sign;

	sign = hx & 0x80000000;
	ix = hx & 0x7fffffff;
	iy = hy & 0x7fffffff;

	if ((hx ^ hy) & 0x80000000) {
		/* opposite signs: subtract the magnitudes */
		if (iy < 0x00800000) {
			ix = denormal_subf1(ix, iy);
			/*
			 * ix is unsigned, so the difference has to be
			 * looked at as a signed value to notice that
			 * |hy| > |hx|; comparing ix itself against 0 can
			 * never be true.
			 */
			if ((int) ix < 0) {
				ix = -ix;
				sign ^= 0x80000000;
			}
		} else {
			/* hy is the normal (larger) one: result has its sign */
			ix = denormal_subf1(iy, ix);
			sign ^= 0x80000000;
		}
	} else {
		/* same sign: add the magnitudes */
		if (iy < 0x00800000)
			ix = denormal_addf1(ix, iy);
		else
			ix = denormal_addf1(iy, ix);
	}

	return sign | ix;
}
322
/*
 * ix - iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are sign-less double-precision bit patterns.  When ix
 * is denormal as well, the plain integer difference is returned and may
 * be negative; the caller has to deal with that.  Otherwise iy is
 * aligned to the exponent of ix (bits shifted out are dropped), then
 * subtracted from the significand and the result is renormalized.
 */
static long long denormal_subd1(unsigned long long ix, unsigned long long iy)
{
	long long frac;
	int exp;

	if (ix < 0x0010000000000000LL)
		return ix - iy;

	exp = (ix & 0x7ff0000000000000LL) >> 52;
	if (exp - 1 > 63)
		return ix;
	iy >>= exp - 1;
	if (iy == 0)
		return ix;

	frac = (ix & 0x000fffffffffffffLL) | 0x0010000000000000LL;
	frac -= iy;
	while (frac < 0x0010000000000000LL) {
		/* exponent exhausted: frac is already the denormal pattern */
		if (--exp == 0)
			return frac;
		frac <<= 1;
	}

	return ((long long)exp << 52) | (frac & 0x000fffffffffffffLL);
}

/*
 * ix + iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are sign-less double-precision bit patterns.  When ix
 * is denormal as well, the plain integer sum is the answer (a carry
 * into bit 52 yields the smallest normal exponent).  Otherwise iy is
 * aligned to the exponent of ix and added to the significand.
 */
static long long denormal_addd1(unsigned long long ix, unsigned long long iy)
{
	long long frac;
	int exp;	/* same type as in denormal_subd1(); widened on use */

	if (ix < 0x0010000000000000LL)
		return ix + iy;

	exp = (ix & 0x7ff0000000000000LL) >> 52;
	if (exp - 1 > 63)
		return ix;
	iy >>= exp - 1;
	if (iy == 0)
		return ix;

	frac = (ix & 0x000fffffffffffffLL) | 0x0010000000000000LL;
	frac += iy;
	if (frac >= 0x0020000000000000LL) {
		frac >>= 1;
		++exp;
	}

	return ((long long)exp << 52) | (frac & 0x000fffffffffffffLL);
}

/*
 * Add two double-precision bit patterns of which at least one is
 * denormal.
 *
 * @hx, @hy: operand bit patterns (pass hy with the sign bit flipped to
 *           subtract)
 *
 * Returns the bit pattern of hx + hy.  The helpers always get the
 * denormal operand as their second argument.
 */
static long long denormal_addd(long long hx, long long hy)
{
	unsigned long long ix, iy;
	long long sign;

	sign = hx & 0x8000000000000000LL;
	ix = hx & 0x7fffffffffffffffLL;
	iy = hy & 0x7fffffffffffffffLL;

	if ((hx ^ hy) & 0x8000000000000000LL) {
		/* opposite signs: subtract the magnitudes */
		if (iy < 0x0010000000000000LL) {
			ix = denormal_subd1(ix, iy);
			/*
			 * ix is unsigned, so the difference has to be
			 * looked at as a signed value to notice that
			 * |hy| > |hx|; comparing ix itself against 0 can
			 * never be true.
			 */
			if ((long long) ix < 0) {
				ix = -ix;
				sign ^= 0x8000000000000000LL;
			}
		} else {
			/* hy is the normal (larger) one: result has its sign */
			ix = denormal_subd1(iy, ix);
			sign ^= 0x8000000000000000LL;
		}
	} else {
		/* same sign: add the magnitudes */
		if (iy < 0x0010000000000000LL)
			ix = denormal_addd1(ix, iy);
		else
			ix = denormal_addd1(iy, ix);
	}

	return sign | ix;
}
407
408/**
409 * denormal_to_double - Given denormalized float number,
410 * store double float
411 *
412 * @fpu: Pointer to sh_fpu_hard structure
413 * @n: Index to FP register
414 */
415static void
416denormal_to_double (struct sh_fpu_hard_struct *fpu, int n)
417{
418 unsigned long du, dl;
419 unsigned long x = fpu->fpul;
420 int exp = 1023 - 126;
421
422 if (x != 0 && (x & 0x7f800000) == 0) {
423 du = (x & 0x80000000);
424 while ((x & 0x00800000) == 0) {
425 x <<= 1;
426 exp--;
427 }
428 x &= 0x007fffff;
429 du |= (exp << 20) | (x >> 3);
430 dl = x << 29;
431
432 fpu->fp_regs[n] = du;
433 fpu->fp_regs[n+1] = dl;
434 }
435}
436
437/**
438 * ieee_fpe_handler - Handle denormalized number exception
439 *
440 * @regs: Pointer to register structure
441 *
442 * Returns 1 when it's handled (should not cause exception).
443 */
444static int
445ieee_fpe_handler (struct pt_regs *regs)
446{
447 unsigned short insn = *(unsigned short *) regs->pc;
448 unsigned short finsn;
449 unsigned long nextpc;
450 int nib[4] = {
451 (insn >> 12) & 0xf,
452 (insn >> 8) & 0xf,
453 (insn >> 4) & 0xf,
454 insn & 0xf};
455
456 if (nib[0] == 0xb ||
457 (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */
458 regs->pr = regs->pc + 4;
459 if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */
460 nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3);
461 finsn = *(unsigned short *) (regs->pc + 2);
462 } else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */
463 if (regs->sr & 1)
464 nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
465 else
466 nextpc = regs->pc + 4;
467 finsn = *(unsigned short *) (regs->pc + 2);
468 } else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */
469 if (regs->sr & 1)
470 nextpc = regs->pc + 4;
471 else
472 nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
473 finsn = *(unsigned short *) (regs->pc + 2);
474 } else if (nib[0] == 0x4 && nib[3] == 0xb &&
475 (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */
476 nextpc = regs->regs[nib[1]];
477 finsn = *(unsigned short *) (regs->pc + 2);
478 } else if (nib[0] == 0x0 && nib[3] == 0x3 &&
479 (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */
480 nextpc = regs->pc + 4 + regs->regs[nib[1]];
481 finsn = *(unsigned short *) (regs->pc + 2);
482 } else if (insn == 0x000b) { /* rts */
483 nextpc = regs->pr;
484 finsn = *(unsigned short *) (regs->pc + 2);
485 } else {
486 nextpc = regs->pc + 2;
487 finsn = insn;
488 }
489
490#define FPSCR_FPU_ERROR (1 << 17)
491
492 if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
493 struct task_struct *tsk = current;
494
495 if ((tsk->thread.fpu.hard.fpscr & FPSCR_FPU_ERROR)) {
496 /* FPU error */
497 denormal_to_double (&tsk->thread.fpu.hard,
498 (finsn >> 8) & 0xf);
499 } else
500 return 0;
501
502 regs->pc = nextpc;
503 return 1;
504 } else if ((finsn & 0xf00f) == 0xf002) { /* fmul */
505 struct task_struct *tsk = current;
506 int fpscr;
507 int n, m, prec;
508 unsigned int hx, hy;
509
510 n = (finsn >> 8) & 0xf;
511 m = (finsn >> 4) & 0xf;
512 hx = tsk->thread.fpu.hard.fp_regs[n];
513 hy = tsk->thread.fpu.hard.fp_regs[m];
514 fpscr = tsk->thread.fpu.hard.fpscr;
515 prec = fpscr & (1 << 19);
516
517 if ((fpscr & FPSCR_FPU_ERROR)
518 && (prec && ((hx & 0x7fffffff) < 0x00100000
519 || (hy & 0x7fffffff) < 0x00100000))) {
520 long long llx, lly;
521
522 /* FPU error because of denormal */
523 llx = ((long long) hx << 32)
524 | tsk->thread.fpu.hard.fp_regs[n+1];
525 lly = ((long long) hy << 32)
526 | tsk->thread.fpu.hard.fp_regs[m+1];
527 if ((hx & 0x7fffffff) >= 0x00100000)
528 llx = denormal_muld(lly, llx);
529 else
530 llx = denormal_muld(llx, lly);
531 tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
532 tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
533 } else if ((fpscr & FPSCR_FPU_ERROR)
534 && (!prec && ((hx & 0x7fffffff) < 0x00800000
535 || (hy & 0x7fffffff) < 0x00800000))) {
536 /* FPU error because of denormal */
537 if ((hx & 0x7fffffff) >= 0x00800000)
538 hx = denormal_mulf(hy, hx);
539 else
540 hx = denormal_mulf(hx, hy);
541 tsk->thread.fpu.hard.fp_regs[n] = hx;
542 } else
543 return 0;
544
545 regs->pc = nextpc;
546 return 1;
547 } else if ((finsn & 0xf00e) == 0xf000) { /* fadd, fsub */
548 struct task_struct *tsk = current;
549 int fpscr;
550 int n, m, prec;
551 unsigned int hx, hy;
552
553 n = (finsn >> 8) & 0xf;
554 m = (finsn >> 4) & 0xf;
555 hx = tsk->thread.fpu.hard.fp_regs[n];
556 hy = tsk->thread.fpu.hard.fp_regs[m];
557 fpscr = tsk->thread.fpu.hard.fpscr;
558 prec = fpscr & (1 << 19);
559
560 if ((fpscr & FPSCR_FPU_ERROR)
561 && (prec && ((hx & 0x7fffffff) < 0x00100000
562 || (hy & 0x7fffffff) < 0x00100000))) {
563 long long llx, lly;
564
565 /* FPU error because of denormal */
566 llx = ((long long) hx << 32)
567 | tsk->thread.fpu.hard.fp_regs[n+1];
568 lly = ((long long) hy << 32)
569 | tsk->thread.fpu.hard.fp_regs[m+1];
570 if ((finsn & 0xf00f) == 0xf000)
571 llx = denormal_addd(llx, lly);
572 else
573 llx = denormal_addd(llx, lly ^ (1LL << 63));
574 tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
575 tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
576 } else if ((fpscr & FPSCR_FPU_ERROR)
577 && (!prec && ((hx & 0x7fffffff) < 0x00800000
578 || (hy & 0x7fffffff) < 0x00800000))) {
579 /* FPU error because of denormal */
580 if ((finsn & 0xf00f) == 0xf000)
581 hx = denormal_addf(hx, hy);
582 else
583 hx = denormal_addf(hx, hy ^ 0x80000000);
584 tsk->thread.fpu.hard.fp_regs[n] = hx;
585 } else
586 return 0;
587
588 regs->pc = nextpc;
589 return 1;
590 }
591
592 return 0;
593}
594
595BUILD_TRAP_HANDLER(fpu_error)
596{
597 struct task_struct *tsk = current;
598 TRAP_HANDLER_DECL;
599
600 save_fpu(tsk, regs);
601 if (ieee_fpe_handler(regs)) {
602 tsk->thread.fpu.hard.fpscr &=
603 ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
604 grab_fpu(regs);
605 restore_fpu(tsk);
606 set_tsk_thread_flag(tsk, TIF_USEDFPU);
607 return;
608 }
609
610 force_sig(SIGFPE, tsk);
611}
612
613BUILD_TRAP_HANDLER(fpu_state_restore)
614{
615 struct task_struct *tsk = current;
616 TRAP_HANDLER_DECL;
617
618 grab_fpu(regs);
619 if (!user_mode(regs)) {
620 printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
621 return;
622 }
623
624 if (used_math()) {
625 /* Using the FPU again. */
626 restore_fpu(tsk);
627 } else {
628 /* First time FPU user. */
629 fpu_init();
630 set_used_math();
631 }
632 set_tsk_thread_flag(tsk, TIF_USEDFPU);
633}
diff --git a/arch/sh/kernel/cpu/sh3/ex.S b/arch/sh/kernel/cpu/sh3/ex.S
index b6abf38d3a8d..11b6d9c6edae 100644
--- a/arch/sh/kernel/cpu/sh3/ex.S
+++ b/arch/sh/kernel/cpu/sh3/ex.S
@@ -36,7 +36,7 @@ ENTRY(exception_handling_table)
36 .long exception_error ! address error store /* 100 */ 36 .long exception_error ! address error store /* 100 */
37#endif 37#endif
38#if defined(CONFIG_SH_FPU) 38#if defined(CONFIG_SH_FPU)
39 .long do_fpu_error /* 120 */ 39 .long fpu_error_trap_handler /* 120 */
40#else 40#else
41 .long exception_error /* 120 */ 41 .long exception_error /* 120 */
42#endif 42#endif
diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c
index c5a4fc77fa06..e624180b4467 100644
--- a/arch/sh/kernel/cpu/sh4/fpu.c
+++ b/arch/sh/kernel/cpu/sh4/fpu.c
@@ -82,8 +82,8 @@ save_fpu(struct task_struct *tsk, struct pt_regs *regs)
82 "r" (FPSCR_INIT) 82 "r" (FPSCR_INIT)
83 : "memory"); 83 : "memory");
84 84
85 disable_fpu(); 85 disable_fpu();
86 release_fpu(regs); 86 release_fpu(regs);
87} 87}
88 88
89static void 89static void
@@ -91,7 +91,7 @@ restore_fpu(struct task_struct *tsk)
91{ 91{
92 unsigned long dummy; 92 unsigned long dummy;
93 93
94 enable_fpu(); 94 enable_fpu();
95 asm volatile("lds %2, fpscr\n\t" 95 asm volatile("lds %2, fpscr\n\t"
96 "fmov.s @%0+, fr0\n\t" 96 "fmov.s @%0+, fr0\n\t"
97 "fmov.s @%0+, fr1\n\t" 97 "fmov.s @%0+, fr1\n\t"
@@ -138,7 +138,7 @@ restore_fpu(struct task_struct *tsk)
138/* 138/*
139 * Load the FPU with signalling NANS. This bit pattern we're using 139 * Load the FPU with signalling NANS. This bit pattern we're using
140 * has the property that no matter wether considered as single or as 140 * has the property that no matter wether considered as single or as
141 * double precision represents signaling NANS. 141 * double precision represents signaling NANS.
142 */ 142 */
143 143
144static void 144static void
@@ -184,7 +184,7 @@ fpu_init(void)
184 "lds %2, fpscr\n\t" 184 "lds %2, fpscr\n\t"
185 : /* no output */ 185 : /* no output */
186 : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT)); 186 : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT));
187 disable_fpu(); 187 disable_fpu();
188} 188}
189 189
190/** 190/**
@@ -238,7 +238,6 @@ ieee_fpe_handler (struct pt_regs *regs)
238 if (nib[0] == 0xb || 238 if (nib[0] == 0xb ||
239 (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */ 239 (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */
240 regs->pr = regs->pc + 4; 240 regs->pr = regs->pc + 4;
241
242 if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */ 241 if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */
243 nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3); 242 nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3);
244 finsn = *(unsigned short *) (regs->pc + 2); 243 finsn = *(unsigned short *) (regs->pc + 2);
@@ -293,12 +292,10 @@ ieee_fpe_handler (struct pt_regs *regs)
293 return 0; 292 return 0;
294} 293}
295 294
296asmlinkage void 295BUILD_TRAP_HANDLER(fpu_error)
297do_fpu_error(unsigned long r4, unsigned long r5, unsigned long r6,
298 unsigned long r7, struct pt_regs __regs)
299{ 296{
300 struct pt_regs *regs = RELOC_HIDE(&__regs, 0);
301 struct task_struct *tsk = current; 297 struct task_struct *tsk = current;
298 TRAP_HANDLER_DECL;
302 299
303 if (ieee_fpe_handler(regs)) 300 if (ieee_fpe_handler(regs))
304 return; 301 return;
@@ -308,12 +305,10 @@ do_fpu_error(unsigned long r4, unsigned long r5, unsigned long r6,
308 force_sig(SIGFPE, tsk); 305 force_sig(SIGFPE, tsk);
309} 306}
310 307
311asmlinkage void 308BUILD_TRAP_HANDLER(fpu_state_restore)
312do_fpu_state_restore(unsigned long r4, unsigned long r5, unsigned long r6,
313 unsigned long r7, struct pt_regs __regs)
314{ 309{
315 struct pt_regs *regs = RELOC_HIDE(&__regs, 0);
316 struct task_struct *tsk = current; 310 struct task_struct *tsk = current;
311 TRAP_HANDLER_DECL;
317 312
318 grab_fpu(regs); 313 grab_fpu(regs);
319 if (!user_mode(regs)) { 314 if (!user_mode(regs)) {
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index 0d05fb3c48e3..2e58f7a6b746 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -662,11 +662,6 @@ asmlinkage void do_divide_error(unsigned long r4, unsigned long r5,
662} 662}
663#endif 663#endif
664 664
665/* arch/sh/kernel/cpu/sh4/fpu.c */
666extern int do_fpu_inst(unsigned short, struct pt_regs *);
667extern asmlinkage void do_fpu_state_restore(unsigned long r4, unsigned long r5,
668 unsigned long r6, unsigned long r7, struct pt_regs __regs);
669
670asmlinkage void do_reserved_inst(unsigned long r4, unsigned long r5, 665asmlinkage void do_reserved_inst(unsigned long r4, unsigned long r5,
671 unsigned long r6, unsigned long r7, 666 unsigned long r6, unsigned long r7,
672 struct pt_regs __regs) 667 struct pt_regs __regs)
@@ -853,11 +848,11 @@ void __init trap_init(void)
853 set_exception_table_evt(0x820, do_illegal_slot_inst); 848 set_exception_table_evt(0x820, do_illegal_slot_inst);
854#elif defined(CONFIG_SH_FPU) 849#elif defined(CONFIG_SH_FPU)
855#ifdef CONFIG_CPU_SUBTYPE_SHX3 850#ifdef CONFIG_CPU_SUBTYPE_SHX3
856 set_exception_table_evt(0xd80, do_fpu_state_restore); 851 set_exception_table_evt(0xd80, fpu_state_restore_trap_handler);
857 set_exception_table_evt(0xda0, do_fpu_state_restore); 852 set_exception_table_evt(0xda0, fpu_state_restore_trap_handler);
858#else 853#else
859 set_exception_table_evt(0x800, do_fpu_state_restore); 854 set_exception_table_evt(0x800, fpu_state_restore_trap_handler);
860 set_exception_table_evt(0x820, do_fpu_state_restore); 855 set_exception_table_evt(0x820, fpu_state_restore_trap_handler);
861#endif 856#endif
862#endif 857#endif
863 858
diff --git a/include/asm-sh/fpu.h b/include/asm-sh/fpu.h
index 33db698a6b4d..f8429880a270 100644
--- a/include/asm-sh/fpu.h
+++ b/include/asm-sh/fpu.h
@@ -26,6 +26,8 @@ extern void save_fpu(struct task_struct *__tsk, struct pt_regs *regs);
26#define save_fpu(tsk, regs) do { } while (0) 26#define save_fpu(tsk, regs) do { } while (0)
27#endif 27#endif
28 28
29extern int do_fpu_inst(unsigned short, struct pt_regs *);
30
29#define unlazy_fpu(tsk, regs) do { \ 31#define unlazy_fpu(tsk, regs) do { \
30 if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) { \ 32 if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) { \
31 save_fpu(tsk, regs); \ 33 save_fpu(tsk, regs); \
diff --git a/include/asm-sh/sigcontext.h b/include/asm-sh/sigcontext.h
index 8583143fa28f..8ce1435bc0bf 100644
--- a/include/asm-sh/sigcontext.h
+++ b/include/asm-sh/sigcontext.h
@@ -25,7 +25,8 @@ struct sigcontext {
25 unsigned long sc_mach; 25 unsigned long sc_mach;
26 unsigned long sc_macl; 26 unsigned long sc_macl;
27 27
28#if defined(__SH4__) || defined(CONFIG_CPU_SH4) 28#if defined(__SH4__) || defined(CONFIG_CPU_SH4) || \
29 defined(__SH2A__) || defined(CONFIG_CPU_SH2A)
29 /* FPU registers */ 30 /* FPU registers */
30 unsigned long sc_fpregs[16]; 31 unsigned long sc_fpregs[16];
31 unsigned long sc_xfpregs[16]; 32 unsigned long sc_xfpregs[16];
diff --git a/include/asm-sh/system.h b/include/asm-sh/system.h
index 8b01fc4a56af..ad3d2a636130 100644
--- a/include/asm-sh/system.h
+++ b/include/asm-sh/system.h
@@ -205,6 +205,8 @@ asmlinkage void name##_trap_handler(unsigned int vec, struct pt_regs *regs)
205BUILD_TRAP_HANDLER(address_error); 205BUILD_TRAP_HANDLER(address_error);
206BUILD_TRAP_HANDLER(debug); 206BUILD_TRAP_HANDLER(debug);
207BUILD_TRAP_HANDLER(bug); 207BUILD_TRAP_HANDLER(bug);
208BUILD_TRAP_HANDLER(fpu_error);
209BUILD_TRAP_HANDLER(fpu_state_restore);
208 210
209#define arch_align_stack(x) (x) 211#define arch_align_stack(x) (x)
210 212